scrapy簡單爬取圖片

#這裏只爬取第一頁

items.py
import scrapy
# Define the data to be scraped.
class InsistItem(scrapy.Item):
    """Item carrying the image URLs to download and the download results."""

    # List of image URLs; read by the images pipeline (see IMAGES_URLS_FIELD).
    image_urls = scrapy.Field()
    # Result field the images pipeline fills in after downloading
    # (Scrapy's default result field name is "images").
    images = scrapy.Field()

tengxun.py
import scrapy
from insist.items import InsistItem
import json

class TengxunSpider(scrapy.Spider):
    """Spider that collects image URLs from a JSON room-list API.

    Only the single URL in ``start_urls`` is requested, i.e. the first page.
    """

    name = 'tengxun'
    allowed_domains = ['douyucdn.cn']
    start_urls = ['http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset=']

    def parse(self, response):
        """Yield one ``InsistItem`` per entry in the response's ``data`` list.

        Args:
            response: Response whose body is a JSON object with a ``data``
                list; each entry provides a ``vertical_src`` image URL.

        Yields:
            InsistItem: item whose ``image_urls`` is a one-element list.
        """
        payload = json.loads(response.body)
        rooms = payload['data']
        self.logger.debug('room data: %s', rooms)
        for room in rooms:
            # Build a fresh item for every room: reusing one instance would
            # make every yielded item alias the same mutable object.
            item = InsistItem()
            # Important: the images pipeline expects a *list* of URLs, so
            # even a single link must be wrapped in a list.
            item['image_urls'] = [room['vertical_src']]
            yield item

settings.py
# Only Scrapy's built-in images pipeline is enabled here; the project's own
# pipeline entry is left commented out.
ITEM_PIPELINES = {
    # 'insist.pipelines.InsistPipeline': 300,
    'scrapy.pipelines.images.ImagesPipeline': 1,
}

# Directory where downloaded images are saved.
IMAGES_STORE = 'C:\\Users\\lenovo\\Desktop\\data'

# Name of the item field that holds the image links.
IMAGES_URLS_FIELD = 'image_urls'

pipelines.py
import scrapy
from scrapy.pipelines.images import ImagesPipeline#導包
class SDPipeline(ImagesPipeline):
    """Images pipeline that requests every URL listed in ``image_urls``."""

    def get_media_requests(self, item, info):
        """Yield one download request per image URL stored on *item*.

        Args:
            item: Scraped item; its ``image_urls`` field holds the image
                link(s), normally as a list.
            info: Supplied by Scrapy when it calls this hook; not used here.

        Yields:
            scrapy.Request: a request for each image URL.
        """
        image_links = item.get('image_urls') or []
        # ``image_urls`` is a list, and Request() needs a single URL string,
        # so issue one request per entry. A bare string is still accepted
        # and treated as a single link.
        if isinstance(image_links, str):
            image_links = [image_links]
        for image_link in image_links:
            yield scrapy.Request(image_link)

最後在項目根目錄下執行命令 `scrapy crawl tengxun` 啓動爬蟲。
而後在 IMAGES_STORE 所設定的圖片保存目錄中,打開名爲 full 的文件夾便可查看下載的圖片。
相關文章
相關標籤/搜索