# Scrapy image-download walkthrough. Only the first page of the API is crawled.
# The snippets below belong to four separate files of the `insist` project;
# each section header names the file the code goes into.

# ---------------------------------------------------------------------------
# items.py
# ---------------------------------------------------------------------------
import scrapy


class InsistItem(scrapy.Item):
    """Item carrying the image URLs to be downloaded."""

    # List of image URLs; this is the field the images pipeline reads
    # (see IMAGES_URLS_FIELD in settings.py).
    image_urls = scrapy.Field()


# ---------------------------------------------------------------------------
# tengxun.py
# ---------------------------------------------------------------------------
import json

import scrapy
from insist.items import InsistItem


class TengxunSpider(scrapy.Spider):
    """Fetch one page of the room listing API and emit one item per room."""

    name = 'tengxun'
    allowed_domains = ['douyucdn.cn']
    start_urls = ['http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset=']

    def parse(self, response):
        """Yield an ``InsistItem`` for every record in the JSON response.

        The response body is decoded as JSON; its ``data`` entry is iterated
        and each record's ``vertical_src`` value becomes the single element
        of the item's ``image_urls`` list.
        """
        payload = json.loads(response.body)
        records = payload.get('data') if isinstance(payload, dict) else None
        self.logger.debug('records received: %r', records)

        if not isinstance(records, list):
            # Nothing iterable to process; report it instead of crashing.
            self.logger.warning('unexpected "data" payload from %s', response.url)
            return

        for record in records:
            image_url = record.get('vertical_src') if isinstance(record, dict) else None
            if not image_url:
                continue
            # Build a fresh item per record so that items already handed to
            # the pipeline are never mutated by a later iteration.
            item = InsistItem()
            # Important: the images pipeline expects a *list* of URLs, so even
            # a single link must be wrapped in a list.
            item['image_urls'] = [image_url]
            yield item


# ---------------------------------------------------------------------------
# settings.py
# ---------------------------------------------------------------------------
ITEM_PIPELINES = {
    # 'insist.pipelines.InsistPipeline': 300,
    'scrapy.pipelines.images.ImagesPipeline': 1,
}
IMAGES_STORE = 'C:\\Users\\lenovo\\Desktop\\data'  # directory where images are saved
IMAGES_URLS_FIELD = 'image_urls'  # item field that holds the image links


# ---------------------------------------------------------------------------
# pipelines.py
# ---------------------------------------------------------------------------
import scrapy
from scrapy.pipelines.images import ImagesPipeline


class SDPipeline(ImagesPipeline):
    """Images pipeline that requests every URL listed in ``image_urls``.

    NOTE: the ITEM_PIPELINES shown in settings.py registers the stock
    ``ImagesPipeline``; this subclass only takes effect if it is registered
    there instead.
    """

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['image_urls']``.

        ``image_urls`` is a list, so each entry is requested individually
        rather than passing the whole list to ``scrapy.Request``.
        """
        for image_link in item.get('image_urls', []):
            yield scrapy.Request(image_link)


# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
# Finally, run:  scrapy crawl tengxun
# Then open the `full` folder inside the IMAGES_STORE directory to view the
# downloaded images.