import scrapy
from ZuCai.items import ZucaiItem
from ZuCai.spiders.get_date import GetBetweenday
class ZucaiSpider(scrapy.Spider):
    """Spider that scrapes JCZQ (football lottery) match data from trade.500.com."""

    name = 'zucai'
    # allowed_domains entries must be bare domain names (no scheme, no path);
    # including the '/jczq/' path can make the offsite middleware filter
    # legitimate requests.
    allowed_domains = ['trade.500.com']
    start_urls = ['https://trade.500.com/jczq/']

    def start_requests(self):
        """Yield one request per date from 2019-04-15 up to the current date.

        GetBetweenday builds the list of date-parameterised URLs between the
        given start date and today.
        """
        next_url = GetBetweenday('2019-04-15', 'https://trade.500.com/jczq/')
        for url in next_url:
            yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Parse one day's betting table into ZucaiItem objects.

        Each table row yields one item with the league, kick-off time, both
        team names, the result, and the win/draw/lose odds.
        """
        datas = response.xpath('//div[@class="bet-main bet-main-dg"]/table/tbody/tr')
        for data in datas:
            item = ZucaiItem()
            # extract_first() returns None when a cell is missing (e.g. a
            # postponed match), instead of raising IndexError like extract()[0].
            item['League'] = data.xpath('.//td[@class="td td-evt"]/a/text()').extract_first()
            item['Time'] = data.xpath('.//td[@class="td td-endtime"]/text()').extract_first()
            item['Home_team'] = data.xpath('.//span[@class="team-l"]/a/text()').extract_first()
            item['Result'] = data.xpath('.//i[@class="team-vs team-bf"]/a/text()').extract_first()
            item['Away_team'] = data.xpath('.//span[@class="team-r"]/a/text()').extract_first()
            item['Win'] = data.xpath('.//div[@class="betbtn-row itm-rangB1"]/p[1]/span/text()').extract_first()
            item['Level'] = data.xpath('.//div[@class="betbtn-row itm-rangB1"]/p[2]/span/text()').extract_first()
            item['Negative'] = data.xpath('.//div[@class="betbtn-row itm-rangB1"]/p[3]/span/text()').extract_first()
            yield item
執行過程當中可能會報「超出數組索引範圍」的錯誤,須要將 extract()[0] 換成 extract_first(),後者在取不到值時返回 None 而不會拋出異常
至此,爬取任意起始日期到當前日期之間的競彩數據就完成了,能夠在數據庫中看到完整的數據