import scrapy


class imoocall_Spider(scrapy.Spider):
    """Scrape imooc course cards, following a pagination link page by page.

    The crawl starts at page 2 of the course list. Each response yields one
    item per course card and then, if available, one follow-up request.
    """

    name = "imoocall"
    start_urls = [
        'https://www.imooc.com/course/list?page=2',
    ]

    # Zero-based position, among the anchors inside ``div.page``, of the link
    # to follow. NOTE(review): presumably this is the "next page" anchor --
    # confirm against the site's current pagination markup.
    _NEXT_LINK_INDEX = 9

    def parse(self, response):
        """Yield one dict per course card, then a request for the next page.

        Args:
            response: The Scrapy response for a course-list page.

        Yields:
            dict: The category label, course title, list of all ``span``
                texts (stored under the difficulty key) and the description
                of each ``div.course-card-container`` element.
            scrapy.Request: A request for the followed pagination link,
                handled by this same method, when that link exists.
        """
        for course in response.css('div.course-card-container'):
            yield {
                '類別': course.css('label::text').extract_first(),
                '課程名': course.css('h3::text').extract_first(),
                # extract() (not extract_first) keeps every span text.
                '難度級別': course.css('span::text').extract(),
                '簡介': course.css('p::text').extract_first(),
            }

        # Pull all pagination hrefs first and bounds-check before indexing:
        # indexing the selector list directly raises IndexError whenever the
        # page exposes fewer anchors than expected, which would abort the
        # callback instead of simply ending the crawl.
        page_links = response.css('div.page a::attr("href")').extract()
        if len(page_links) > self._NEXT_LINK_INDEX:
            next_page = response.urljoin(page_links[self._NEXT_LINK_INDEX])
            yield scrapy.Request(next_page, self.parse)
import scrapy


class imooc_Spider(scrapy.Spider):
    """Scrape imooc course cards from a fixed range of listing pages."""

    name = "imooc"

    def start_requests(self):
        """Yield one request per course-list page, for pages 1 through 31."""
        for page in range(1, 32):
            page_url = f"https://www.imooc.com/course/list?page={page}"
            yield scrapy.Request(url=page_url, callback=self.parse)

    def parse(self, response):
        """Yield one dict per ``div.course-card-container`` in the response.

        Each dict carries the first ``label`` text, the first ``h3`` text,
        the list of all ``span`` texts and the first ``p`` text of the card.
        """
        cards = response.css('div.course-card-container')
        for card in cards:
            category = card.css('label::text').extract_first()
            title = card.css('h3::text').extract_first()
            # All span texts are kept as a list, not only the first one.
            span_texts = card.css('span::text').extract()
            summary = card.css('p::text').extract_first()
            yield {
                '類別': category,
                '課程名': title,
                '難度級別': span_texts,
                '簡介': summary,
            }