# -*- coding: utf-8 -*-
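# This file only contains the spider itself. Run it from a terminal (for
# example the one built into PyCharm) with the Scrapy command line, passing
# the spider's name (its `name` attribute), not this file's name.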
import scrapy
from insist.items import InsistItem
class InsistsSpider(scrapy.Spider):
    """Spider that collects teacher entries from the itcast.cn teacher page.

    Each ``<div class="li_txt">`` node on the start page becomes one
    ``InsistItem`` with its ``name``, ``title`` and ``info`` fields filled
    from the node's ``<h3>``, ``<h4>`` and ``<p>`` text respectively.
    """

    # Identifier used to launch this spider from the Scrapy command line.
    name = 'insists'
    allowed_domains = ['itcast.cn']
    start_urls = ['http://www.itcast.cn/channel/teacher.shtml']

    def parse(self, response):
        """Build one item per teacher block found in ``response``.

        Args:
            response: The downloaded page; it is queried with XPath.

        Returns:
            list: One ``InsistItem`` per ``div.li_txt`` node, in document
            order. A field whose element has no text node is stored as an
            empty string.
        """
        items = []
        for node in response.xpath("//div[@class='li_txt']"):
            # Item object that stores the scraped fields (declared in items).
            item = InsistItem()
            # extract_first() returns the first matched text node as a string
            # and falls back to the default when nothing matches, so a block
            # with a missing element no longer raises IndexError and discards
            # the items collected so far.
            item['name'] = node.xpath("./h3/text()").extract_first(default='')
            item['title'] = node.xpath("./h4/text()").extract_first(default='')
            item['info'] = node.xpath("./p/text()").extract_first(default='')
            items.append(item)
        return items