首先要做的:
pip install mysqlclient  # 若是使用 CentOS,須要先執行 yum install python-devel mysql-devel
修改數據:因爲咱們抓取的時間格式是 str,須要轉換成 date 才能存入 MySQL 數據庫。
import datetime

# The scraped date is a "YYYY/MM/DD" string; convert it to a date object
# before it is stored in MySQL.
try:
    # Parse the scraped value itself, not the literal text 'create_date'.
    create_date = datetime.datetime.strptime(create_date, "%Y/%m/%d").date()
except (ValueError, TypeError):
    # Badly formatted (ValueError) or missing/non-string (TypeError) value:
    # fall back to the current date.
    create_date = datetime.datetime.now().date()
article_item['create_date'] = create_date
創建 MysqlPipeline
import MySQLdb


class MysqlPipeline(object):
    """Scrapy item pipeline that inserts each item into the `article` table.

    The insert is synchronous: `process_item` blocks until the row has been
    committed.
    """

    def __init__(self):
        """Open the MySQL connection and a cursor used for every insert."""
        self.conn = MySQLdb.connect(
            'localhost', 'root', 'root', 'article',
            charset='utf8', use_unicode=True,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert `item` as one row and commit.

        Args:
            item: mapping with the keys listed in the INSERT statement.
            spider: the spider that produced the item (unused).

        Returns:
            The same item, so that any later pipeline still receives it.

        Raises:
            MySQLdb.Error: if the insert or commit fails; the transaction is
                rolled back before the error is re-raised.
        """
        insert_sql = """
            insert into article(title,url,create_date,url_object_id,front_image_url,front_image_path,
            praise,collect_nums,comment_nums,contents,tags)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        """
        params = (
            item['title'], item['url'], item['create_date'],
            item['url_object_id'], item['front_image_url'],
            item['front_image_path'], item['praise'], item['collect_nums'],
            item['comment_nums'], item['contents'], item['tags'],
        )
        try:
            self.cursor.execute(insert_sql, params)
            self.conn.commit()
        except MySQLdb.Error:
            # Do not leave a half-finished transaction open on the shared
            # connection; the caller still sees the original error.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()
在 ITEM_PIPELINES 中添加配置
# Pipelines enabled for the project; Scrapy runs them in ascending order of
# the integer assigned to each class path.
ITEM_PIPELINES = {
    'spider_first.pipelines.ArticleImagePipeline': 1,
    'spider_first.pipelines.MysqlPipeline': 2,
}
# settings.py
import os

# MySQL connection parameters. Each one can be overridden by an environment
# variable of the same name, so real credentials need not be committed; the
# defaults are the original local-development values.
MYSQL_HOST = os.environ.get('MYSQL_HOST', 'localhost')
MYSQL_DBNAME = os.environ.get('MYSQL_DBNAME', 'article')
MYSQL_USER = os.environ.get('MYSQL_USER', 'root')
MYSQL_PASSWORD = os.environ.get('MYSQL_PASSWORD', 'root')
建立異步 pipeline
import MySQLdb
import MySQLdb.cursors
from twisted.enterprise import adbapi


class MysqlTwistPipeline(object):
    """Scrapy item pipeline that inserts items into MySQL asynchronously.

    Inserts go through a Twisted `adbapi.ConnectionPool`, so `process_item`
    does not wait for the database.
    """

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the project settings.

        The method name is fixed: Scrapy calls it and passes the settings, so
        the MYSQL_* values can be read directly.

        Args:
            settings: mapping providing MYSQL_HOST, MYSQL_DBNAME, MYSQL_USER
                and MYSQL_PASSWORD.

        Returns:
            A pipeline instance wrapping a new connection pool.
        """
        # Connection parameters come from settings instead of being
        # hard-coded in the pipeline.
        adbparams = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            password=settings['MYSQL_PASSWORD'],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **adbparams)
        return cls(dbpool)

    def __init__(self, dbpool):
        """Store the connection pool used for all inserts."""
        self.dbpool = dbpool

    def process_item(self, item, spider):
        """Schedule an asynchronous insert of `item`.

        Args:
            item: mapping with the keys listed in the INSERT statement.
            spider: the spider that produced the item (unused).

        Returns:
            The same item, so that any later pipeline still receives it.
        """
        # Run do_insert through the pool so the MySQL insert is asynchronous.
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Asynchronous errors do not surface at the call site, so report
        # them through an errback.
        query.addErrback(self.handle_error)
        return item

    def handle_error(self, failure):
        """Report a failed asynchronous insert."""
        # Print the failure object itself so the actual error is visible.
        print(failure)

    def do_insert(self, cursor, item):
        """Execute the INSERT for `item` on the cursor supplied by the pool."""
        insert_sql = """
            insert into article(title,url,create_date,url_object_id,front_image_url,front_image_path,
            praise,collect_nums,comment_nums,contents,tags)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        """
        params = (
            item['title'], item['url'], item['create_date'],
            item['url_object_id'], item['front_image_url'],
            item['front_image_path'], item['praise'], item['collect_nums'],
            item['comment_nums'], item['contents'], item['tags'],
        )
        cursor.execute(insert_sql, params)
最後一樣在 ITEM_PIPELINES 中添加配置便可。