import time

import pymysql
from scrapy.exceptions import DropItem


class ErshouchePipeline(object):
    """Scrapy item pipeline that stores used-car items in a MySQL table.

    On construction the pipeline connects to MySQL and loads every URL
    already stored in the ``二手車之家`` table. Each incoming item is
    inserted only if its ``car_url`` has not been seen, either in the
    database at start-up or earlier during the current crawl.
    """

    # Item keys, in the order their values are bound to the INSERT
    # statement's eighteen placeholders.
    _ITEM_FIELDS = (
        'city', 'trademark', 'model', 'colour', 'price', 'purpose',
        'vehicle_condition', 'drive_mode', 'Truck_kilometer', 'car_license',
        'Stop_displacemen', 'year_jian_due', 'insurance_policy_matures',
        'assurance_due', 'emission_standard', 'guohu_number', 'maintenance',
        'car_url',
    )

    def __init__(self):
        """Open the database connection and cache the stored URLs."""
        # NOTE(review): connection credentials are hard-coded here; consider
        # moving them to the project settings or the environment.
        self.conn = pymysql.connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            passwd='mlpythonlmoi',
            db='ershouche',
            charset='utf8',
        )
        # DictCursor returns each row as a dict keyed by column name.
        self.cusor = self.conn.cursor(cursor=pymysql.cursors.DictCursor)

        # Read the URLs of the records that have already been crawled.
        self.cusor.execute("select 路由網址 from 二手車之家")
        temp = self.cusor.fetchall()  # all rows matched by the query
        print('返回查詢獲得的記錄:', temp)

        self.url_list = [row['路由網址'] for row in temp]
        # Set mirror of url_list so the per-item duplicate check is O(1).
        self._seen_urls = set(self.url_list)
        print('存在的:', self.url_list)

    def process_item(self, item, spider):
        """Insert ``item`` into the database unless its URL is already known.

        Args:
            item: Mapping that provides every key in ``_ITEM_FIELDS``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, after it has been inserted and committed.

        Raises:
            DropItem: If ``item['car_url']`` was already stored or was
                already inserted earlier in this crawl.
            pymysql.MySQLError: If the insert or commit fails; the
                transaction is rolled back before the error propagates.
        """
        car_url = item['car_url']
        if car_url in self._seen_urls:
            raise DropItem('該item數據庫中已經存在!')

        sql = "insert into 二手車之家 values(Null,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        values = tuple(item[field] for field in self._ITEM_FIELDS)
        try:
            self.cusor.execute(sql, values)
            self.conn.commit()
        except pymysql.MySQLError:
            # Do not leave the shared connection in a half-finished
            # transaction for the items that follow.
            self.conn.rollback()
            raise

        # Remember the URL so a repeat within the same crawl is dropped too.
        self._seen_urls.add(car_url)
        self.url_list.append(car_url)
        return item

    def close_spider(self, spider):
        """Release the database resources and print the finish time."""
        try:
            self.cusor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            self.conn.close()
        print("操做結束!")
        print('結束時間:' + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))