import json

import pymongo
import pymysql
import redis


class WeatherPipeline(object):
    """Debug pipeline: print every item and hand it on unchanged."""

    def process_item(self, item, spider):
        """Print ``item`` to stdout and return it for the next pipeline."""
        print(item)
        return item


# Insert into Redis
class RedisPipeline(object):
    """Store each scraped item as a Redis hash keyed by the item's ``city``."""

    def __init__(self, host, port, password):
        """Remember the connection parameters; no connection is opened here."""
        self.host = host
        self.port = port
        self.password = password

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``RE_HOST``/``RE_PORT``/``RE_PASS`` settings."""
        settings = crawler.settings
        return cls(
            host=settings.get('RE_HOST'),
            # getint() yields an int port whether the setting was given as
            # an int or as a string.
            port=settings.getint('RE_PORT', 6379),
            # NOTE(review): 'xxxxx' looks like a placeholder default --
            # confirm the real fallback (or use None for "no password").
            password=settings.get('RE_PASS', 'xxxxx'),
        )

    def open_spider(self, spider):
        """Create a connection pool on database 3 and a client bound to it."""
        pool = redis.ConnectionPool(
            host=self.host,
            password=self.password,
            port=self.port,
            db=3,
        )
        self.client = redis.Redis(connection_pool=pool)

    def close_spider(self, spider):
        """Release the pooled connections opened in ``open_spider``."""
        self.client.connection_pool.disconnect()

    def process_item(self, item, spider):
        """Write all fields of ``item`` into the hash named ``item['city']``.

        Raises:
            KeyError: if ``item`` has no ``city`` field.
        """
        # NOTE(review): redis-py deprecates hmset() in favour of
        # hset(name, mapping=...); kept as-is so older redis-py versions
        # keep working -- switch once the minimum version is confirmed.
        self.client.hmset(item['city'], dict(item))
        return item


# Insert into MongoDB
class MongoPipeline(object):
    """Insert each scraped item as a document into the ``tianqi`` collection."""

    collection_name = 'tianqi'

    def __init__(self, mongo_host, mongo_db):
        """Remember the MongoDB host and database name."""
        self.mongo_host = mongo_host
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``MO_HOST`` and ``MO_DB`` (default ``weather``)."""
        settings = crawler.settings
        return cls(
            mongo_host=settings.get('MO_HOST'),
            mongo_db=settings.get('MO_DB', 'weather'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the configured database."""
        self.client = pymongo.MongoClient(host=self.mongo_host)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client."""
        self.client.close()

    def process_item(self, item, spider):
        """Insert a dict copy of ``item`` and return the item unchanged."""
        self.db[self.collection_name].insert_one(dict(item))
        return item


# Insert into the MySQL database
class MysqlPipeline(object):
    """Insert each scraped item as a row of the ``weather`` table."""

    # Parameterised statement; values are bound by the driver, never
    # interpolated into the SQL text.
    _INSERT_SQL = "INSERT INTO weather (`sheng`,`city`,`hqiwen`,`lqiwen`) VALUES (%s,%s,%s,%s)"

    def __init__(self, host, username, password, database, port, charset):
        """Remember the connection parameters; no connection is opened here."""
        self.host = host
        self.username = username
        self.password = password
        self.database = database
        self.port = port
        self.charset = charset

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MY_*`` crawler settings."""
        settings = crawler.settings
        return cls(
            host=settings.get('MY_HOST'),
            username=settings.get('MY_USER'),
            password=settings.get('MY_PASS'),
            database=settings.get('MY_DATA'),
            # pymysql expects an integer port; getint() also covers a value
            # supplied as a string, and 3306 is what pymysql falls back to
            # when no port is given.
            port=settings.getint('MY_PORT', 3306),
            charset=settings.get('MY_CHARSET'),
        )

    def open_spider(self, spider):
        """Connect to MySQL and create the cursor reused for every insert."""
        self.client = pymysql.connect(
            host=self.host,
            user=self.username,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )
        self.cursor = self.client.cursor()

    def close_spider(self, spider):
        """Close the cursor, then the connection (even if the cursor fails)."""
        try:
            self.cursor.close()
        finally:
            self.client.close()

    def process_item(self, item, spider):
        """Insert one row built from ``item`` and commit it.

        Raises:
            KeyError: if ``item`` lacks one of the four expected fields.
            pymysql.MySQLError: re-raised after rolling back when the
                insert or the commit fails.
        """
        # Read the fields first so a missing key never touches the database.
        row = (item['sheng'], item['city'], item['hqiwen'], item['lqiwen'])
        try:
            self.cursor.execute(self._INSERT_SQL, row)
            self.client.commit()
        except pymysql.MySQLError:
            # Leave the shared connection in a clean state for later items.
            self.client.rollback()
            raise
        return item