# Scrapy pipeline practice (scrapy Pipeline 練習)

class WeatherPipeline(object):
    """Debugging pipeline that echoes every scraped item to stdout."""

    def process_item(self, item, spider):
        """Print ``item`` and pass it on unchanged.

        Args:
            item: The scraped item; printed as-is.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        scraped = item
        print(scraped)
        return scraped
#插入到redis
import redis
import json
class RedisPipeline(object):
    """Item pipeline that stores each item as a Redis hash keyed by its city.

    Connection parameters come from the Scrapy settings ``RE_HOST``,
    ``RE_PORT`` and ``RE_PASS``. Hashes are written to Redis database 3.
    """

    def __init__(self, host, port, password):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            host: Redis server host name or address.
            port: Redis server port.
            password: Redis password, or ``None`` to connect without AUTH.
        """
        self.host = host
        self.port = port
        self.password = password
        # Created in open_spider and released again in close_spider.
        self.pool = None
        self.client = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        ``RE_PORT`` falls back to ``'6379'``. ``RE_PASS`` falls back to
        ``None`` so that an unset password means "no authentication" rather
        than sending a placeholder string to the server.
        """
        settings = crawler.settings
        return cls(
            host=settings.get('RE_HOST'),
            port=settings.get('RE_PORT', '6379'),
            password=settings.get('RE_PASS'),
        )

    def open_spider(self, spider):
        """Create the connection pool and client when the spider starts."""
        self.pool = redis.ConnectionPool(
            host=self.host,
            password=self.password,
            port=self.port,
            db=3,
        )
        self.client = redis.Redis(connection_pool=self.pool)

    def close_spider(self, spider):
        """Release the pooled connections when the spider finishes.

        Safe to call when the pool was never created or was already closed.
        """
        try:
            if self.pool is not None:
                self.pool.disconnect()
        finally:
            self.pool = None
            self.client = None

    def process_item(self, item, spider):
        """Write ``item`` as a hash stored under the key ``item['city']``.

        Args:
            item: Scraped item; must contain a ``'city'`` field.
            spider: The spider that produced the item (unused).

        Returns:
            The unmodified ``item`` so later pipelines can process it.

        Raises:
            KeyError: If ``item`` has no ``'city'`` field.
        """
        # hset(name, mapping=...) replaces the deprecated hmset();
        # it needs redis-py 3.5 or newer.
        self.client.hset(item['city'], mapping=dict(item))
        return item
#插入到mongoDB
import pymongo
class MongoPipeline(object):
    """Item pipeline that saves every scraped item as a MongoDB document.

    The server address is read from the ``MO_HOST`` setting and the database
    name from ``MO_DB`` (default ``'weather'``).
    """

    # Collection inside the configured database that receives the items.
    collection_name = 'tianqi'

    def __init__(self, mongo_host, mongo_db):
        """Store the connection target; the client is created in open_spider."""
        self.mongo_host, self.mongo_db = mongo_host, mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings."""
        settings = crawler.settings
        host = settings.get('MO_HOST')
        database = settings.get('MO_DB', 'weather')
        return cls(mongo_host=host, mongo_db=database)

    def open_spider(self, spider):
        """Connect to MongoDB and select the target database."""
        connection = pymongo.MongoClient(host=self.mongo_host)
        self.client = connection
        self.db = connection[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client when the spider finishes."""
        self.client.close()

    def process_item(self, item, spider):
        """Insert a dict copy of ``item`` into the collection.

        Returns:
            The original ``item`` so later pipelines can process it.
        """
        collection = self.db[self.collection_name]
        document = dict(item)
        collection.insert_one(document)
        return item
#插入mysql 數據庫
import pymysql
class MysqlPipeline(object):
    """Item pipeline that inserts each item as a row of the ``weather`` table.

    Connection parameters come from the Scrapy settings ``MY_HOST``,
    ``MY_USER``, ``MY_PASS``, ``MY_DATA``, ``MY_PORT`` and ``MY_CHARSET``.
    """

    def __init__(self, host, username, password, database, port, charset):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            host: MySQL server host name or address.
            username: Account used to log in.
            password: Password for ``username``.
            database: Name of the database holding the ``weather`` table.
            port: Server port; numeric strings are converted to ``int``.
            charset: Connection character set.
        """
        self.host = host
        self.username = username
        self.password = password
        self.database = database
        # PyMySQL rejects a non-int port, but a setting given on the command
        # line or via the environment arrives as a string. Empty/None values
        # pass through untouched so the driver's own default still applies.
        self.port = int(port) if port else port
        self.charset = charset
        # Created in open_spider and released again in close_spider.
        self.client = None
        self.cursor = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings."""
        settings = crawler.settings
        return cls(
            host=settings.get('MY_HOST'),
            username=settings.get('MY_USER'),
            password=settings.get('MY_PASS'),
            database=settings.get('MY_DATA'),
            port=settings.get('MY_PORT'),
            charset=settings.get('MY_CHARSET'),
        )

    def open_spider(self, spider):
        """Open the database connection and a cursor when the spider starts."""
        self.client = pymysql.connect(
            host=self.host,
            user=self.username,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )
        self.cursor = self.client.cursor()

    def close_spider(self, spider):
        """Close the cursor and the connection when the spider finishes.

        The connection is closed even if closing the cursor fails.
        """
        try:
            if self.cursor is not None:
                self.cursor.close()
        finally:
            if self.client is not None:
                self.client.close()

    def process_item(self, item, spider):
        """Insert one row built from ``item`` and commit it.

        Args:
            item: Scraped item with the fields ``sheng``, ``city``,
                ``hqiwen`` and ``lqiwen``.
            spider: The spider that produced the item (unused).

        Returns:
            The unmodified ``item`` so later pipelines can process it.

        Raises:
            KeyError: If ``item`` lacks one of the required fields.
            Exception: Any error from the insert or commit is re-raised after
                the transaction has been rolled back.
        """
        row = (item['sheng'], item['city'], item['hqiwen'], item['lqiwen'])
        try:
            self.cursor.execute(
                "INSERT INTO weather (`sheng`,`city`,`hqiwen`,`lqiwen`) VALUES (%s,%s,%s,%s)",
                row,
            )
            self.client.commit()
        except Exception:
            # Leave the connection in a clean state for the next item.
            self.client.rollback()
            raise
        return item
# Related articles (相關文章)
# Related tags / search (相關標籤/搜索)