python --爬蟲 打碼登錄

雲打碼平臺:http://www.yundama.com/

帳號:zhenghongyu 密碼:381650127yhz

豆瓣打碼:

import http.client
import json
import mimetypes
import time
import urllib
import urllib.request
from contextlib import ExitStack

import requests
from bs4 import BeautifulSoup


######################################################################
class YDMHttp:
    """Minimal HTTP client for the Yundama captcha-recognition API.

    Every call is a form POST to ``apiurl`` carrying the account
    credentials plus a ``method`` field; the reply is parsed as JSON.
    Methods return the API's own negative ``ret`` code on failure, or
    -9001 when the parsed reply is empty.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the account credentials; ``appid`` is kept as a string."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _payload(self, method, **extra):
        """Build the form fields shared by every API call, plus ``extra``."""
        fields = {
            'method': method,
            'username': self.username,
            'password': self.password,
            'appid': self.appid,
            'appkey': self.appkey,
        }
        fields.update(extra)
        return fields

    def _field_or_error(self, fields, field, files=None):
        """POST ``fields`` and return ``response[field]``.

        Returns the negative ``ret`` code when the API reports one, and
        -9001 when the parsed reply is empty.
        """
        response = self.request(fields, files)
        if not response:
            return -9001
        ret = response['ret']
        if ret and ret < 0:
            return ret
        return response[field]

    def request(self, fields, files=None):
        """POST ``fields`` (and optional ``files``) and return the parsed JSON."""
        return json.loads(self.post_url(self.apiurl, fields, files))

    def balance(self):
        """Return the account balance, or a negative error code."""
        return self._field_or_error(self._payload('balance'), 'balance')

    def login(self):
        """Return the account uid, or a negative error code."""
        return self._field_or_error(self._payload('login'), 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload the image at ``filename``; return its cid or a negative error code."""
        fields = self._payload(
            'upload', codetype=str(codetype), timeout=str(timeout))
        return self._field_or_error(fields, 'cid', {'file': filename})

    def result(self, cid):
        """Return the recognised text for ``cid``, or '' if none is available."""
        response = self.request(self._payload('result', cid=str(cid)))
        if not response:
            return ''
        # A reply without a (non-empty) 'text' field is treated as "no result".
        return response.get('text') or ''

    def decode(self, filename, codetype, timeout):
        """Upload a captcha and poll once per second for its result.

        Returns ``(cid, text)`` on success, ``(-3003, '')`` when no text
        arrives after ``timeout`` polls, or ``(error_code, '')`` when the
        upload itself fails.
        """
        cid = self.upload(filename, codetype, timeout)
        if cid <= 0:
            return cid, ''
        for _ in range(timeout):
            text = self.result(cid)
            if text != '':
                return cid, text
            time.sleep(1)
        return -3003, ''

    def report(self, cid):
        """Send a 'report' call with flag '0' for ``cid``; return ``ret`` or -9001."""
        response = self.request(
            self._payload('report', cid=str(cid), flag='0'))
        if response:
            return response['ret']
        return -9001

    def post_url(self, url, fields, files=None):
        """POST ``fields`` to ``url`` with optional file uploads; return the body text.

        ``files`` maps form field names to local file paths. The mapping
        passed in is not modified, and every file opened here is closed
        before returning.
        """
        with ExitStack() as stack:
            opened = {
                key: stack.enter_context(open(path, 'rb'))
                for key, path in (files or {}).items()
            }
            res = requests.post(url, files=opened, data=fields)
        return res.text


######################################################################
# Douban login script: fetch the home page, solve the captcha through
# Yundama when one is shown, then post the login form.
# NOTE(review): account credentials are hard-coded below; consider loading
# them from the environment or a config file instead.

headers = {
    'Referer': 'https://www.baidu.com/link?url=N6aTegg-CdRmi4aTkTmQmRzJE6tRkZzRO-ugJmD-uTuBBi6G7gJi7Qv-hmKtcao5&wd=&eqid=ae67fe3d000188b8000000025b95e54a',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
}
html = requests.get('https://www.douban.com/', headers=headers).text
soup = BeautifulSoup(html, 'html.parser')

# Headers used for the login POST and the follow-up page fetch.
headers = {
    'Host': 'www.douban.com',
    'Origin': 'https://www.douban.com',
    'Referer': 'https://www.douban.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
}

# Look the captcha element up once and reuse it in both branches.
captcha_img = soup.find('img', attrs={'class': 'captcha_image'})

if captcha_img is None:
    # Login form has no captcha.
    data = {
        'source': 'index_nav',
        'form_email': '1753442757@qq.com',
        'form_password': 'zcl19980901',
    }
    s = requests.Session()
    responses = s.post('https://www.douban.com/accounts/login', data=data, headers=headers).text
    print(responses)
    print(s.get('https://www.douban.com/', headers=headers).text)
else:
    # Login form has a captcha: download the image and have it recognised.
    img_url = captcha_img.get('src')
    urllib.request.urlretrieve(img_url, './getimage.jpg')
    # The captcha id is the value of the 'id' query parameter in the image URL.
    captcha_id = img_url.split('id=')[-1].split('&')[0]

    # Username
    username = 'zhenghongyu'
    # Password
    password = '381650127yhz'
    # Software ID, required for developer revenue sharing; obtained from
    # "My Software" in the developer console.
    appid = 1
    # Software key, required for developer revenue sharing; obtained from
    # "My Software" in the developer console.
    appkey = '22cc5376925e9387a23cf797cb9ba745'
    # Image file
    filename = 'getimage.jpg'
    # Captcha type, e.g. 1004 means 4 alphanumeric characters. Types are
    # priced differently; an inaccurate value hurts the recognition rate.
    # All types are listed at http://www.yundama.com/price.html
    codetype = 3000
    # Timeout in seconds
    timeout = 60

    # Sanity check that the placeholder username was replaced.
    if username == 'username':
        print('請設置好相關參數再測試')
    else:
        # Initialise the client
        yundama = YDMHttp(username, password, appid, appkey)

        # Log in to Yundama
        uid = yundama.login()
        print('uid: %s' % uid)

        # Query the balance
        balance = yundama.balance()
        print('balance: %s' % balance)

        # Recognise: image path, captcha type id, timeout (seconds) -> result
        cid, result = yundama.decode(filename, codetype, timeout)
        print('cid: %s, result: %s' % (cid, result))

        data = {
            'source': 'index_nav',
            'form_email': '1753442757@qq.com',
            'form_password': 'zcl19980901',
            'captcha-solution': result,
            'captcha-id': captcha_id
        }
        print(data)
        s = requests.Session()
        # The login response body is not printed in this branch; the same
        # session is reused for the page fetch below.
        responses = s.post('https://www.douban.com/accounts/login', data=data, headers=headers).text
        print(s.get('https://www.douban.com/', headers=headers).text)
相關文章
相關標籤/搜索