使用http.cookiejar帶cookie信息登陸爬取方法 -《狗嗨默示錄》-

Login.py

# !/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib.request
import urllib.parse
import user_info
import http.cookiejar
import re
import time
import socket


# Jar that holds the cookies collected from responses (e.g. the login session).
cookie = http.cookiejar.CookieJar()
# Handler that records response cookies in the jar and replays them on requests.
handler = urllib.request.HTTPCookieProcessor(cookiejar=cookie)
# Opener that routes every request it sends through the cookie handler.
opener = urllib.request.build_opener(handler)

post_url = 'http://www.ks5u.com/user/inc/UserLogin_Index.asp'


def login():
    """Log in to ks5u.com and return the response page as text.

    POSTs ``user_info.data`` to ``post_url`` through the module-level
    ``opener``, whose cookie handler keeps any cookies set by the response
    so that later requests made with the same opener carry them.

    Returns:
        str: The response body decoded as GB2312. Bytes that are not valid
        GB2312 are replaced with U+FFFD instead of raising, so a stray
        character on the page cannot abort the login check.
    """
    # NOTE(review): user_info.data is presumably the url-encoded login form
    # as bytes (urllib requires bytes for POST data) -- confirm in user_info.
    req = urllib.request.Request(post_url, user_info.data)
    # Close the HTTP response deterministically once the body has been read.
    with opener.open(req) as resp:
        html = resp.read()
    return html.decode('gb2312', errors='replace')

# Log in once at import time; the attempt counts as successful when the
# returned page contains the account name.
print('登陸成功' if u'513021339@qq.com' in login() else '登陸失敗')

def getlist():
    """Fetch the topic/mock-exam listing and extract its document links.

    POSTs the form ``xueke=1&shenfen=32`` to the ``jinbang.ashx`` endpoint
    through the module-level ``opener`` and scans the UTF-8 decoded response
    for anchor tags.

    Returns:
        list[tuple[str, str]]: ``(href, title)`` pairs, one for each
        ``<a href="..." target="_blank" title="...">`` tag in the response.
    """
    endpoint = 'http://www.ks5u.com/zhuantimoni/ashx/jinbang.ashx'
    # Form fields that select which topic/mock-exam category is listed.
    form = 'xueke=1&shenfen=32'.encode('utf-8')
    request = urllib.request.Request(endpoint, data=form)
    page = opener.open(request).read().decode('utf-8')
    link_pattern = re.compile(r'<a href="(.+?)" target="_blank" title="(.+?)">')
    return link_pattern.findall(page)

def getfile(id, name, url):
    """Download one document and save it as ``<name>.doc`` in the working directory.

    The request goes through the module-level ``opener`` so it carries the
    cookies that opener has collected.

    Args:
        id: Document id, interpolated into the download URL's ``id`` query
            parameter.
        name: Output file name without extension; a ``str``, or UTF-8
            encoded ``bytes`` (decoded before use).
        url: Page the link was found on; sent as the ``Referer`` header.

    Raises:
        OSError: If the request fails or the file cannot be written
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    # NOTE(review): "Dpwnsch.asp" may be a typo for "Downsch.asp" -- confirm
    # against the site before changing it.
    req = urllib.request.Request('http://www.ks5u.com/USER/INC/Dpwnsch.asp?id=%s' % id)
    req.add_header('Referer', url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')

    # On Python 3 a str has no .decode(); only decode when bytes were passed.
    if isinstance(name, bytes):
        name = name.decode('utf-8')

    # Download first so a failed request does not leave an empty file behind.
    with opener.open(req) as resp:
        payload = resp.read()

    # The document is binary: write the raw bytes, never a decoded str.
    with open(name + '.doc', 'wb') as out:
        out.write(payload)

# Set the timeout before any request is made, so that a stalled connection
# cannot hang the script (listing fetch and downloads alike).
socket.setdefaulttimeout(30)

for item in getlist():
    url = item[0]
    name = item[1]
    print(name)
    # Last path segment minus its final six characters
    # (presumably the ".shtml" suffix -- confirm against real links).
    file_id = url.split('/')[-1][:-6]
    try:
        getfile(file_id, name, url)
    except Exception as e:
        # Best-effort batch: report why this item failed and carry on.
        print('下載失敗', e)
    else:
        # Pause between successful downloads to avoid hammering the server.
        time.sleep(2)
相關文章
相關標籤/搜索