# 三種urllib實現網頁下載,含cookie模擬登錄

# coding=UTF-8

import re
import urllib.request, http.cookiejar, urllib.parse

#
# print('---------------第一種方法----------------------')
# URL = 'https://baike.baidu.com/item/%E5%B7%B4%E6%B2%99%E5%B0%94%C2%B7%E9%98%BF%E8%90%A8%E5%BE%B7/2867946?fromtitle=%E9%98%BF%E8%90%A8%E5%BE%B7&fromid=9693472'
# response = urllib.request.urlopen(URL)
# if response.getcode() == 200:
#     conf = response.read()
#     print(conf)
# else:
#     print('Fail')
#
# print('---------------第二種方法----------------------')
# # 建立 request 對象
# request = urllib.request.Request(URL)
#
# # 封裝 request 對象
# request.add_header('User-Agent', 'Mozilla/5.0')
#
# # 發送帶頭信息的請求
# response1 = urllib.request.urlopen(request)
# if response1.getcode() == 200:
#     conf = response1.read()
#     print(conf)
# else:
#     print('Fail')

print('---------------第三種方法----------------------')
# URL2 is the page to fetch; URL3 is the login URL that carries that same
# page address in its ReturnUrl query parameter.
URL2 = 'http://lczl.cnki.net/jbdetail/index?query=1'
URL3 = 'http://r.cnki.net/Klogin/Login.aspx?ReturnUrl=http://lczl.cnki.net/jbdetail/index?query=1'

# Cookie container plus the handler that reads and writes it.
cj = http.cookiejar.CookieJar()
cookie_handler = urllib.request.HTTPCookieProcessor(cj)

# Build an opener around the cookie handler and install it globally, so that
# later plain urllib.request.urlopen() calls go through it.
opener = urllib.request.build_opener(cookie_handler)
urllib.request.install_opener(opener)

# Fetch the page now. Despite its name, request1 holds the response object
# returned by urlopen(), not a Request.
request1 = urllib.request.urlopen(URL2)


def getVIEWSTATE(data):
    """Extract the value of the ``__VIEWSTATE`` form field from page HTML.

    Args:
        data: HTML text of the page, as ``str``.

    Returns:
        The ``value`` attribute of the first ``__VIEWSTATE`` field found.

    Raises:
        IndexError: If ``data`` contains no ``__VIEWSTATE`` field written in
            the expected ``name=... id=... value=...`` attribute order.
    """
    # ``[^"]*`` stops at the closing quote of the value attribute. A greedy
    # ``.*`` would run on to the LAST double quote on the line, pulling any
    # following attribute or tag into the result.
    match = re.search(r'name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]*)"', data)
    if match is None:
        # Same exception type as indexing an empty findall() result, so
        # existing callers that catch IndexError keep working.
        raise IndexError('__VIEWSTATE field not found in page')
    return match.group(1)


# Read the first response and close it as soon as its body has been consumed,
# so the underlying connection is not left open.
with request1:
    VIEWSTATE = getVIEWSTATE(request1.read().decode())

# Login form fields. urlencode() percent-encodes str values itself, so the
# view state is passed as text rather than pre-encoded bytes.
# NOTE(review): the user name and password are hard-coded in source; consider
# loading them from the environment or a config file kept out of version control.
data = {
    '__VIEWSTATE': VIEWSTATE,
    'userName': '345666561@qq.com',
    'passWord': '215501',
    'iplogin': 0,
}
post_data = urllib.parse.urlencode(data).encode()

# Supplying a request body makes urllib send this as a POST.
request2 = urllib.request.Request(URL3, post_data)
request2.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0')

# The context manager closes the response even if reading or decoding fails.
with urllib.request.urlopen(request2) as response2:
    if response2.getcode() == 200:
        conf = response2.read()
        print(conf.decode('utf8'))
        # Show the cookies collected across both requests.
        print(cj)
    else:
        # urlopen() raises HTTPError for 4xx/5xx responses, so this branch is
        # only reached for other non-200 statuses.
        print('Fail')
# 相關文章
# 相關標籤/搜索