爬蟲cast

 1 import requests
 2 
 3 
 4 def test():
 5     # 1,目標url
 6     # url = "https://www.baidu.com/" #地址欄中輸入的都是 GET 請求
 7 
 8     url = "https://www.baidu.com/"
 9 
10     # 2,發送請求
11     # response = requests.get(url) #response 是響應對象,它是Response
12 
13     response = requests.get(url)
14     #響應對象的相關屬性:
15     
16     # ,解析響應對象
17     # data = response.text  #data 是 str 類型 有時候不對(它是自行推測 可能可能的編碼,主動的解碼!)!
18     # data = response.content #data 此時是 bytes類型 (首選 .content 屬性)
19 
20     data = response.content  # bytes類型
21     # a,狀態碼 status_code
22     code = response.status_code
23     # b,請求頭 request headers
24     request_headers = response.request.headers
25     # print(request_headers) # {'User-Agent': 'python-requests/2.22.0', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'keep-alive'}
26     # c,響應頭
27     response_headers = response.headers
28     # print(response_headers)
29     # d,請求cookie
30     request_cookies = response.request._cookies  # 返回 RequestsCookieJar對象
31     # print(request_cookies)
32     # e,響應cookie
33     response_cookies = response.cookies  # 返回 RequestsCookieJar對象
34     # print(response_cookies)
35 
36 
37 
38     # 3,保存
39     with open("d:\\test.html", 'w', encoding="utf8") as f:
40         f.write(data.decode("utf8"))
41 
42     pass
43 
44 
45 if __name__ == '__main__':
46     test()
47 
48     pass
響應對象的相關屬性
 1     USER_AGENT_LIST=[
 2         # Opera
 3         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
 4         "Opera/8.0 (Windows NT 5.1; U; en)",
 5         "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
 6         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
 7         # Firefox
 8         "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
 9         "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
10         # Safari
11         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
12         # chrome
13         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
14         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
15         "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
16         # 360
17         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
18         "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
19         # 淘寶瀏覽器
20         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
21         # 獵豹瀏覽器
22         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
23         "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
24         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
25         # QQ瀏覽器
26         "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
27         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
28         # sogou瀏覽器
29         "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
30         "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
31         # maxthon瀏覽器
32         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
33         # UC瀏覽器
34         
35         
36         
37         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
38         # IPhone
39         "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
40         # IPod
41         "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
42         # IPAD
43         "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
44         "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
45         # Android
46         "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
47         "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
48         # QQ瀏覽器 Android版本
49         "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
50         # Android Opera Mobile
51         "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
52         # Android Pad Moto Xoom
53         "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
54         # BlackBerry
55         "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
56         # WebOS HP Touchpad
57         "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
58         # Nokia N97
59         "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
60         # Windows Phone Mango
61         "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
62         # UC瀏覽器
63         "UCWEB7.0.2.37/28/999",
64         "NOKIA5700/ UCWEB7.0.2.37/28/999",
65         # UCOpenwave
66         "Openwave/ UCWEB7.0.2.37/28/999",
67         # UC Opera
68         "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
69 
70         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
71         "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
72         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
73         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
74         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
75         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
76         "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
77         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
78         "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
79         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
80         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
81         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
82         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
83         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
84         "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
85         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
86         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
87         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"
88     ]
user-agent池
  1 import requests
  2 import random
  3 
  4 #手動寫 User-Agent,模擬真實的瀏覽器!
  5 def test():
  6     # 1,目標url
  7     url = "https://www.baidu.com/"
  8 
  9     #user-agent池
 10     USER_AGENT_LIST = [
 11         # Opera
 12         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
 13         "Opera/8.0 (Windows NT 5.1; U; en)",
 14         "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
 15         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
 16         # Firefox
 17         "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
 18         "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
 19         # Safari
 20         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
 21         # chrome
 22         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
 23         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
 24         "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
 25         # 360
 26         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
 27         "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
 28         # 淘寶瀏覽器
 29         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
 30         # 獵豹瀏覽器
 31         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
 32         "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
 33         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
 34         # QQ瀏覽器
 35         "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
 36         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
 37         # sogou瀏覽器
 38         "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
 39         "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
 40         # maxthon瀏覽器
 41         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
 42         # UC瀏覽器
 43 
 44         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
 45         # IPhone
 46         "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 47         # IPod
 48         "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 49         # IPAD
 50         "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
 51         "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 52         # Android
 53         "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 54         "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 55         # QQ瀏覽器 Android版本
 56         "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 57         # Android Opera Mobile
 58         "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
 59         # Android Pad Moto Xoom
 60         "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
 61         # BlackBerry
 62         "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
 63         # WebOS HP Touchpad
 64         "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
 65         # Nokia N97
 66         "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
 67         # Windows Phone Mango
 68         "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
 69         # UC瀏覽器
 70         "UCWEB7.0.2.37/28/999",
 71         "NOKIA5700/ UCWEB7.0.2.37/28/999",
 72         # UCOpenwave
 73         "Openwave/ UCWEB7.0.2.37/28/999",
 74         # UC Opera
 75         "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
 76 
 77         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
 78         "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
 79         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
 80         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
 81         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
 82         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
 83         "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
 84         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
 85         "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
 86         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
 87         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
 88         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
 89         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
 90         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
 91         "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
 92         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
 93         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
 94         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"
 95     ]
 96     # 修改 請求頭 信息中 的user-agent 信息
 97     myHeaders = {
 98         "User-Agent": random.choice(USER_AGENT_LIST)
 99     }
100     # print(random.choice(USER_AGENT_LIST))
101 
102 
103     # 2,發送請求
104     response = requests.get(url,headers = myHeaders)
105 
106     # ,解析響應對象
107     # print(response.request.headers)
108     data_str = response.content.decode("utf8")
109 
110     # 3,保存
111     with open("d:\\test.html", 'w', encoding="utf8") as f:
112         f.write(data_str)
113 
114     pass
115 
116 
117 if __name__ == '__main__':
118     test()
修改 請求頭中的user-agent

 

  1 import requests
  2 import random
  3 
  4 # 代碼傳參
  5 def test():
  6     # 1,目標url
  7     # url = "https://www.so.com/s?q=" +"你好"  # request 會自動轉義中文字符
  8     url = "https://www.so.com/s"
  9     so_params= {
 10         "q":"Python",
 11     }
 12     #user-agent池
 13     USER_AGENT_LIST = [
 14         # Opera
 15         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
 16         "Opera/8.0 (Windows NT 5.1; U; en)",
 17         "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
 18         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
 19         # Firefox
 20         "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
 21         "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
 22         # Safari
 23         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
 24         # chrome
 25         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
 26         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
 27         "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
 28         # 360
 29         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
 30         "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
 31         # 淘寶瀏覽器
 32         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
 33         # 獵豹瀏覽器
 34         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
 35         "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
 36         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
 37         # QQ瀏覽器
 38         "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
 39         "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
 40         # sogou瀏覽器
 41         "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
 42         "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
 43         # maxthon瀏覽器
 44         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
 45         # UC瀏覽器
 46 
 47         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
 48         # IPhone
 49         "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 50         # IPod
 51         "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 52         # IPAD
 53         "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
 54         "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 55         # Android
 56         "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 57         "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 58         # QQ瀏覽器 Android版本
 59         "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 60         # Android Opera Mobile
 61         "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
 62         # Android Pad Moto Xoom
 63         "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
 64         # BlackBerry
 65         "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
 66         # WebOS HP Touchpad
 67         "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
 68         # Nokia N97
 69         "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
 70         # Windows Phone Mango
 71         "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
 72         # UC瀏覽器
 73         "UCWEB7.0.2.37/28/999",
 74         "NOKIA5700/ UCWEB7.0.2.37/28/999",
 75         # UCOpenwave
 76         "Openwave/ UCWEB7.0.2.37/28/999",
 77         # UC Opera
 78         "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
 79 
 80         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
 81         "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
 82         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
 83         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
 84         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
 85         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
 86         "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
 87         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
 88         "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
 89         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
 90         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
 91         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
 92         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
 93         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
 94         "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
 95         "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
 96         "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
 97         "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"
 98     ]
 99     # 修改 請求頭 信息中 的user-agent 信息
100     myHeaders = {
101         "User-Agent": random.choice(USER_AGENT_LIST)
102     }
103 
104     # 2,發送請求
105     response = requests.get(url,headers = myHeaders,params=so_params)
106 
107     # ,解析響應對象
108     # print(response.request.headers)
109     data_str = response.content.decode("utf8")
110     # 3,保存
111     with open("d:\\test.html", 'w', encoding="utf-8") as f:
112         f.write(data_str)
113     pass
114 
115 
116 if __name__ == '__main__':
117     test()
帶參數
 1 import requests
 2 import random
 3 
 4 def test(url):
 5     # 1,目標url (圖片)
 6     # url = "https://www.baidu.com/img/bd_logo1.png"
 7     # 修改 請求頭 信息中 的user-agent 信息
 8     myHeaders = {
 9         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
10     }
11 
12     # 2,發送請求
13     response = requests.get(url,headers = myHeaders)
14 
15     # 3 ,解析響應對象
16     data = response.content  # bytes  類型
17     # 4,保存
18     with open("d:\\test.png", 'wb') as f:
19         f.write(data)
20     pass
21 
22 
23 if __name__ == '__main__':
24     test("https://p.ssl.qhimg.com/t012cdb572f41b93733.png")  
爬圖片
import requests

# http://tieba.baidu.com/f?kw=%E7%BE%8E%E9%A3%9F&ie=utf-8&pn=50  kw  和  pn
class TiebaSpider:
    """Fetch one Baidu Tieba listing page and save it to a local file."""

    # Seconds to wait for the server before giving up on a request; without
    # a timeout requests would wait indefinitely.
    timeout = 10

    def __init__(self):
        self.base_url = "http://tieba.baidu.com/f"
        self.headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"}

    # 1. Send the request
    def send_request(self, params):
        """GET ``base_url`` with *params* as the query string.

        Returns the response body as bytes.
        """
        response = requests.get(
            self.base_url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        return response.content

    # 2. Save the data
    def write_file(self, data):
        """Write *data* (bytes) to ``01tieba.html`` in the working directory."""
        with open("01tieba.html", "wb") as f:
            f.write(data)

    # 3. Orchestration
    def run(self):
        """Build the query parameters, fetch the page, and save it locally."""
        # Query parameters: ``kw`` is the forum name, ``pn`` the result offset.
        tieba_params = {
            "kw": "美食吧",
            "pn": 50,
        }
        data = self.send_request(tieba_params)
        self.write_file(data)


if __name__ == '__main__':
    tiebaSpider = TiebaSpider()
    tiebaSpider.run()
爬百度貼吧

上面的百度貼吧爬蟲沒有實現翻頁,下面實現翻頁:

 1 import requests
 2 import random
 3 USER_AGENT_LIST = [
 4     # Opera
 5     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
 6     "Opera/8.0 (Windows NT 5.1; U; en)",
 7     "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
 8     "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
 9     # Firefox
10     "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
11     "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
12     # Safari
13     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
14     # chrome
15     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
16     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
17     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
18     # 360
19     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
20     "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
21 ]
22 
23 # http://tieba.baidu.com/f?kw=%E7%BE%8E%E9%A3%9F&ie=utf-8&pn=50  kw  和  pn
class TiebaSpider:
    """Download a range of listing pages from one Baidu Tieba forum.

    The forum name and the first/last page numbers are read from standard
    input when the spider is constructed.  Each page is saved as its own
    HTML file under ``save_dir``.
    """

    # Directory the pages are written to (trailing separator included).
    save_dir = "d:\\tieba\\"
    # The ``pn`` query parameter advances by this many entries per page.
    page_size = 50
    # Seconds to wait for the server before giving up on a request; without
    # a timeout requests would wait indefinitely.
    timeout = 10

    def __init__(self):
        self.tieba_name = input("請輸入你想爬取的貼吧名字>>>")
        self.start_pageNum = int(input("請輸入開始頁數>>>"))
        self.end_pageNum = int(input("請輸入結束頁數>>>"))

        self.base_url = "http://tieba.baidu.com/f"
        self.headers = {"User-Agent": random.choice(USER_AGENT_LIST)}

    def _build_params(self, page):
        """Return the query parameters for the 1-based page number *page*."""
        return {
            "kw": self.tieba_name,
            "pn": (page - 1) * self.page_size,
        }

    # 1. Send the request
    def send_request(self, params):
        """GET ``base_url`` with *params* as the query string.

        Returns the response body as bytes.
        """
        response = requests.get(
            self.base_url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        return response.content

    # 2. Save the data
    def write_file(self, data, page):
        """Write *data* (bytes) to ``<save_dir><forum name><page>.html``."""
        # Imported here because the script's import block does not include os.
        import os

        file_path = self.save_dir + self.tieba_name + str(page) + ".html"
        print("正在抓取 【{}】 的第{} 頁".format(self.tieba_name, page))
        # open() does not create missing directories, so make sure the
        # target directory exists before writing.
        os.makedirs(self.save_dir, exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(data)

    # 3. Orchestration
    def run(self):
        """Fetch and save every page from the start page to the end page, inclusive."""
        for page in range(self.start_pageNum, self.end_pageNum + 1):
            data = self.send_request(self._build_params(page))
            self.write_file(data, page)
56 
57 
58 if __name__ == '__main__':
59     tiebaSpider = TiebaSpider()
60     tiebaSpider.run()
爬取多頁

 

 1 import requests
 2 
 3 # https://wenku.baidu.com/search?word=%CB%BE%BB%FA&pn=20 word 和  pn
 4 class WenkuSpider:
 5     def __init__(self):
 6         self.wenku_content = input("請輸入百度文庫搜索的內容>>>")
 7         self.start_page = int(input("請輸入開始爬取的頁數>>>"))
 8         self.end_page = int(input("請輸入結束爬取的頁數>>>"))
 9 
10         self.base_url = "https://wenku.baidu.com/search"
11         self.headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"}
12 
13     #1,發送請求
14     def send_request(self,params):
15         response =  requests.get(self.base_url,headers=self.headers,params=params)
16         data = response.content
17         return data
18     #2,保存數據
19     def write_file(self,data,page):
20         filePath = "d:/wenku/"+self.wenku_content+ str(page) +".html"
21         print("正在爬取 "+self.wenku_content+""+str(page)+"")
22         with open(filePath,"wb") as f:
23             f.write(data)
24     #3,run方法
25     def run(self):
26         #構造參數字典
27         for page in range(self.start_page,self.end_page+1):
28             tieba_params ={
29                 "word":self.wenku_content,
30                 "pn":(page-1)*10
31             }
32             #發請求
33             data =  self.send_request(tieba_params)
34             #保存本地
35             self.write_file(data,page)
36 
37 
if __name__ == "__main__":
    # Script entry point: the constructor prompts for the search text and
    # page range, then run() downloads the pages.
    WenkuSpider().run()
爬取百度文庫

 

 1 import requests
 2 import random
# Pool of browser User-Agent strings, grouped by browser family.
# DoubanSpider picks one at random for its request headers.
USER_AGENT_LIST = [
    # Opera
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
    "Opera/8.0 (Windows NT 5.1; U; en)",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
    # Firefox
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    # Safari
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
    # Chrome
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
    # 360 (presumably the 360 browser -- TODO confirm)
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
]
22 
23 # 豆瓣電影
24 # https://movie.douban.com/j/chart/top_list  ?type=5&interval_id=100%3A90&action=&start=0&limit=20
class DoubanSpider:
    """Fetch one page of the Douban movie chart (an Ajax endpoint) and save it.

    The endpoint is queried with ``type``, ``interval_id``, ``action``,
    ``start`` and ``limit`` parameters; the decoded response text is
    written to ``output_path``.
    """

    # File the response text is written to; its folder is created on demand.
    output_path = "d:\\douban\\1.html"
    # Seconds to wait for the server before a request is abandoned.
    timeout = 10

    def __init__(self):
        """Set the endpoint URL and pick a random User-Agent for this spider."""
        self.base_url = "https://movie.douban.com/j/chart/top_list/"
        self.headers = {"User-Agent":random.choice(USER_AGENT_LIST)}

    def send_request(self, params):
        """GET the chart endpoint with ``params`` and return the body as ``str``.

        Raises:
            UnicodeDecodeError: If the response body is not valid UTF-8.
        """
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(
            self.base_url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        return response.content.decode("utf8")

    def write_file(self, data):
        """Write the text ``data`` to ``output_path`` encoded as UTF-8."""
        # Local import keeps this class self-contained in a file that does
        # not import os at the top.
        import os

        # open() does not create missing folders, so make sure it exists.
        folder = os.path.dirname(self.output_path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(self.output_path, "w", encoding="utf-8") as f:
            f.write(data)

    def run(self):
        """Request the first 20 chart entries and save the response."""
        # Query parameters copied from the site's own Ajax request.
        params = {
            "type":5,
            "interval_id":"100:90",
            "action": "",
            "start": 0,
            "limit": 20
        }
        data = self.send_request(params)
        self.write_file(data)
if __name__ == "__main__":
    # Script entry point: fetch the chart once and save it.
    DoubanSpider().run()
爬取豆瓣電影排行榜的數據(Ajax)

 

  1 from bs4 import BeautifulSoup
  2 
  3 html = """
  4 <!doctype html>
  5 <html>
  6 <head>
  7     <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  8 <meta name="keywords" content="" />
  9 <meta name="description" content="" />
 10 <meta name="viewport" content="user-scalable=no, initial-scale=1.0, width=device-width" />
 11 <meta name="apple-mobile-web-app-status-bar-style" content="grey" />
 12 <meta name="apple-mobile-web-app-status-bar-style" content="black" />
 13 <meta name="format-detection" content="telephone=no"/>
 14 <title>有道翻譯</title>
 15 <link rel="shortcut icon" href="//shared.ydstatic.com/dict/v5.15/images/icon.png" type="image/x-icon" />
 16 <link rel="apple-touch-icon" href="//shared.ydstatic.com/dict/v5.15/images/icon.png" />
 17 
 18     <!-- <link rel="stylesheet" type="text/css" href="../../styles/global-min.css">
 19     <link rel="stylesheet" type="text/css" href="../../styles/p-fanyi-min.css"> -->
 20     <style type="text/css">
 21     html{-webkit-text-size-adjust:none}body{background-color:#fff;font-family:"Times New Roman",Times,serif;font-size:14px;line-height:21px;color:#313131}body,div,h1,h2,h3,h4,h5,input,li,ol,p,textarea,ul{margin:0;padding:0;outline:0}li,ol,ul{list-style:none}input{-webkit-appearance:none}a{color:#138bff;text-decoration:none;cursor:pointer}a:active,a:hover{color:#138bff}strong{color:#c50000;word-wrap:break-word}#bd{background:#f4f4f4;padding:7px 0}.p-index_entry #bd{background:#fff;padding:0}.p-index #hd{border-bottom:1px solid #e8e8e8}.content-wrp{margin:7px}#ft{padding:7px 0;background:#fff}.empty-content{background:#FFF;padding:35px 7px;vertical-align:middle}.btn{border-radius:4px;-webkit-border-radius:4px;-moz-border-radius:4px;-o-border-radius:4px;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;padding:11px 35px;display:inline-block;cursor:pointer;white-space:nowrap;vertical-align:top;border:1px solid #076ccc;background:-webkit-gradient(linear,left top,left bottom,from(#0082ff),to(#0082ff));background:-webkit-linear-gradient(top,#0082ff,#0082ff);background:-moz-linear-gradient(top,#0082ff,#0082ff);background:-o-linear-gradient(top,#0082ff,#0082ff);font-size:14px}.nav-label{background-color:#fff;border-bottom:1px solid #e0e0e0;display:table;width:100%}.nav-label:after,.nav-label:before{content:" ";display:table}.nav-label:after{clear:both}.nav-label .logo{display:table-cell;width:1%;vertical-align:middle;padding:11px 7px}.nav-label .nav{display:table;width:100%;text-align:center}.nav-label .nav li{display:table-cell}.nav-label .nav li.active{border-bottom:3px solid #0082ff}.nav-label .nav a{display:inline-block;padding:11px 7px;color:#646464;position:relative}.search-area{background:#fff;position:relative}.search-bar{position:relative;display:table;border-collapse:separate;width:100%;padding:11px 7px 7px;box-sizing:border-box;-webkit-box-sizing:border-box;-moz-box-sizing:border-box}.search-area 
.clearInput{position:absolute;top:14px;right:57px;width:34px;height:0;padding-top:34px;overflow:hidden;background:url(//shared.ydstatic.com/dict/youdaowap/changeImg/close.png) no-repeat 0 0;background-size:100%;display:none}.search-bar .form-control{display:table-cell;padding:11px 7px;border:1px solid #d9d9d9;border-top-color:silver;background-image:none;vertical-align:top;width:99%;font-size:14px;border-radius:0}.search-bar .input-group-btn{display:table-cell;width:1%;vertical-align:top}.search-bar .btn{border-radius:0 4px 4px 0;-webkit-border-radius:0 4px 4px 0;-moz-border-radius:0 4px 4px 0;-o-border-radius:0 4px 4px 0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;padding:11px 25px;display:inline-block;cursor:pointer;white-space:nowrap;vertical-align:top;border:1px solid #076ccc;background:-webkit-gradient(linear,left top,left bottom,from(#0082ff),to(#0082ff));background:-webkit-linear-gradient(top,#0082ff,#0082ff);background:-moz-linear-gradient(top,#0082ff,#0082ff);background:-o-linear-gradient(top,#0082ff,#0082ff);background:#0082ff url(//shared.ydstatic.com/dict/youdaowap/changeImg/search.png) no-repeat 13px 6px;box-sizing:border-box;-webkit-box-sizing:border-box;font-size:14px;background-size:50%}.search-bar .selectedLang{display:table-cell;background:#fff;border:1px solid #d9d9d9;border-right:0;border-top-color:silver;border-radius:4px 0 0 4px;-webkit-border-radius:4px 0 0 4px;-moz-border-radius:4px 0 0 4px;-o-border-radius:4px 0 0 4px;color:#313131;width:1%;vertical-align:middle}.search-bar .selectedLang span{display:inline-block;width:30px;padding:0 7px}.search-area .lang-select-list{position:absolute;background:#fff;border:1px solid #d9d9d9;left:7px;top:51px;display:none}.search-area .lang-select-list li{padding:7px 22px;border-bottom:1px solid #d9d9d9;-webkit-tap-highlight-color:red}.page{border-radius:2px;-webkit-border-radius:2px;-moz-border-radius:2px;-o-border-radius:2px;border:1px solid 
#e1e1e1;background:#fff;display:block;text-align:center;color:#313131}.page a{width:49%;display:inline-block;text-align:center;padding:11px 0}.copy{text-align:center;font-size:12px;color:#b2b2b2}.copy a{margin:0 7px;color:#8f9dae}#suggest{position:absolute;top:53px;width:100%;padding:0 7px;box-sizing:border-box;-webkit-box-sizing:border-box;-moz-box-sizing:border-box}#suggest .container{border:1px solid #D9D9D9;border-top:0;border-bottom:0}#suggest .container li{padding:7px 0;border-bottom:1px solid #D9D9D9;overflow:hidden;-webkit-text-overflow:ellipsis;text-overflow:ellipsis;white-space:nowrap;background:#fff;-webkit-tap-highlight-color:#ff0}#suggest .close{border:1px solid #D9D9D9;box-sizing:border-box;width:100%;border-top:0;background:#fff}#suggest .close a{display:inline-block;width:100%;padding:7px 0}#suggest .container li:active{background:#D8D8D8}#suggest .container:last-child{border-bottom:0}#suggest .container strong{margin:0 7px;color:#313131}.translate-area .original{margin:0 0 7px;height:110px}.translate-area .original textarea{width:100%;height:110px;border:1px solid #E1E1E1;resize:none;padding:11px;box-sizing:border-box;-webkit-box-sizing:border-box;-moz-box-sizing:border-box}.translate-area .select{margin:0 7px 7px}.translate-area .select select{height:38px;width:140px;border:1px solid #a6d7ff;appearance:none;-webkit-appearance:none;-moz-appearance:none;background:#ecf6ff url(//shared.ydstatic.com/dict/youdaowap/icon/down.png) no-repeat 115px 8px}.translate-area .select option{height:40px}.translate-area .select .blue-btn{border-radius:4px;-webkit-border-radius:4px;-moz-border-radius:4px;-o-border-radius:4px;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;display:inline-block;cursor:pointer;white-space:nowrap;vertical-align:top;border:1px solid #a6d7ff;background:#ecf6ff url(//shared.ydstatic.com/dict/youdaowap/icon/icon.png) no-repeat 24px 6px;padding:11px 21px 11px 
54px;float:right}.translate-area .generate{border-top:1px solid #e1e1e1;background:#fff;padding:11px 7px 0;min-height:110px}.translate-area .generate .tip{color:#999}#clearTextarea{display:inline-block;padding:10px}.fanyi-dl{display:block;padding:7px;border:1px solid #E1E1E1;background-color:#FAFAFA;font-size:12px}    html{
 22         height: 100%;
 23     }
 24     body.from-chuansongmen{
 25         height: 100%;
 26         background: #f4f4f4;
 27     }
 28     .from-chuansongmen .tab {
 29         width: 100%;
 30         height: 38px;
 31     }
 32     .from-chuansongmen .tab img {
 33         width: 20px;
 34         height: 20px;
 35         margin: 14px 4px 0 25px;
 36         display: block;
 37         float: left;
 38     }
 39     .from-chuansongmen .tab h5 {
 40         height: 38px;
 41         font-size: 12px;
 42         color: #999;
 43         text-align: left;
 44         line-height: 50px;
 45     }
 46     .from-chuansongmen #ft{
 47         display:none;
 48     }
 49     </style>
 50 </head>
 51 <body >
 52 <div id="doc2">
 53         <div id="hd">
 54         <div class="nav-label">
 55     <span class="logo"><a href="/"><img src="//shared.ydstatic.com/images/mobile/logo-mobile-whole.gif" width="71" height="16" alt="有道"></a></span>
 56     <ul class="nav">
 57 
 58                     <li><a class="pr-link-a" href="/dict?q=">詞典</a></li>
 59 
 60                     <li class="active"><a href="/translate">翻譯</a></li>
 61                 <li><a href="/redirect?keyfrom=youdaowap.index&url=http%3a%2f%2fm.note.youdao.com%2fnoteproxy%2flogin%3fkeyfrom%3dhome.m">筆記</a></li>
 62     </ul>
 63 </div>
 64     </div>
 65         <div id="bd">
 66         <div class="translate-area">
 67             <form action="/translate" method="POST" name="translate">
 68                 <div class="content-wrp original">
 69                     <textarea id="inputText" name="inputtext">我是張昌博</textarea>
 70                 </div>
 71                 <div class="select">
 72                     <select class="convert" name="type">
 73                                                 <option value="AUTO" selected="selected"> 自動檢測 </option>
 74                                                                         <option value="ZH_CN2EN"> 中譯英 </option>
 75                                                                             <option value="ZH_CN2JA"> 中譯日 </option>
 76                                                                             <option value="ZH_CN2KR"> 中譯韓 </option>
 77                                                                             <option value="ZH_CN2FR"> 中譯法 </option>
 78                                                                             <option value="ZH_CN2RU"> 中譯俄 </option>
 79                                                                             <option value="ZH_CN2SP"> 中譯西 </option>
 80                                                                             <option value="EN2ZH_CN"> 英譯中 </option>
 81                                                                             <option value="JA2ZH_CN"> 日譯中 </option>
 82                                                                             <option value="KR2ZH_CN"> 韓譯中 </option>
 83                                                                             <option value="FR2ZH_CN"> 法譯中 </option>
 84                                                                             <option value="RU2ZH_CN"> 俄譯中 </option>
 85                                                                             <option value="SP2ZH_CN"> 西譯中 </option>
 86                                             </select>
 87                     <a id="clearTextarea" href="#">清空</a>
 88                     <input class="blue-btn" type="submit" value="翻譯">
 89                 </div>
 90             </form>
 91             <div class="generate">
 92                 <p class="tip">譯文</p>
 93                 <ul id="translateResult">
 94                                                                         <li>I'm zhang changbo</li>
 95                                                             </ul>
 96             </div>
 97         </div>
 98     </div>
 99     <div id="ft">
100         <div class="content-wrp">
101              <a id="toRengong" class="fanyi-dl" href="http://f.youdao.com/m?vendor=fanyimobile">有道人工翻譯&nbsp;-&nbsp;24小時不打烊</a>
102         </div>
103         <p class="copy"> <a href="/">有道首頁</a>|<a href="/fankui">反饋意見</a>|
104             <a href="http://fanyi.youdao.com/?mct=1&keyfrom=translate.m">切換到PC版</a>
105         </p>
106 <p class="copy">&copy;2015&nbsp;公司&nbsp;京ICP證080268號</p>
107 <script type="text/javascript" charset="utf-8">
108     var Suggest=function(t,n){if(0===t.length)throw new Error("輸入元素不存在");this.el=t,this.init(n)},$=function(t){return document.getElementById(t)},trim=function(t){return t.trim?t.trim():t.replace(/^\s+|\s+$/gi,"")},$s={newid:0},sugCount=0,current$s=0,updateId=function(){return $s.newid=sugCount++ + +new Date},currentInput=null;Suggest.prototype={init:function(t){this.initContainer(t),this.initData(t),this.initInput(t)},initContainer:function(t){var n=this,e=t.suggestContainer,i=t.suggestId,a=document.createElement("div");a.id=i,a.class=i,a.innerHTML='<ul class="container"></ul>',a.style.display="none",e.appendChild(a),n.el.addEventListener("focus",function(t){currentInput=t.target.value}),$("selectedLang").addEventListener("click",function(){n.close(t)},!1)},initData:function(t){var n=this;n.getData="function"==typeof t.data?function(n,e){t.data(n,e)}:t.data.length>0?function(n,e){e(n,t.data,t)}:function(n){n(t.data)}},fetchData:function(t){var n=50,e=this;window.clearTimeout(e.delay),current$s=updateId(),e.delay=window.setTimeout(function(){var n=e.el.value;trim(n)!==trim(currentInput)&&(currentInput=n,e.getData(n,function(n,i){current$s===$s.newid&&e.initItem(n,i,t)}))},n)},initItem:function(t,n,e){var i=this,a="";if(""===trim(t))return void i.close(e);for(var u=0;u<n.length;u++)a+='<li class="item">'+e.generateItem(t,n[u])+"</li>";$(e.suggestId).style.display="block",document.querySelector(".container").innerHTML=a,$(e.suggestId).addEventListener("click",function(t){"item"===t.target.className&&(e.clickItemCallback(t),i.close(e))},!1),$(e.suggestId).addEventListener("touchstart",function(t){if("item"===t.target.className){var n=t.target;e.touchstartItem(n)}},!1),$(e.suggestId).addEventListener("touchend",function(t){if("item"===t.target.className){var n=t.target;e.touchendItem(n)}},!1)},initInput:function(t){var 
n=this;n.el.addEventListener("input",function(e){n.fetchData(t),e.stopPropagation()},!1)},close:function(t){updateId(),$(t.suggestId).style.display="none"}};    __rl_npid="YoudaoWap";var $=function(e){return document.getElementById(e)},trim=function(e){return String.prototype.trim?e.trim():e.replace(/^\s+|\s+$/gi,"")},toggle=function(e){for(var t,n=0,o=e.length;o>n;n++)if(t=e[n],t.style){var i=window.getComputedStyle(t,null).getPropertyValue("display");"none"===i?t.style.display="block":"block"===i&&(t.style.display="none")}},addClassName=function(e,t){if(e){for(var n=0,o=e.className.split(" "),i=o.length,a=[];i>n;n++)""!==trim(o[n])&&a.push(o[n]);return a.push(t),a.join(" ")}},removeClassName=function(e,t){if(e){for(var n=0,o=e.className.split(" "),i=o.length,a=[];i>n;n++)""!==trim(o[n])&&trim(o[n])!==t&&a.push(o[n]);return a.join(" ")}},parseXml=function(e){for(var t=0,n=[],o=e.getElementsByTagName("item");t<o.length;t++){for(var i=o[t].childNodes,a=[],r=0;r<i.length;r++)i[r].nodeType&&1===i[r].nodeType&&a.push(i[r].childNodes[0].nodeValue);n.push({title:a[0],explain:a[1]})}return n},initLangSel=function(){var e=document.querySelectorAll(".lang-select-list"),t=document.querySelector(".lang-select-list"),n={eng:"中英",fr:"中法",jap:"中日",ko:"中韓"},o=function(e){var 
o,i="https://m.youdao.com/dict?le=";"li"===e.target.nodeName.toLowerCase()&&(o=e.target.getAttribute("data-value"),$("inputLang").value=o,e.target.style.background="#fff",$("selectedLang").querySelector("span").innerHTML=n[o],t.style.display="none",""!==trim($("formInput").value)&&(window.location.href=i+o+"&q="+$("formInput").value)),e.stopPropagation()};$("selectedLang").addEventListener("click",function(t){toggle(e),t.stopPropagation(),t.preventDefault()},!1),t.addEventListener("click",function(e){o(e)},!1),t.addEventListener("touchstart",function(e){e.preventDefault(),e.stopPropagation(),"li"===e.target.nodeName.toLowerCase()&&(e.target.style.background="#e4efff")},!1),t.addEventListener("touchend",function(e){o(e),e.stopPropagation()},!1),document.addEventListener("click",function(){t.style.display="none"},!1)};document.addEventListener("DOMContentLoaded",function(){function e(e,t){var n,o,i,a,l=document.body;n=document.createElement("div"),n.className="shim",l.appendChild(n),n.style.height=document.documentElement.scrollHeight+"px",o=document.createElement("div"),o.className="overlay",i=document.createElement("p"),i.innerHTML=e,a=document.createElement("p"),a.className="confirm",a.innerHTML="好",o.appendChild(i),o.appendChild(a),l.appendChild(o);var c=document.documentElement.scrollTop||document.body.scrollTop;document.addEventListener("orientationchange",function(){c=document.documentElement.scrollTop||document.body.scrollTop,o.style.top=Math.ceil((window.innerHeight-o.offsetHeight)/2)+c+"px"},!1),o.style.top=Math.ceil((window.innerHeight-o.offsetHeight)/2)+c+"px",document.addEventListener("touchmove",r,!1),o.onclick=function(){if(l.removeChild(o),l.removeChild(n),document.removeEventListener("touchmove",r,!1),"function"==typeof t)try{t()}catch(e){}}}var 
t=$("formInput"),n=function(){document.body?document.body.scrollTop=0:document.documentElement.scrollTop=0,document.body.style.height=window.innerHeight+"px",document.addEventListener("orientationchange",function(){document.body.style.width=window.innerWidth+"px",document.body.style.height=window.innerHeight+"px"},!1)};if(n(),t){var o=$("clearInput"),i=function(){var e=t.value;""!==trim(e)?o.style.display="inline-block":o.style.display="none"};if(i(),$("formSubmit").onsubmit=function(e){var t=$("formInput").value;""===trim(t)&&e.preventDefault()},t.addEventListener("keyup",function(){i()},!1),!!o&&o.addEventListener("click",function(e){t.value="",this.style.display="none",document.querySelector(".lang-select-list").style.display="none",$("suggest").style.display="none",e.preventDefault()},!1),document.querySelector("body").className.indexOf("p-dict")>=0||document.querySelector("body").className.indexOf("p-index_entry")>=0){var a;new Suggest(document.getElementById("formInput"),{suggestContainer:document.querySelector(".search-area"),suggestId:"suggest",data:function(e,t){a=""==$("inputLang").value?"eng":$("inputLang").value;var n="https://dict.youdao.com/suggest?type=DESKDICT&num=4&q="+e+"&ver=2.0&le="+a,o=new XMLHttpRequest;o.onreadystatechange=function(){if(4==o.readyState){var n=new DOMParser,i=n.parseFromString(o.response,"text/xml"),a=parseXml(i);t(e,a)}},o.open("get",n,!0),o.send(null)},generateItem:function(e,t){return"<strong>"+t.title+"</strong>"+t.explain},clickItemCallback:function(e){var t="https://m.youdao.com/dict?q=";window.location.href=t+e.target.querySelector("strong").innerText+"&le="+a},touchstartItem:function(e){e.style.background="#e4f4ff"},touchendItem:function(e){e.className="item";var 
t="https://m.youdao.com/dict?q=";window.location.href=t+e.querySelector("strong").innerText}});initLangSel()}}document.addEventListener("click",function(e){"pr-link-a"==e.target.className&&t&&(e.target.href=e.target.href+t.value)},!1),!!$("report_feedback")&&$("report_feedback").addEventListener("submit",function(){e("感謝您的反饋",function(){$("issueDes").value="",$("questionDes").value="",$("emailDes").value=""})},!1);var r=function(e){e.preventDefault()}},!1);</script>
109         <script type="text/javascript" charset="utf-8">
110             /*!
111  * linkToManual.js 0.0.1
112  */
113 
114 (function(a){var b={};(function(){var a=0;var c=1;var d=2;b.isLangVaildable=function(b){var c=C(b);var d=c[0];if(d===a){return false}if(d==="auto"){return true}return true};function e(a){var b=a.charCodeAt(0);if(a===" "){return true}else if(b<=32||b===255){return true}else{return false}}var f=[["'","‘","’"],['"',"「","」"],[",",","],[".","。"],[";",";"],[":",":"],["、"],["·"],["/","/"],["?","?"],["\\","\"],["|","|"],["`"],["~","~"],["!","!"],["@","@"],["#","#"],["$","$"],["¥","¥"],["%","%"],["^","^","…"],["&","&"],["_"],["(",")"],["(",")"],["[","]"],["[","]"],["{","}"],["<",">"],["+","+"],["-","-"],["*","*","×"],["÷"],["=","="]];var g=[[10078,12301,12303,12318,65379],[12305,12309,12311,12313,12315],[8250,12297,12299]];var h=[",","。","?",":","(",")","【","】","¥","、","·"];var i={};var j={};for(var k=0;k<f.length;k++){for(var l=0;l<f[k].length;l++){i[f[k][l].charCodeAt(0)]=true}}for(var k=0;k<g.length;k++){for(var l=0;l<g[k].length;l++){i[g[k][l]-1]=true;i[g[k][l]]=true}}for(var k=0;k<h.length;k++){j[h[k]]=true}function m(a){return i[a.charCodeAt(0)]===true}function n(a){return j[a]===true}var o=[[33,47],[58,64],[91,96],[123,126]];function p(a){var b=a.charCodeAt(0);for(var c=0;c<o.length;c++){if(b>=o[c][0]&&b<=o[c][1])return true}return false}var q=[48,57];var r=[65296,65305];function s(a){var b=a.charCodeAt(0);if(b>=q[0]&&b<=q[1]||b>=r[0]&&b<=r[1])return true;return false}var t=[65,90];var u=[97,122];var v=[65313,65338];var w=[65345,65370];function x(a){var b=a.charCodeAt(0);if(b>=t[0]&&b<=t[1]||b>=u[0]&&b<=u[1]||b>=v[0]&&b<=v[1]||b>=w[0]&&b<=w[1])return true;return false}var y=[19968,40959];var z=[13312,19903];var A=[63744,64255];function B(a){var b=a.charCodeAt(0);if(b>=y[0]&&b<=y[1]||b>=z[0]&&b<=z[1]||b>=A[0]&&b<=A[1])return true;return false}function C(b){var f=[];var g=[];var h=0;var i=0;var j=0;var k=0;var l=false;var o=false;var p=true;var q=false;for(var r=0,t=b.length;r<t;r++){var u=b.charAt(r);var v=false;var w=false;if(e(u)){v=w=true;p=true;q=false}else 
if(m(u)){if(n(u)){j++;p=true;q=false}else if(!l&&!o&&p){j++;q=true;p=false}}else if(x(u)){if(!l&&!o){l=true}p=false;if(q){q=false;j--}}else if(s(u)){if(!o&&!l){o=true}p=false;if(q){q=false;j--}}else if(B(u)){v=w=true;h++;p=true;q=false}else{v=w=true;i++;p=true;q=false}if(l&&(v||r==t-1)||o&&(w||r==t-1)){var y=r;if(y==t-1){y=r+1}var z=b.substring(k,y);if(l){f[f.length]=z}else if(o){g[g.length]=z}if(l||o){k=r}l=false;o=false}if(v&&w){k=r+1;l=false;o=false}}var A=g.length;var C=f.length;var D=C+h;var E=D+A;if(D==0){C=A;D=A;E=A}var F=E+i;var G=i/F;if(G>=.4)return[a,0];var H=C/D;if(H>=.7){return[c,F+j]}var I=h/D;if(I>=.7)return[d,F+j];return[a,0]}})();(function(){document.addEventListener("DOMContentLoaded",function(){var a=$("toRengong");a.onclick=function(a){var c=this;if($("inputText").value!==""&&b.isLangVaildable($("inputText").value)){c.href=c.href+"&text="+encodeURIComponent($("inputText").value)}}},false)})()})(undefined);        </script>
115     </div>
116 </div>
117 <script src="https://c.youdao.com/market/banner/banner-mod.js"></script>
118 <script src="https://c.youdao.com/market/new_banner.js"></script>
119 
120 <script type="text/javascript">
121 document.addEventListener('DOMContentLoaded', function () {
122     var clearTextareaBtn = $('clearTextarea')
123     !!clearTextareaBtn && clearTextareaBtn.addEventListener('click', function (e){
124             document.querySelectorAll('.original textarea')[0].value = '';
125             $('translateResult').innerHTML = '';
126             e.preventDefault();
127     }, false);
128 }, false);
129 </script>
130 </body>
131 </html>
132 """
133 
# Parse the page with the built-in HTML parser, locate <ul id="translateResult">
# and print the text of its first <li> (the translated sentence).
bs = BeautifulSoup(html, "html.parser")
result_list = bs.find("ul", id="translateResult")
ret = result_list.li.get_text()
print(ret)
使用bs 獲取html 中的內容

 

 1 import requests
 2 from bs4 import BeautifulSoup
 3 
 4 # 有道翻譯  經過使用app端 來避開復雜的js 破解! 使用POST 方式請求
 5 def youdaofanyi():
 6     url = "https://m.youdao.com/translate"
 7     content = input("請輸入要進行翻譯的內容>>>")
 8     from_data = {
 9         "inputtext": content,
10         "type": "AUTO"
11     }
12     myHeaders = {
13         "User-Agent":"Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
14     }
15     response =  requests.post(url,data=from_data,headers =myHeaders)
16 
17     data =  response.content.decode()
18 
19     #data  爲html 文檔  ,下面使用bs4 來提取翻譯後的內容。
20     bs = BeautifulSoup(data,"html.parser")
21     trans_result =  bs.find("ul",id="translateResult").li.get_text()
22     print("翻譯後: ",trans_result)
23 
24 
if __name__ == "__main__":
    # Script entry point: prompt for text and print its translation.
    youdaofanyi()
爬蟲實現有道翻譯,POST請求,這裏最後解析的是HTML

如果返回的是 JSON 格式,則可以通過 Python 內建的 json 模組來解析!

 1 import requests
 2 from bs4 import BeautifulSoup
 3 import random
# Pool of User-Agent strings: one desktop entry followed by phone and
# tablet browsers. youdaofanyi() picks one at random for each request.
USER_AGENT=[
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
    # iPhone
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    # iPod
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    # iPad
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    # Android
    "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    # QQ Browser, Android version
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    # Android Opera Mobile
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    # Android Pad Moto Xoom
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    # BlackBerry
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    # WebOS HP Touchpad
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    # Nokia N97
    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
    # Windows Phone Mango
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    # UC Browser
    "UCWEB7.0.2.37/28/999",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    # UC Openwave
    "Openwave/ UCWEB7.0.2.37/28/999",
    # UC Opera
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
]
38 
39 # 有道翻譯  經過使用app端 來避開復雜的js 破解! 使用POST 方式請求
def youdaofanyi():
    """Translate text typed by the user via Youdao's mobile site and print it.

    Same flow as a plain form POST to the mobile page, except that the
    User-Agent header is drawn at random from ``USER_AGENT`` on every
    call. The translation is the first ``<li>`` inside
    ``<ul id="translateResult">`` of the returned HTML. A notice is
    printed instead when that element is missing.
    """
    url = "https://m.youdao.com/translate"
    content = input("請輸入要進行翻譯的內容>>>")
    form_data = {
        "inputtext": content,
        "type": "AUTO"
    }
    headers = {
        "User-Agent":random.choice(USER_AGENT)
    }
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.post(url, data=form_data, headers=headers, timeout=10)
    page = response.content.decode()

    # The response is an HTML document; bs4 extracts the translated text.
    soup = BeautifulSoup(page, "html.parser")
    result_list = soup.find("ul", id="translateResult")
    # find() returns None when the <ul> is absent, and .li is None when the
    # list is empty; check both instead of failing with AttributeError.
    first_item = result_list.li if result_list is not None else None
    if first_item is None:
        print("翻譯失敗: 響應頁面中沒有找到翻譯結果")
        return
    print("翻譯後: ", first_item.get_text())
58 
59 
if __name__ == "__main__":
    # Script entry point: prompt for text and print its translation.
    youdaofanyi()
有道翻譯,加上 User-Agent池

 

 1 import requests
 2 from bs4 import BeautifulSoup
 3 import random
# Pool of User-Agent strings: one desktop entry followed by phone and
# tablet browsers. renren_login() picks one at random for its headers.
USER_AGENT=[
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
    # iPhone
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    # iPod
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    # iPad
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    # Android
    "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    # QQ Browser, Android version
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    # Android Opera Mobile
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    # Android Pad Moto Xoom
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    # BlackBerry
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    # WebOS HP Touchpad
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    # Nokia N97
    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
    # Windows Phone Mango
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    # UC Browser
    "UCWEB7.0.2.37/28/999",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    # UC Openwave
    "Openwave/ UCWEB7.0.2.37/28/999",
    # UC Opera
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
]
38 
# Fetch a Renren profile page (http://www.renren.com/973035534/profile) by
# replaying cookies captured from a logged-in browser session.
def renren_login():
    """Download a Renren profile page using a hard-coded browser cookie string.

    The cookie header copied from a browser is parsed into a dict and sent
    with a GET request. The response body is decoded as UTF-8 and written to
    ``test.html`` in the current working directory.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = "http://www.renren.com/973035534/profile"
    headers = {
        # A random User-Agent keeps the request from advertising python-requests.
        "User-Agent": random.choice(USER_AGENT),
        # Option 1 (not used): put the raw cookie string below into a
        # "Cookie" entry of this dict instead of passing ``cookies=``.
    }
    # Option 2 (used): parse the cookie string into a dict for ``cookies=``.
    # NOTE(review): this is a captured session cookie; it is presumably stale
    # and should be replaced with a fresh one before running.
    cookies_str = "anonymid=k3vin92j-utmczl; depovince=JX; _r01_=1; JSESSIONID=abcTrUUNrwpusblQEmG7w; ick_login=1e7408cf-cc43-4d3f-89a1-c9e42225f36f; ick=56a20973-a92c-4c80-a5b7-7a314661aec8; t=5e76d39056956bafd2956acf04029aa44; societyguester=5e76d39056956bafd2956acf04029aa44; id=973035504; xnsid=1fcbe7d9; XNESSESSIONID=ac66307bb096; ver=7.0; loginfrom=null; jebe_key=6a7ea1ce-01bb-4e22-8243-96d423bd8c44%7C112e5449a850bb630869ba6e835ed605%7C1575719639127%7C1%7C1575719640374; jebe_key=6a7ea1ce-01bb-4e22-8243-96d423bd8c44%7C112e5449a850bb630869ba6e835ed605%7C1575719639127%7C1%7C1575719640381; wp_fold=0; jebecookies=76cf8b24-b558-403f-9aa3-071df0e45d9a|||||"
    cookies_dict = {}
    for pair in cookies_str.split("; "):
        # Split on the FIRST "=" only, so values that themselves contain "="
        # are kept intact; fragments without "=" are skipped instead of
        # raising IndexError.
        name, sep, value = pair.partition("=")
        if not sep:
            continue
        # A repeated name (e.g. jebe_key) keeps its last value.
        cookies_dict[name] = value

    # The timeout stops a stalled server from hanging the script forever.
    response = requests.get(url, headers=headers, cookies=cookies_dict, timeout=10)
    data_str = response.content.decode("utf8")

    # Save the decoded page to a file.
    with open("test.html", "w", encoding="utf8") as f:
        f.write(data_str)
58 
59 
# Script entry point: run the cookie-based login demo only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    renren_login()
使用 Cookie 登錄人人網
  1 import requests
  2 from bs4 import BeautifulSoup
  3 import random
  4 
  5 USER_AGENT = [
  6     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
  7     # IPhone
  8     "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
  9     # IPod
 10     "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 11     # IPAD
 12     "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
 13     "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 14     # Android
 15     "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 16     "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 17     # QQ瀏覽器 Android版本
 18     "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
 19     # Android Opera Mobile
 20     "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
 21     # Android Pad Moto Xoom
 22     "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
 23     # BlackBerry
 24     "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
 25     # WebOS HP Touchpad
 26     "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
 27     # Nokia N97
 28     "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
 29     # Windows Phone Mango
 30     "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
 31     # UC瀏覽器
 32     "UCWEB7.0.2.37/28/999",
 33     "NOKIA5700/ UCWEB7.0.2.37/28/999",
 34     # UCOpenwave
 35     "Openwave/ UCWEB7.0.2.37/28/999",
 36     # UC Opera
 37     "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
 38 ]
 39 # 1,直接用cookies 登陸  get請求!
 40 '''
 41 def renren_login():
 42     url = "http://www.renren.com/973035534/profile"
 43     headers = {
 44         "User-Agent":random.choice(USER_AGENT),
 45         #1 "Cookie":"anonymid=k3vin92j-utmczl; depovince=JX; _r01_=1; JSESSIONID=abcTrUUNrwpusblQEmG7w; ick_login=1e7408cf-cc43-4d3f-89a1-c9e42225f36f; ick=56a20973-a92c-4c80-a5b7-7a314661aec8; t=5e76d39056956bafd2956acf04029aa44; societyguester=5e76d39056956bafd2956acf04029aa44; id=973035504; xnsid=1fcbe7d9; XNESSESSIONID=ac66307bb096; ver=7.0; loginfrom=null; jebe_key=6a7ea1ce-01bb-4e22-8243-96d423bd8c44%7C112e5449a850bb630869ba6e835ed605%7C1575719639127%7C1%7C1575719640374; jebe_key=6a7ea1ce-01bb-4e22-8243-96d423bd8c44%7C112e5449a850bb630869ba6e835ed605%7C1575719639127%7C1%7C1575719640381; wp_fold=0; jebecookies=76cf8b24-b558-403f-9aa3-071df0e45d9a|||||"
 46     }
 47     #2
 48     cookies_str = "anonymid=k3vin92j-utmczl; depovince=JX; _r01_=1; JSESSIONID=abcTrUUNrwpusblQEmG7w; ick_login=1e7408cf-cc43-4d3f-89a1-c9e42225f36f; ick=56a20973-a92c-4c80-a5b7-7a314661aec8; t=5e76d39056956bafd2956acf04029aa44; societyguester=5e76d39056956bafd2956acf04029aa44; id=973035504; xnsid=1fcbe7d9; XNESSESSIONID=ac66307bb096; ver=7.0; loginfrom=null; jebe_key=6a7ea1ce-01bb-4e22-8243-96d423bd8c44%7C112e5449a850bb630869ba6e835ed605%7C1575719639127%7C1%7C1575719640374; jebe_key=6a7ea1ce-01bb-4e22-8243-96d423bd8c44%7C112e5449a850bb630869ba6e835ed605%7C1575719639127%7C1%7C1575719640381; wp_fold=0; jebecookies=76cf8b24-b558-403f-9aa3-071df0e45d9a|||||"
 49     cookies_dict = {}
 50     for temp in cookies_str.split("; "):
 51         cookies_dict[temp.split("=")[0]] = temp.split("=")[1]
 52 
 53     response =  requests.get(url,headers = headers,cookies = cookies_dict)
 54     data_str = response.content.decode("utf8")
 55 
 56     #將data_str 寫到文件中
 57     with open("test.html","w",encoding="utf8") as f:
 58         f.write(data_str)
 59 '''
 60 
 61 # 2 先用post請求代碼登陸 post請求
 62 # 而後再用代碼獲取cookies 進行get請求
 63 '''
 64  1,登陸的網址
 65  2,登陸的參數
 66  form 標籤中 兩個屬性 action 登陸的網址, method 請求的方式  
 67            action:  http://www.renren.com/PLogin.do 
 68            method:  post 
 69  登陸的參數:
 70     email:
 71     password:
 72 
 73 '''
 74 
 75 
 76 def renren_login():
 77     login_url = "http://www.renren.com/PLogin.do "
 78     headers = {
 79         "User-Agent": random.choice(USER_AGENT),
 80     }
 81     login_data = {
 82         "email": "18337895201",
 83         "password": "123456"
 84     }
 85     # login_response = requests.post(login_url,data=login_data,headers = headers) #此時不用request.post() 了。
 86     session = requests.session()  #
 87     login_response = session.post(login_url, data=login_data, headers=headers)
 88 
 89 
 90     profile_url = "http://www.renren.com/973035504/profile"
 91     response = session.get(profile_url, headers=headers)  # cookies 會自動經過 session 傳遞的!
 92 
 93     data_str = response.content.decode("utf8")
 94 
 95     # 將data_str 寫到文件中
 96     with open("test03.html", "w", encoding="utf8") as f:
 97         f.write(data_str)
 98 
 99 
# Script entry point: run the session-based login demo only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    renren_login()
代碼登錄

 

 1 import requests
 2 from bs4 import BeautifulSoup
 3 import random
 4 
 5 USER_AGENT = [
 6     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
 7     # IPhone
 8     "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 9     # IPod
10     "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
11     # IPAD
12     "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
13     "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
14     # Android
15     "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
16     "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
17     # QQ瀏覽器 Android版本
18     "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
19     # Android Opera Mobile
20     "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
21     # Android Pad Moto Xoom
22     "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
23     # BlackBerry
24     "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
25     # WebOS HP Touchpad
26     "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
27     # Nokia N97
28     "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
29     # Windows Phone Mango
30     "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
31     # UC瀏覽器
32     "UCWEB7.0.2.37/28/999",
33     "NOKIA5700/ UCWEB7.0.2.37/28/999",
34     # UCOpenwave
35     "Openwave/ UCWEB7.0.2.37/28/999",
36     # UC Opera
37     "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
38 ]
39 
40 
41 
def renren_login():
    """Log in to Renren with a form POST, print the login response headers,
    and save the profile page.

    A ``requests`` session posts the hard-coded credentials, the headers of
    the login response are printed for inspection, and the same session then
    fetches the profile page so the cookies set during login are sent
    automatically. The page body is decoded as UTF-8 and written to
    ``d:/test04.html``.

    Raises:
        requests.RequestException: if either HTTP request fails or times out.
        UnicodeDecodeError: if the profile response body is not valid UTF-8.
        OSError: if ``d:/test04.html`` cannot be opened for writing.
    """
    login_url = "http://www.renren.com/PLogin.do"
    profile_url = "http://www.renren.com/973036509/profile"
    headers = {
        "User-Agent": random.choice(USER_AGENT),
    }
    # NOTE(review): credentials are hard-coded for the demo; do not reuse this
    # pattern with a real account.
    login_data = {
        "email": "17770832665",
        "password": "123456",
    }
    timeout = 10  # seconds; stops a stalled server from hanging the script

    # A Session (rather than bare requests.post/get) keeps the cookies between
    # calls; the ``with`` block closes its pooled connections on exit.
    with requests.Session() as session:
        login_response = session.post(
            login_url, data=login_data, headers=headers, timeout=timeout
        )
        print(login_response.headers)

        # Cookies are passed along automatically by the session.
        response = session.get(profile_url, headers=headers, timeout=timeout)

    data_str = response.content.decode("utf8")

    # Save the decoded page to a file.
    with open("d:/test04.html", "w", encoding="utf8") as f:
        f.write(data_str)
65 
66 
# Script entry point: run the login demo only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    renren_login()
看這個!cookie 登陸
 1 import requests
 2 from bs4 import BeautifulSoup
 3 import random
 4 
 5 USER_AGENT = [
 6     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
 7     # IPhone
 8     "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
 9     # IPod
10     "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
11     # IPAD
12     "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
13     "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
14     # Android
15     "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
16     "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
17     # QQ瀏覽器 Android版本
18     "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
19     # Android Opera Mobile
20     "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
21     # Android Pad Moto Xoom
22     "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
23     # BlackBerry
24     "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
25     # WebOS HP Touchpad
26     "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
27     # Nokia N97
28     "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
29     # Windows Phone Mango
30     "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
31     # UC瀏覽器
32     "UCWEB7.0.2.37/28/999",
33     "NOKIA5700/ UCWEB7.0.2.37/28/999",
34     # UCOpenwave
35     "Openwave/ UCWEB7.0.2.37/28/999",
36     # UC Opera
37     "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"
38 ]
39 
def renren_login():
    """Log in to Renren with a form POST and print the cookie string it used.

    A ``requests`` session posts the hard-coded credentials. The ``Cookie``
    header of the request attached to the login response is then collected
    into a list and printed. Such strings can seed a cookie pool, after which
    the cookie-based GET approach can be used instead of logging in by code.

    Raises:
        requests.RequestException: if the login request fails or times out.
    """
    login_url = "http://www.renren.com/PLogin.do"
    headers = {
        "User-Agent": random.choice(USER_AGENT),
    }
    # NOTE(review): credentials are hard-coded for the demo; do not reuse this
    # pattern with a real account.
    login_data = {
        "email": "17770832661",
        "password": "123456",
    }

    # A Session (rather than bare requests.post) tracks the cookies of the
    # login exchange; the ``with`` block closes its connections on exit.
    with requests.Session() as session:
        # The timeout stops a stalled server from hanging the script forever.
        login_response = session.post(
            login_url, data=login_data, headers=headers, timeout=10
        )

        # To also fetch the profile page with this session (its cookies are
        # sent automatically), something like the following can be added here:
        #     profile_url = "http://www.renren.com/973036509/profile"
        #     response = session.get(profile_url, headers=headers)
        #     data_str = response.content.decode("utf8")
        #     with open("d:/test04.html", "w", encoding="utf8") as f:
        #         f.write(data_str)

    # Collect the Cookie header for a cookie pool. ``.get`` on the request's
    # own headers mapping avoids the KeyError the original raised when no
    # Cookie header had been sent (e.g. the login set no cookies).
    cookie_list = []
    cookie_header = login_response.request.headers.get("Cookie")
    if cookie_header is not None:
        cookie_list.append(cookie_header)
    print(cookie_list)
67 
68 
# Script entry point: run the cookie-harvesting demo only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    renren_login()
使用session 獲取cookie
相關文章
相關標籤/搜索