1. GET請求
In [1]: import requests

In [2]: help(requests.get)        # 使用requests.get提交GET請求
Help on function get in module requests.api:

get(url, params=None, **kwargs)
    Sends a GET request.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response


In [3]: r = requests.get('https://github.com/timeline.json')


In [6]: dic = {'hostname':'n3', 'ip':''}

In [7]: r = requests.get('', params=dic)        # GET請求攜帶參數,不需要進行編碼

In [10]: r.text
Out[10]: u'GET OK'

In [11]: r.status_code
Out[11]: 200

In [12]: r.url
Out[12]: u''


In [11]: ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}

In [12]: r = requests.get('https://www.qiushibaike.com/', headers=ua)        # 支持自定義User-Agent

In [13]: r.status_code
Out[13]: 200
2. POST請求
In [1]: import requests

In [2]: import json

In [3]: url = 'http://httpbin.org/post'

In [4]: ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}

In [5]: dic = {'key1': 'value1', 'key2': 'value2'}

In [6]: r = requests.post(url, data=dic)        # POST請求攜帶參數,不需要編碼

In [7]: r.status_code
Out[7]: 200

In [8]: json.loads(r.text)
Out[8]:
{u'args': {},
 u'data': u'',
 u'files': {},
 u'form': {u'key1': u'value1', u'key2': u'value2'},
 u'headers': {u'Accept': u'*/*',
  u'Accept-Encoding': u'gzip, deflate',
  u'Connection': u'close',
  u'Content-Length': u'23',
  u'Content-Type': u'application/x-www-form-urlencoded',
  u'Host': u'httpbin.org',
  u'User-Agent': u'python-requests/2.18.4'},
 u'json': None,
 u'origin': u'',
 u'url': u'http://httpbin.org/post'}

In [9]: r = requests.post(url, data=dic, headers=ua)        # 同樣支持自定義User-Agent

In [10]: r.status_code
Out[10]: 200
3. 響應內容
3.1 文本型響應內容
Requests 會在你訪問 r.text 時對響應進行解碼。Requests 首先在 HTTP 頭部檢測是否存在指定的編碼方式,如果不存在,則會使用
chardet(舊版文檔中稱爲 charade)來嘗試猜測編碼方式。
1 In [14]: r = requests.get('https://github.com/timeline.json') 2 3 In [15]: r.encoding 4 Out[15]: 'utf-8' 5 6 In [16]: r.text 7 Out[16]: u'{"message":"Hello there, wayfaring stranger. If you\u2019re reading this then you probably didn\u2019t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"}'
3.2 二進制型響應內容(取圖片,文件)
In [17]: r.content Out[17]: '{"message":"Hello there, wayfaring stranger. If you\xe2\x80\x99re reading this then you probably didn\xe2\x80\x99t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"}'
3.3 原始套接字響應內容
1 In [21]: r = requests.get('https://github.com/timeline.json', stream=True) 2 3 In [22]: r.raw 4 Out[22]: <urllib3.response.HTTPResponse at 0x2d380d0> 5 6 In [23]: r.raw.read() 7 Out[23]: '{"message":"Hello there, wayfaring stranger. If you\xe2\x80\x99re reading this then you probably didn\xe2\x80\x99t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"}'
3.4 當Requests不能自動解碼時,自定義編碼來解碼
1 [root@web spider]# cat demon1.py 2 #!/usr/bin/env python 3 4 import requests 5 6 url = 'https://www.baidu.com/' 7 r = requests.get(url, verify=False) 8 print(r.encoding) //查看返回的文本的編碼形式,在文本的頭部有聲明 9 print(r.text) 10 11 12 [root@web spider]# python demon1.py 13 ISO-8859-1 14 <!DOCTYPE html> 15 <!--STATUS OK--><html> <head><meta http-equiv=content-type content=text/html;charset=utf-8><meta http-equiv=X-UA-Compatible content=IE=Edge><meta content=always name=referrer><link rel=stylesheet type=text/css href=https://ss1.bdstatic.com/5eN1bjq8AAUYm2zgoY3K/r/www/cache/bdorz/baidu.min.css><title>ç¾åº¦ä¸ä¸ï¼ä½ å°±ç¥é</title> //亂碼
[root@web spider]# cat demon1.py
#!/usr/bin/env python

import requests

url = 'https://www.baidu.com/'
r = requests.get(url, verify=False)
r.encoding = 'utf-8'        # 當requests模塊不能正確對返回的內容解碼時,需要定義r.encoding的值來解碼,編碼形式與返回的內容聲明的編碼形式一樣
print(r.text)


[root@web spider]# python demon1.py
<!DOCTYPE html>
<!--STATUS OK--><html> <head><meta http-equiv=content-type content=text/html;charset=utf-8><meta http-equiv=X-UA-Compatible content=IE=Edge><meta content=always name=referrer><link rel=stylesheet type=text/css href=https://ss1.bdstatic.com/5eN1bjq8AAUYm2zgoY3K/r/www/cache/bdorz/baidu.min.css><title>百度一下,你就知道</title>
4. 響應狀態碼
In [33]: r = requests.get('http://httpbin.org/get')

In [34]: r.status_code
Out[34]: 200

In [35]: r.status_code == requests.codes.ok        # 判斷狀態碼是否爲200
Out[35]: True

In [36]: 301 == requests.codes.ok
Out[36]: False

In [37]: 302 == requests.codes.ok
Out[37]: False
5. 響應頭
1 In [48]: r = requests.get('https://www.github.com') 2 3 In [49]: r.headers 4 Out[49]: {'Status': '200 OK', 'Expect-CT': 'max-age=2592000, report-uri="https://api.github.com/_private/browser/errors"', 'X-Request-Id': 'fab96e29cdc0d636dd3ce5b51668717a', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'Content-Security-Policy': "default-src 'none'; base-uri 'self'; block-all-mixed-content; child-src render.githubusercontent.com; connect-src 'self' uploads.github.com status.github.com collector.githubapp.com api.github.com www.google-analytics.com github-cloud.s3.amazonaws.com github-production-repository-file-5c1aeb.s3.amazonaws.com github-production-upload-manifest-file-7fdce7.s3.amazonaws.com github-production-user-asset-6210df.s3.amazonaws.com wss://live.github.com; font-src assets-cdn.github.com; form-action 'self' github.com gist.github.com; frame-ancestors 'none'; img-src 'self' data: assets-cdn.github.com identicons.github.com collector.githubapp.com github-cloud.s3.amazonaws.com *.githubusercontent.com; media-src 'none'; script-src assets-cdn.github.com; style-src 'unsafe-inline' assets-cdn.github.com", 'X-Runtime-rack': '0.040115', 'Transfer-Encoding': 'chunked', 'Set-Cookie': 'logged_in=no; domain=.github.com; path=/; expires=Thu, 31 Dec 2037 04:34:58 -0000; secure; HttpOnly, _gh_sess=eyJzZXNzaW9uX2lkIjoiNzczMjViNjAxNzdkYmEyM2EwZjQ2NDkzMDBmZGM1ZmYiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNDY5NDg5ODYyNywiX2NzcmZfdG9rZW4iOiJpZFVNTGlERHBDT0M2MHI4MXAyY2N4dmtYU21lTlFJUHVoSW9rTng4eC9nPSJ9--196e7163a50c84523f9bd24e5b1f411c39df0455; path=/; secure; HttpOnly', 'Strict-Transport-Security': 'max-age=31536000; includeSubdomains; preload', 'Vary': 'X-PJAX, Accept-Encoding', 'Server': 'GitHub.com', 'X-GitHub-Request-Id': '8E25:166FF:3551BE5:4F1D6DD:5A4868F2', 'X-Runtime': '0.032924', 'X-UA-Compatible': 'IE=Edge,chrome=1', 'Cache-Control': 'no-cache', 'Date': 'Sun, 31 Dec 2017 04:34:58 GMT', 'X-Frame-Options': 'deny', 'Content-Type': 'text/html; 
charset=utf-8', 'Public-Key-Pins': 'max-age=0; pin-sha256="WoiWRyIOVNa9ihaBciRSC7XHjliYS9VwUGOIud4PB18="; pin-sha256="RRM1dGqnDFsCJXBTHky16vi1obOlCgFFn/yOhI/y+ho="; pin-sha256="k2v657xBsOVe1PQRwOsHsw3bsGT2VzIqz5K+59sNQws="; pin-sha256="K87oWBWM9UZfyddvDfoxL+8lpNyoUB2ptGtn0fv6G2Q="; pin-sha256="IQBnNBEiFuhj+8x6X8XLgh01V9Ic5/V3IRQLNFFc7v4="; pin-sha256="iie1VXtL7HzAMF+/PVPR9xzT80kQxdZeJ+zduCB3uj0="; pin-sha256="LvRiGEjRqfzurezaWuj8Wie2gyHMrW5Q06LspMnox7A="; includeSubDomains', 'Content-Encoding': 'gzip'} 5 6 In [51]: r.headers['Set-Cookie'] 7 Out[51]: 'logged_in=no; domain=.github.com; path=/; expires=Thu, 31 Dec 2037 04:34:58 -0000; secure; HttpOnly, _gh_sess=eyJzZXNzaW9uX2lkIjoiNzczMjViNjAxNzdkYmEyM2EwZjQ2NDkzMDBmZGM1ZmYiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNDY5NDg5ODYyNywiX2NzcmZfdG9rZW4iOiJpZFVNTGlERHBDT0M2MHI4MXAyY2N4dmtYU21lTlFJUHVoSW9rTng4eC9nPSJ9--196e7163a50c84523f9bd24e5b1f411c39df0455; path=/; secure; HttpOnly'
6. Cookie
1 [root@web spider]# cat cookie1.py 2 #!/usr/bin/env python 3 4 import requests 5 6 s = requests.session() 7 s.get('https://www.github.com') 8 print(s.cookies) 9 print('#'*30) 10 print(s.cookies.keys()) 11 print('#'*30) 12 print(s.cookies.values()) 13 print('#'*30) 14 for i in s.cookies: 15 print(i.name, i.value, i.domain, i.path, i.expires) 16 17 18 [root@web spider]# python cookie1.py 19 <<class 'requests.cookies.RequestsCookieJar'>[<Cookie logged_in=no for .github.com/>, <Cookie _gh_sess=eyJzZXNzaW9uX2lkIjoiMTQ5ZTFhODdkN2MzYzNjYzZmODQ2MDdjMThlMzUwMjgiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNjUxMDAzNDk4NiwiX2NzcmZfdG9rZW4iOiJtVmhsSDFJaURxUlRNN3dXSGxCZG5CU3RaRW5YZ2JNbkFlenRzeXFxY3pzPSJ9--b739efdc41973b7a3a976d5a8a0d9b7691ca68ab for github.com/>]> 20 ############################## 21 ['logged_in', '_gh_sess'] 22 ############################## 23 ['no','eyJzZXNzaW9uX2lkIjoiMTQ5ZTFhODdkN2MzYzNjYzZmODQ2MDdjMThlMzUwMjgiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNjUxMDAzNDk4NiwiX2NzcmZfdG9rZW4iOiJtVmhsSDFJaURxUlRNN3dXSGxCZG5CU3RaRW5YZ2JNbkFlenRzeXFxY3pzPSJ9--b739efdc41973b7a3a976d5a8a0d9b7691ca68ab'] 24 ############################## 25 ('logged_in', 'no', '.github.com', '/', 2147662035) 26 ('_gh_sess','eyJzZXNzaW9uX2lkIjoiMTQ5ZTFhODdkN2MzYzNjYzZmODQ2MDdjMThlMzUwMjgiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNjUxMDAzNDk4NiwiX2NzcmZfdG9rZW4iOiJtVmhsSDFJaURxUlRNN3dXSGxCZG5CU3RaRW5YZ2JNbkFlenRzeXFxY3pzPSJ9--b739efdc41973b7a3a976d5a8a0d9b7691ca68ab', 'github.com', '/', None)
6.1 使用已知的cookie訪問網站
[root@web spider]# cat cookie2.py
#!/usr/bin/env python

import requests

url = 'http://httpbin.org/cookies'
r = requests.get(url, cookies={'key1': 'value1', 'key2': 'value2'})        # get()提供了一個專門接收cookie的參數,參數接收一個字典
print(r.text)


[root@web spider]# python cookie2.py
{
  "cookies": {
    "key1": "value1",
    "key2": "value2"
  }
}
7. 會話對象
會話對象讓你能夠跨請求保持某些參數。它也會在同一個 Session 實例發出的所有請求之間保持 cookie。
1 In [110]: s = requests.session() //建立一個會話對象 2 3 In [111]: help(s.get) //get和post方法用法同requests.get/requests.post 4 Help on method get in module requests.sessions: 5 6 get(self, url, **kwargs) method of requests.sessions.Session instance 7 Sends a GET request. Returns :class:`Response` object. 8 9 :param url: URL for the new :class:`Request` object. 10 :param \*\*kwargs: Optional arguments that ``request`` takes. 11 :rtype: requests.Response 12 13 14 In [112]: help(s.post) 15 Help on method post in module requests.sessions: 16 17 post(self, url, data=None, json=None, **kwargs) method of requests.sessions.Session instance 18 Sends a POST request. Returns :class:`Response` object. 19 20 :param url: URL for the new :class:`Request` object. 21 :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 22 :param json: (optional) json to send in the body of the :class:`Request`. 23 :param \*\*kwargs: Optional arguments that ``request`` takes. 24 :rtype: requests.Response 25 26 27 In [113]: r = s.get('') 28 29 In [114]: r.status_code 30 Out[114]: 200 31 32 In [115]: r.cookies 33 Out[115]: <<class 'requests.cookies.RequestsCookieJar'>[Cookie(version=0, name='csrftoken', value='nDCM6HJnfOI10QazYD78vdEO2Gt2r6NO', port=None, port_specified=False, domain='', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=1546137248, discard=False, comment=None, comment_url=None, rest={}, rfc2109=False)]>
8. 使用代理
1 [root@web spider]# cat daili1.py 2 #!/usr/bin/env python 3 4 import requests 5 6 url = 'http://2017.ip138.com/ic.asp' 7 r = requests.get(url) 8 r.encoding = 'gb2312' 9 print(r.text) 10 11 proxies = { 12 # 'https': '', 13 'http': '' 14 } 15 r1 = requests.get(url, proxies=proxies) 16 r1.encoding = 'gb2312' 17 print(r1.text) 18 19 20 [root@web spider]# python daili1.py 21 <html> 22 <head> 23 <meta http-equiv="content-type" content="text/html; charset=gb2312"> 24 <title> 您的IP地址 </title> 25 </head> 26 <body style="margin:0px"><center>您的IP是:[] 來自:廣東省廣州市 電信</center></body></html> 27 <html> 28 <head> 29 <meta http-equiv="content-type" content="text/html; charset=gb2312"> 30 <title> 您的IP地址 </title> 31 </head> 32 <body style="margin:0px"><center>您的IP是:[] 來自:廣東省廣州市 電信</center></body></html>