pip3 install requests
pip3 install beautifulsoup4
# Scrape the autohome news list: print each article's link, title and
# thumbnail URL, and save every thumbnail under a random file name.
import uuid
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

page_url = "https://www.autohome.com.cn/news/"
# requests has no default timeout; without one a stalled server blocks forever.
reponse = requests.get(url=page_url, timeout=10)
# Decode with the charset detected from the body so the text is not garbled.
reponse.encoding = reponse.apparent_encoding
# html.parser is the built-in parser; lxml is usually the better choice.
soup = BeautifulSoup(reponse.text, 'html.parser')

# find() looks a tag up by attribute (id, attrs, tag name, ...) and returns
# None when nothing matches, so guard before calling find_all() on it.
target = soup.find(id="auto-channel-lazyload-article")
li_list = target.find_all('li') if target is not None else []

for li in li_list:
    a_tag = li.find('a')
    if a_tag is None:
        continue

    # Placeholder/advert items may lack a heading or an image; skip them
    # instead of failing with AttributeError on None.
    title_tag = a_tag.find("h3")
    img_tag = a_tag.find("img")
    if title_tag is None or img_tag is None:
        continue
    src = img_tag.attrs.get('src')
    if not src:
        continue

    href = a_tag.attrs.get("href")  # attrs is a dict, so .get() is safe
    title = title_tag.text          # .text strips the markup
    # urljoin resolves protocol-relative ("//host/x.jpg"), relative and
    # already-absolute URLs alike; a plain "http:" prefix breaks the latter.
    img_src = urljoin(page_url, src)
    print(href)
    print(title)
    print(img_src)

    img_reponse = requests.get(url=img_src, timeout=10)
    if not img_reponse.ok:
        # Do not save an HTTP error page as if it were a JPEG.
        continue
    file_name = str(uuid.uuid4()) + '.jpg'  # collision-free image name
    with open(file_name, 'wb') as fp:
        fp.write(img_reponse.content)
reponse = requests.get(url) #根據url獲取響應對象 reponse.apparent_encoding #獲取文本的原來編碼 reponse.encoding #對文本編碼進行設置 reponse.text #獲取文本內容,str類型 reponse.content #獲取數據,byte類型 reponse.status_code #獲取響應狀態碼
soup = BeautifulSoup('網頁代碼','html.parser') #獲取HTML對象 target = soup.find(id="auto-channel-lazyload-article") #根據自定義屬性獲取標籤對象,默認找到第一個 li_list = target.find_all('li') #根據標籤名,獲取全部的標籤對象,放入列表中 注意:是自定義標籤均可以查找 v1 = soup.find('div') v1 = soup.find(id='il') v1 = soup.find('div',id='i1') find_all同樣 對於獲取的標籤對象,咱們可使用 obj.text 獲取文本 obj.attrs 獲取屬性字典
def _default_redirects(kwargs, follow):
    """Set ``allow_redirects`` to *follow* unless the caller already chose."""
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = follow
    return kwargs


def get(url, params=None, **kwargs):
    r"""Issue a GET request; redirects are followed unless overridden.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes sent in the query string
        of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    options_ = _default_redirects(kwargs, True)
    return request('get', url, params=params, **options_)


def options(url, **kwargs):
    r"""Issue an OPTIONS request; redirects are followed unless overridden.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    options_ = _default_redirects(kwargs, True)
    return request('options', url, **options_)


def head(url, **kwargs):
    r"""Issue a HEAD request; redirects are NOT followed unless overridden.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    options_ = _default_redirects(kwargs, False)
    return request('head', url, **options_)


def post(url, data=None, json=None, **kwargs):
    r"""Issue a POST request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary (will be form-encoded), bytes, or
        file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the
        :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('post', url, data=data, json=json, **kwargs)
    return response


def put(url, data=None, **kwargs):
    r"""Issue a PUT request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary (will be form-encoded), bytes, or
        file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the
        :class:`Request`; not a named parameter here, it is forwarded
        through ``**kwargs``.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('put', url, data=data, **kwargs)
    return response


def patch(url, data=None, **kwargs):
    r"""Issue a PATCH request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary (will be form-encoded), bytes, or
        file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the
        :class:`Request`; not a named parameter here, it is forwarded
        through ``**kwargs``.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('patch', url, data=data, **kwargs)
    return response


def delete(url, **kwargs):
    r"""Issue a DELETE request.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('delete', url, **kwargs)
    return response
from . import sessions


def request(method, url, **kwargs):
    """Build a :class:`Request <Request>` and send it on a throwaway session.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`.
    :param data: (optional) Dictionary or list of tuples ``[(key, value)]``
        (will be form-encoded), bytes, or file-like object to send in the
        body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the
        :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects``
        (or ``{'name': file-tuple}``) for multipart encoding upload.
        ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple
        ``('filename', fileobj, 'content_type')`` or a 4-tuple
        ``('filename', fileobj, 'content_type', custom_headers)``, where
        ``'content-type'`` is a string defining the content type of the
        given file and ``custom_headers`` a dict-like object containing
        additional headers to add for the file.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP
        Auth.
    :param timeout: (optional) How many seconds to wait for the server to
        send data before giving up, as a float, or a
        :ref:`(connect timeout, read timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Enable/disable
        GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. Defaults to
        ``True``.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of
        the proxy.
    :param verify: (optional) Either a boolean, in which case it controls
        whether we verify the server's TLS certificate, or a string, in
        which case it must be a path to a CA bundle to use. Defaults to
        ``True``.
    :param stream: (optional) if ``False``, the response content will be
        immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')
      >>> req
      <Response [200]>
    """
    # The context manager guarantees the session is closed, so no socket is
    # left open (which can surface as a ResourceWarning or look like a leak).
    with sessions.Session() as http_session:
        response = http_session.request(method=method, url=url, **kwargs)
    return response
:param method: 提交方式get,post,put,patch,delete,options,head
:param url: 提交地址
:param params: 在URL中傳遞的參數 GET
requests.request(method='GET',url='http://xxxx.com',params={'k1':'v1','k2':'v2'})
會自動轉換爲http://xxxx.com?k1=v1&k2=v2
:param data: 在請求體中傳遞的數據,字典,字節,文件對象 POST
requests.request(method='POST',url='http://xxxx.com',data={'user':'aaaa','password':'bbbb'})
雖然顯示爲字典形式,可是會在傳遞時也轉換爲data = "user=aaaa&password=bbbb"
:param json: 存放在Django中請求體中的body中--->request.body中
requests.request(method='POST',url='http://xxxx.com',json={'user':'aaaa','password':'bbb'})
會將json數據轉換爲字符串形式 json='{"user": "aaaa", "password": "bbb"}',存放在請求體的body中
和data相比:data中只能存放基礎類型,不能存放字典,列表等,而json只是將數據字符串化,因此能夠存放這些數據類型
:param headers: 請求頭
能夠用於防止別人使用腳本登陸網站,例如上面抽屜自動登陸就是根據請求頭中用戶代理,來過濾用戶。也可使用Referer看上一步網站位置,能夠防止盜鏈等
:param cookies: cookies,存放在請求頭中,傳遞時是放在headers中傳遞過去的
:param files: 用於post方式傳遞文件時使用。使用鍵值對形式
requests.post(url='xxx',files={
'f1':open('s1.py','rb'), #傳遞的name:文件對象/文件內容 'f1':'dawfwafawfawf'
'f2':('newf1name',open('s1.py','rb')) #元組中第一個參數,是上傳到服務器中的文件名,可指定
})
:param auth: 權限驗證,通常用於在web前端對數據進行加密base64加密。,一些網站在登陸時,使用登陸框輸入用戶密碼後,在前端進行加密,而後將數據存放在請求頭中
ret = requests.get('https://api.github.com/user',
auth=HTTPBasicAuth('用戶名', '密碼')
)
:param timeout: 超時float或者元組 一個參數時爲float,表明等待服務器返回響應內容的時間,兩個參數時爲元組形式,第一個表明鏈接網站超時時間,第二個表明等待服務器響應的超時時間 ret = requests.get('http://google.com/', timeout=1)
ret = requests.get('http://google.com/', timeout=(5, 1))
:param allow_redirects: 容許重定向,類型爲布爾型,默認爲True,容許後,會去獲取重定向後的頁面數據進行返回
requests.get('http://127.0.0.1:8000/test/', allow_redirects=False)
:param proxies: 代理,例如電腦出口IP(公網IP,非局域網)限制,以實現同IP操做限制。咱們聯繫不一樣人員經過不一樣公網IP去操做,以實現解除操做限制,這些人員稱爲代理
技術上使用:代理服務器,咱們向代理服務器上發送數據,讓服務器替咱們去選用代理IP去向指定的網站發送請求
requests.post(
url = "http://dig.chouti.com/log",
data = form_data,
proxies = {
'http':'http://代理服務器地址:端口',
'https':'http://代理服務器地址:端口',
}
)
:param stream: 流方式獲取文件數據,下載一點數據到內存,就保存到磁盤中,每下載一點就保存一點。防止由於內存不足文件過大而不能完成下載任務狀況
from contextlib import closing
with closing(requests.get('http://httpbin.org/get', stream=True)) as r:
# 在此處理響應。
for i in r.iter_content():
print(i)
:param cert: 帶HTTPS時,通道進行ssl加密,原來http是使用socket,數據未加密,不安全。如今的HTTPS是含有加密解密過程。須要證書存在
一種是:自定義證書,客戶端須要客戶本身去安裝證書
requests.get(
url="https:...",
cert="xxx.pem", #每次訪問須要攜帶證書,格式是pem,('.crt','.key')<兩個文件都須要攜帶,一塊兒拼接加密>,兩種文件驗證方法
)
另外一種是:在系統中帶有的認證證書,須要去購買,廠家和系統聯合,系統內置,直接對網站解析驗證
:param verify: 布爾類型,當爲false時,忽略上面cert證書的存在,照樣能夠獲取結果,通常網站爲了用戶便利,是容許這種狀況
對於上面的自動登陸時,cookie和session等會話期間產生的數據須要咱們本身手動管理。而session方法,會將請求獲取的響應cookie和響應體等放入全局變量中,之後咱們訪問該網站時,會將這些數據自動攜帶一塊兒發送過去。
注意:對於請求頭咱們本身仍是須要去配置的
# Log in to chouti and up-vote a link, letting requests.session() carry the
# cookies between calls so none of them has to be passed by hand.
import requests

# Browser-like request headers; these still have to be supplied explicitly.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}

session = requests.session()

# First visit to the home page; its response is closed right away.
i1 = session.get("https://dig.chouti.com/", headers=headers)
i1.close()

form_data = {
    'phone': "xxxx",
    'password': "xxxx",
    'oneMonth': '',
}
i2 = session.post(
    url="https://dig.chouti.com/login",
    data=form_data,
    headers=headers,
)

i3 = session.post(
    "https://dig.chouti.com/link/vote?linksId=20324146",
    headers=headers,
)
print(i3.text)
# Sample output:
# {"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_52941024478","likedTime":"1529507291930000","lvCount":"7","nick":"山上有風景","uvCount":"3","voteTime":"小於1分鐘前"}}}
推文:django-request對象
不管咱們發送什麼樣的格式,都會到request.body中,而request.POST中可能沒有值
依據的是請求頭中的content-type來判斷類型
例如: Content-Type: text/html;charset=utf-8;
常見的媒體格式類型以下:
text/html : HTML格式
text/plain : 純文本格式
text/xml : XML格式
image/gif : gif圖片格式
image/jpeg : jpg圖片格式
image/png : png圖片格式
以application開頭的媒體格式類型:
application/xhtml+xml : XHTML格式
application/xml : XML數據格式
application/atom+xml : Atom XML聚合格式
application/json : JSON數據格式
application/pdf : pdf格式
application/msword : Word文檔格式
application/octet-stream : 二進制流數據(如常見的文件下載)
application/x-www-form-urlencoded : <form enctype="">中默認的enctype,form表單數據被編碼爲key/value格式發送到服務器(表單默認的提交數據的格式)
另一種常見的媒體格式是上傳文件之時使用的:
multipart/form-data : 須要在表單中進行文件上傳時,就須要使用該格式
以上就是咱們在平常的開發中,常常會用到的若干content-type的內容格式。
例如:當我使用post傳遞數據,在服務端接收請求體,存放在request.body中,
而後到請求頭中查詢content-type:application/x-www-form-urlencoded
再將接收的請求體拷貝到request.POST中存放
from bs4 import BeautifulSoup html = ''' <html lang="en"> <head> <meta charset="UTF-8"> <title>Title</title> </head> <body> <a href="/wwewe/fafwaw" class="btn btn2">666daw6fw</a> <div id="content" for='1'> <p>div>p <label>title</label> </p> </div> <hr/> <p id="bott">div,p</p> </body> </html> ''' soup = BeautifulSoup(html,features="lxml")
tag = soup.find("a") print(tag.name) #a tag = soup.find(id="content") print(tag.name) #div
tag = soup.find('a') print(tag.attrs) #{'href': '/wwewe/fafwaw', 'class': ['btn', 'btn2']} print(tag.attrs['href']) #/wwewe/fafwaw tag.attrs['id']="btn-primary" #添加 del tag.attrs['class'] #刪除 tag.attrs['href']="/change" #改 print(tag.attrs) #{'id': 'btn-primary', 'href': '/change'}
# Walk the direct children of <body>, printing only real tags.
from bs4.element import Tag

body = soup.find("body")
print(body.children)  # an iterator over the direct children only
for child in body.children:
    # Children are either NavigableString (usually whitespace/newlines
    # between tags) or Tag; isinstance is the idiomatic type check.
    if isinstance(child, Tag):
        print(child)

# Walk every descendant of <body>: nested tags are visited on their own in
# addition to being printed as part of their parent.
from bs4.element import Tag  # repeated so this snippet also runs by itself

body = soup.find("body")
for child in body.descendants:
    if isinstance(child, Tag):
        print(child)
body = soup.find("body") body.clear() #清空子標籤,保留本身 print(soup) #body標籤存在,內部爲空
body = soup.find('body') body.decompose() #遞歸刪除,包含本身 print(soup) #body標籤不存在
body = soup.find('body') deltag = body.extract() #遞歸刪除,包含本標籤 print(soup) #無body標籤 print(deltag) #是全部咱們刪除的標籤
#用字符串形式輸出,也能夠直接輸出,內置__str__方法 body = soup.find('body') v = body.decode() #含有當前標籤 print(v) v = body.decode_contents() #不含當前標籤 print(v)
#轉換爲字節類型 body = soup.find('body') v = body.encode() #含有body print(v) v = body.encode_contents() #不含body print(v)
tag = soup.find(name="p") #默認是尋找全部子孫節點的數據,遞歸查找 print(tag) #找到子孫下的第一個 tag = soup.find(name='p',recursive=False) print(tag) #None 是由於,當前標籤是html標籤,而不是body tag = soup.find('body').find('p') print(tag) ##找到子孫下的第一個 tag = soup.find('body').find('p',recursive=False) print(tag) #<p>div,p</p> tag = soup.find('body').find('div',attrs={"id":"content","for":"1"},recursive=False) print(tag) #找到該標籤
# find_all(): collect every matching tag.
import re

tags = soup.find_all('p')
print(tags)

tags = soup.find_all('p', limit=1)  # stops after one match, still returns a list
print(tags)

tags = soup.find_all('p', attrs={'id': "bott"})  # filter by attribute
print(tags)

tags = soup.find_all(name=['p', 'a'])  # every <p> and <a> tag
print(tags)

tags = soup.find("body").find_all(name=['p', 'a'], recursive=False)  # direct children only
print(tags)

# `string=` is the current name of the text filter; the older `text=`
# spelling is deprecated.
tags = soup.find("body").find_all(name=['p', 'a'], string="div,p")  # tags whose text is "div,p"
print(tags)

# Regular-expression matching
pat = re.compile("p")
tags = soup.find_all(name=pat)
print(tags)

pat = re.compile("^lab")  # tag names starting with "lab"
tags = soup.find_all(name=pat)
print(tags)

pat = re.compile(".*faf.*")
tags = soup.find_all(attrs={"href": pat})  # same as passing href=pat
print(tags)

pat = re.compile("cont.*")
tags = soup.find_all(id=pat)
print(tags)


# Function matching
def func(tag):
    """Accept only tags that carry both a class and an href attribute."""
    return tag.has_attr("class") and tag.has_attr("href")


tags = soup.find_all(name=func)
print(tags)
tag = soup.find('a') print(tag.get("href")) #獲取標籤屬性 print(tag.attrs.get("href")) #獲取標籤屬性 print(tag.has_attr("href"))
tag = soup.find(id='content') print(tag.get_text()) #獲取標籤的文本內容,會獲取全部的子孫標籤文本 tag = soup.find("label") print(tag.get_text()) #title print(tag.string) #title tag.string = "test" print(tag.get_text()) #test
body = soup.find("body") child_tag = body.find("div",recursive=False) if child_tag: print(body.index(child_tag)) #必須是其子標籤,不是子孫標籤
tag = soup.find('hr') print(tag.is_empty_element) #判斷是否是空標籤,閉合標籤
tag.next
tag.next_element
tag.next_elements #會包含有字符串文本類型 tag.next_sibling #只獲取標籤對象Tag
tag.next_siblings
tag.previous
tag.previous_element
tag.previous_elements
tag.previous_sibling
tag.previous_siblings
tag.parent
tag.parents
tag = soup.find(id="content") print(tag) print(tag.next) #下一個元素,這裏是換行符 print(tag.next_element) #下一個元素,這裏是換行符 print(tag.next_elements) #下面的全部子孫標籤,都會提出來進行一次迭代 for ele in tag.next_elements: print(ele) print(tag.next_sibling) #只去獲取子標籤 print(tag.next_siblings) #只含有子標籤 for ele in tag.next_siblings: print(ele)
tag.find_next(...)
tag.find_all_next(...)
tag.find_next_sibling(...)
tag.find_next_siblings(...)
tag.find_previous(...)
tag.find_all_previous(...)
tag.find_previous_sibling(...)
tag.find_previous_siblings(...)
tag.find_parent(...)
tag.find_parents(...)
tag = soup.find("label") # print(tag.parent) # for par in tag.parents: # print(par) print(tag.find_parent(id='content')) #根據條件去上面查找符合條件的一個標籤 print(tag.find_parents(id='content')) #根據條件去向上面查找全部符合條件的標籤,列表形式
# CSS selector lookups with select(); every call returns a list of matches.

# By tag name and position
soup.select("title")
soup.select("p:nth-of-type(3)")  # pseudo-classes need the leading ':'

# Descendant combinator
soup.select("body a")
soup.select("html head title")

# Selector list: either <span> or <a>
tag = soup.select("span,a")

# Child combinator
soup.select("head > title")
soup.select("p > a")
soup.select("p > a:nth-of-type(2)")
soup.select("p > #link1")
soup.select("body > a")

# Sibling combinators
soup.select("#link1 ~ .sister")
soup.select("#link1 + .sister")

# By class and id
soup.select(".sister")
soup.select("[class~=sister]")
soup.select("#link1")
soup.select("a#link2")

# By attribute presence / value / prefix / suffix / substring
soup.select('a[href]')
soup.select('a[href="http://example.com/elsie"]')
soup.select('a[href^="http://example.com/"]')
soup.select('a[href$="tillie"]')
soup.select('a[href*=".com/el"]')

# Restrict matches to <a> descendants of <body> that carry an href.
# The private `_candidate_generator` keyword is not part of the public API
# and current soupsieve-based bs4 releases no longer honour it; the `[href]`
# attribute selector expresses the same filter portably.
body = soup.find('body')
tags = body.select("a[href]")
print(type(tags), tags)

# Same query, stopping after the first match.
tags = body.select("a[href]", limit=1)
print(type(tags), tags)
from bs4.element import Tag tag_obj = Tag(name='pre',attrs={"col":30}) tag_obj.string="這是一個新建標籤" print(tag_obj) #<pre col="30">這是一個新建標籤</pre>
soup = BeautifulSoup(html,features="lxml") from bs4.element import Tag tag_obj = Tag(name='pre',attrs={"col":30}) tag_obj.string="這是一個新建標籤" # print(tag_obj) #<pre col="30">這是一個新建標籤</pre> soup.find(id="content").append(tag_obj) #追加放在最後面 print(soup)
soup = BeautifulSoup(html,features="lxml") from bs4.element import Tag tag_obj = Tag(name='pre',attrs={"col":30}) tag_obj.string="這是一個新建標籤" # print(tag_obj) #<pre col="30">這是一個新建標籤</pre> soup.find(id="content").insert(0,tag_obj) #追加放在最前面 print(soup)
# Insert a new <pre> tag immediately before AND immediately after the
# #content div.
from bs4.element import Tag

soup = BeautifulSoup(html, features="lxml")


def _make_pre_tag():
    """Build a fresh <pre col="30"> tag holding the demo text."""
    new_tag = Tag(name='pre', attrs={"col": 30})
    new_tag.string = "這是一個新建標籤"
    return new_tag


tag_obj = _make_pre_tag()
# print(tag_obj)  # <pre col="30">這是一個新建標籤</pre>

# A tag object can live in only one place in the tree: inserting the same
# object a second time moves it. Use one tag per insertion so both survive.
soup.find(id="content").insert_before(tag_obj)        # placed before the div
soup.find(id="content").insert_after(_make_pre_tag())  # placed after the div
print(soup)
soup = BeautifulSoup(html,features="lxml") from bs4.element import Tag tag_obj = Tag(name='pre',attrs={"col":30}) tag_obj.string="這是一個新建標籤" # print(tag_obj) #<pre col="30">這是一個新建標籤</pre> soup.find(id="content").replace_with(tag_obj) #原來div標籤被替換 print(soup)
def setup(self, parent=None, previous_element=None, next_element=None,
previous_sibling=None, next_sibling=None):
soup = BeautifulSoup(html,features="lxml") div = soup.find('div') a = soup.find('a') div.setup(next_sibling=a) print(soup) #沒有變化 print(div.next_sibling) #是咱們設置的那個標籤對象
soup = BeautifulSoup(html,features="lxml") from bs4.element import Tag tag_obj = Tag(name='pre',attrs={"col":30}) tag_obj.string="這是一個新建標籤" a = soup.find("a") a.wrap(tag_obj) #用新建標籤將當前a標籤包含起來 div = soup.find('div') tag_obj.wrap(div) #用本來存在的標籤包含如今的tag_obj,包含數放在最後面 print(soup)
div = soup.find('div') div.unwrap() print(soup)
import requests
from bs4 import BeautifulSoup

# Step 1: open the login page to obtain the CSRF token and initial cookies.
html1 = requests.get(url="https://github.com/login")
html1.encoding = html1.apparent_encoding
soup = BeautifulSoup(html1.text, features="html.parser")
login_token_obj = soup.find(name='input', attrs={'name': 'authenticity_token'})
if login_token_obj is None:
    # find() returns None on a miss; fail with a clear message instead of an
    # AttributeError on the next line.
    html1.close()
    raise RuntimeError("authenticity_token input not found on the GitHub login page")
login_token = login_token_obj.get("value")  # the page's form token
cookie_dict = html1.cookies.get_dict()
html1.close()

# Step 2: the fields the login form submits.
login_data = {
    'login': "帳號",
    'password': "密碼",
    'authenticity_token': login_token,
    "utf8": "",
    "commit": "Sign in",
}

# Step 3: submit the form; the cookies from step 1 must be sent along.
session_reponse = requests.post(
    "https://github.com/session",
    data=login_data,
    cookies=cookie_dict,
)
# Merge in the cookies issued by the login response.
cookie_dict.update(session_reponse.cookies.get_dict())

# Step 4: fetch a page that requires login, again carrying the cookies.
index_reponse = requests.get(
    "https://github.com/settings/repositories",
    cookies=cookie_dict,
)
soup2 = BeautifulSoup(index_reponse.text, features="html.parser")

# Step 5: print each repository's name and size from the list.
item_list = soup2.find_all("div", {'class': 'listgroup-item'})
for item in item_list:
    a_obj = item.find("a")
    s_obj = item.find('small')
    if a_obj is None or s_obj is None:
        # Skip rows that do not have the expected link/size markup.
        continue
    print(a_obj.text)
    print(s_obj.text)
推文:爲什麼大量網站不能抓取?爬蟲突破封禁的6種常見方法
import requests
# Log in to chouti and up-vote a link while passing cookies by hand.

# Request headers that make the client look like a browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}

# First visit: collect the cookies issued before login.
i1 = requests.get("https://dig.chouti.com/", headers=headers)
i1_cookie = i1.cookies.get_dict()
print(i1_cookie)
i1.close()

form_data = {
    'phone': "xxxx",
    'password': "xxxx",
    'oneMonth': '',
}

# The remaining requests accept any content type.
headers['Accept'] = '*/*'
i2 = requests.post(
    url="https://dig.chouti.com/login",
    headers=headers,
    data=form_data,
    cookies=i1_cookie,
)
i2_cookie = i2.cookies.get_dict()
# Merge so that, on a key clash, the pre-login cookie value wins.
i2_cookie.update(i1_cookie)

i3 = requests.post(
    "https://dig.chouti.com/link/vote?linksId=20306326",
    headers=headers,
    cookies=i2_cookie,
)
print(i3.text)
{'JSESSIONID': 'aaaoJAuXMtUytb02Uw9pw', 'route': '0c5178ac241ad1c9437c2aafd89a0e50', 'gpsd': '91e20c26ddac51c60ce4ca8910fb5669'}
{"result":{"code":"9999", "message":"推薦成功", "data":{"jid":"cdu_52941024478","likedTime":"1529420936883000","lvCount":"23","nick":"山上有風景","uvCount":"2","voteTime":"小於1分鐘前"}}}