requests is an HTTP library for Python; with requests you can easily perform just about any operation a browser can.
requests.get()
requests.post()
requests.put()
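For orientation, a minimal sketch of a GET request and the response object it returns (httpbin.org is just a convenient test endpoint):

import requests

ret = requests.get('http://httpbin.org/get')
print(ret.status_code)  # HTTP status code, e.g. 200
print(ret.encoding)     # encoding used to decode ret.text
print(ret.text)         # response body, decoded to str
print(ret.content)      # response body, raw bytes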
requests.request(method, url, **kwargs)
Common keyword arguments:

url     = ''
params  = {'k1': 'v1', 'k2': 'v2'}   # query-string parameters; the only way to pass data with a GET request
cookies = {}
headers = {}                         # request headers
data    = {}                         # body parameters for POST and similar requests
json    = {}                         # JSON body parameter

requests.get(
    url='xxx',
    params={'k1': 'v1', 'nid': 888},
    cookies={},
    headers={},
)  # requests http://www.baidu.com?k1=v1&nid=888
requests.post(
    url='xxx',
    params={'k1': 'v1', 'nid': 888},
    cookies={},
    # form data
    headers={'content-type': 'application/x-www-form-urlencoded'},
    data={},
    # json
    # headers={'content-type': 'application/json'},
    # json={},
)
auth — HTTP authentication
def param_auth():
    from requests.auth import HTTPBasicAuth, HTTPDigestAuth

    ret = requests.get('https://api.github.com/user', auth=HTTPBasicAuth('wupeiqi', 'sdfasdfasdf'))
    print(ret.text)

    # ret = requests.get('http://192.168.1.1',
    #                    auth=HTTPBasicAuth('admin', 'admin'))
    # ret.encoding = 'gbk'
    # print(ret.text)

    # ret = requests.get('http://httpbin.org/digest-auth/auth/user/pass',
    #                    auth=HTTPDigestAuth('user', 'pass'))
    # print(ret)
allow_redirects — redirects (controls whether URL redirects are followed)
def param_allow_redirects():
    ret = requests.get('http://127.0.0.1:8000/test/', allow_redirects=False)
    print(ret.text)
stream — streaming (when True, the response body is downloaded as a stream)
def param_stream():
    ret = requests.get('http://127.0.0.1:8000/test/', stream=True)
    print(ret.content)
    ret.close()

    # from contextlib import closing
    # with closing(requests.get('http://httpbin.org/get', stream=True)) as r:
    #     # process the response here
    #     for i in r.iter_content():
    #         print(i)
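Printing ret.content as above still pulls the whole body into memory; the usual reason for stream=True is to write a large response to disk chunk by chunk. A minimal sketch (the URL and filename are placeholders):

import requests

# download a large file piece by piece instead of buffering it all in memory
with requests.get('http://httpbin.org/bytes/102400', stream=True) as r:
    r.raise_for_status()
    with open('download.bin', 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)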
cert — client-side certificate (path to the certificate file)
requests.get('https://httpbin.org/get', cert="xxxx.pem")
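For completeness, cert also accepts a (certificate, key) tuple when the two live in separate files, and the related verify parameter controls how the server's certificate is checked. A hedged sketch with placeholder paths:

# client certificate and private key in separate files
requests.get('https://example.com', cert=('client.crt', 'client.key'))

# verify the server against a custom CA bundle, or disable verification (testing only)
requests.get('https://example.com', verify='ca-bundle.pem')
requests.get('https://example.com', verify=False)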
The Session here is not the server-side session covered earlier.
Here, a Session is a container that carries all the request headers, cookies, and so on.
Normally every requests call has to attach cookies and similar state by hand;
with a Session, cookies and other state are attached to every request automatically.
session = requests.Session()
session.post(url, data={})  # cookies=cookie can be omitted
# equivalent without a Session:
# response_ = requests.post(url, data={}, cookies=cookie)
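A minimal end-to-end sketch: log in once, then reuse the same Session so the login cookies ride along automatically (the URLs and form fields are made up):

import requests

session = requests.Session()

# the login response sets cookies, which the session stores
session.post('http://127.0.0.1:8000/login/', data={'user': 'alex', 'pwd': '123'})

# subsequent requests on the same session carry those cookies automatically
ret = session.get('http://127.0.0.1:8000/index/')
print(ret.text)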
BeautifulSoup is a module that takes an HTML or XML string and parses it into a structured document; you can then use the methods it provides to quickly locate specific elements, which makes finding elements in HTML or XML simple.
from bs4.element import Tag
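The snippets below all assume a soup object already built from some HTML, roughly like this sketch (the HTML is invented to match the ids and classes used in the examples):

from bs4 import BeautifulSoup

html = """
<html><body>
  <div>
    <a id="link1" class="sister" href="http://www.oldboy.com/static/1">Elsie</a>
    <a id="link2" class="sister" href="http://www.oldboy.com/static/2">Lacie</a>
    <p>Tillie</p>
  </div>
</body></html>
"""
soup = BeautifulSoup(html, 'html.parser')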
1, name — the tag name
# tag = soup.find('a')
# name = tag.name    # get
# print(name)
# tag.name = 'span'  # set
# print(soup)
2, attrs — tag attributes
# tag = soup.find('a')
# attrs = tag.attrs          # get
# print(attrs)
# tag.attrs = {'ik': 123}    # set
# tag.attrs['id'] = 'iiiii'  # set
# print(soup)
3, children — all direct child tags
# body = soup.find('body')
# v = body.children
4, descendants — all descendant tags
# body = soup.find('body')
# v = body.descendants
5, clear — remove everything inside the tag (the tag itself is kept)
# tag = soup.find('body')
# tag.clear()
# print(soup)
10, find — get the first matching tag
# tag = soup.find('a')
# print(tag)
# tag = soup.find(name='a', attrs={'class': 'sister'}, recursive=True, text='Lacie')
# tag = soup.find(name='a', class_='sister', recursive=True, text='Lacie')
# print(tag)
11, find_all — get all matching tags
# tags = soup.find_all('a')
# print(tags)

# tags = soup.find_all('a', limit=1)
# print(tags)

# tags = soup.find_all(name='a', attrs={'class': 'sister'}, recursive=True, text='Lacie')
# # tags = soup.find(name='a', class_='sister', recursive=True, text='Lacie')
# print(tags)

# ####### lists #######
# v = soup.find_all(name=['a', 'div'])
# print(v)
# v = soup.find_all(class_=['sister0', 'sister'])
# print(v)
# v = soup.find_all(text=['Tillie'])
# print(v, type(v[0]))
# v = soup.find_all(id=['link1', 'link2'])
# print(v)
# v = soup.find_all(href=['link1', 'link2'])
# print(v)

# ####### regular expressions #######
import re
# rep = re.compile('p')
# rep = re.compile('^p')
# v = soup.find_all(name=rep)
# print(v)
# rep = re.compile('sister.*')
# v = soup.find_all(class_=rep)
# print(v)
# rep = re.compile('http://www.oldboy.com/static/.*')
# v = soup.find_all(href=rep)
# print(v)

# ####### function filter #######
# def func(tag):
#     return tag.has_attr('class') and tag.has_attr('id')
# v = soup.find_all(name=func)
# print(v)

# ## get — read a tag attribute
# tag = soup.find('a')
# v = tag.get('id')
# print(v)
12, has_attr — check whether the tag has a given attribute
# tag = soup.find('a')
# v = tag.has_attr('id')
# print(v)
13, get_text — get the text inside the tag
# tag = soup.find('a')
# v = tag.get_text()
# print(v)
16, tags related to the current one
# soup.next
# soup.next_element
# soup.next_elements
# soup.next_sibling
# soup.next_siblings
#
# tag.previous
# tag.previous_element
# tag.previous_elements
# tag.previous_sibling
# tag.previous_siblings
#
# tag.parent
# tag.parents
17, search a tag's related tags
# tag.find_next(...)
# tag.find_all_next(...)
# tag.find_next_sibling(...)
# tag.find_next_siblings(...)
# tag.find_previous(...)
# tag.find_all_previous(...)
# tag.find_previous_sibling(...)
# tag.find_previous_siblings(...)
# tag.find_parent(...)
# tag.find_parents(...)
# same parameters as find_all
20, append — append a tag inside the current tag
# tag = soup.find('body')
# tag.append(soup.find('a'))
# print(soup)
#
# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = '我是一個新來的'
# tag = soup.find('body')
# tag.append(obj)
# print(soup)
21, insert — insert a tag at a given position inside the current tag
# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = '我是一個新來的'
# tag = soup.find('body')
# tag.insert(2, obj)
# print(soup)
22, insert_after / insert_before — insert after or before the current tag
# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = '我是一個新來的'
# tag = soup.find('body')
# # tag.insert_before(obj)
# tag.insert_after(obj)
# print(soup)
23, replace_with — replace the current tag with the given tag
# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = '我是一個新來的'
# tag = soup.find('div')
# tag.replace_with(obj)
# print(soup)
24, create relationships between tags
# tag = soup.find('div')
# a = soup.find('a')
# tag.setup(previous_sibling=a)
# print(tag.previous_sibling)