什麼是requests模塊:
requests模塊是python中基於網絡請求的第三方模塊,其主要作用是用來模擬瀏覽器發起請求。功能強大,用法簡潔高效。在爬蟲領域中佔據著半壁江山的地位。
爲何要使用requests模塊
安裝:pip install requests
requests模塊的使用流程
# Fetch a Sogou web-search result page for a user-supplied term and save the
# raw response bytes to "<term>.html" in the current directory.
import requests

url = 'https://www.sogou.com/web'

# Build the query-string parameters from user input.
wd = input("enter a word: ")
param = {
    'query': wd
}

# UA spoofing: the User-Agent request header identifies the client that is
# making the request, so a browser-style value is sent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36'
}

# Send the request. requests has no default timeout, so one is set explicitly
# to keep the script from hanging forever on an unresponsive server.
response = requests.get(url, params=param, headers=headers, timeout=10)
# Raise on 4xx/5xx so an error page is never saved and reported as a success.
response.raise_for_status()

# Raw response body as bytes (written unchanged, hence the 'wb' mode below).
page_text = response.content

# Persist the page to disk.
fileName = wd + ".html"
with open(fileName, 'wb') as f:
    f.write(page_text)
print(f"{wd}下載成功")
在爬取並存儲圖片時,urllib模塊比較方便:
# Download a single image with the standard-library urllib module.
from urllib import request

image_url = "https://gss2.bdstatic.com/9fo3dSag_xI4khGkpoWK1HF6hhy/baike/w%3D268%3Bg%3D0/sign=081aba3563224f4a5799741531ccf76f/c83d70cf3bc79f3d423d2823b4a1cd11738b29c1.jpg"

# urlretrieve fetches the URL and writes the response body to the named
# local file in the current working directory.
request.urlretrieve(image_url, 'ycy.jpg')
# Query the Baidu Translate suggestion endpoint for a user-supplied word and
# print the decoded JSON reply.
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36'
}

# Endpoint that the page calls through an ajax request.
url = 'https://fanyi.baidu.com/sug'
wd = input("enter a english word: ")

# Form fields sent in the POST body.
data = {
    "kw": wd
}

# Send the POST request. requests has no default timeout, so set one to
# avoid blocking forever on an unresponsive server.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Raise on 4xx/5xx before attempting to decode the body as JSON.
response.raise_for_status()

# When the response is known to be JSON, .json() returns the decoded object
# directly.
json_data = response.json()
print(json_data)
print(type(response.text))
# Look up KFC stores matching a user-supplied keyword and print the decoded
# JSON reply.
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36'
}

url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword"
wd = input('請輸入查詢地點:')

# Form fields sent in the POST body; only the keyword comes from the user,
# the page index and page size are fixed.
data = {
    'cname': '',
    'pid': '',
    'keyword': wd,
    'pageIndex': '1',
    'pageSize': '100',
}

# requests has no default timeout, so set one to avoid hanging indefinitely.
response = requests.post(url=url, data=data, headers=headers, timeout=10)
# Raise on 4xx/5xx before attempting to decode the body as JSON.
response.raise_for_status()

json_data = response.json()
print(json_data)
需求分析: 爬取指定頁面範圍內的公司,以及每家公司的詳情頁數據
# Site root: http://125.35.6.84:81/xk/
#
# Two-stage crawl: first collect the record IDs from the listing endpoint for
# a user-supplied page range, then request and print the detail record for
# each collected ID.
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.12 Safari/537.36'
}

# Listing endpoint (returns one page of records per request).
url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'
# Detail endpoint (returns the record for one ID).
url2 = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById'

id_list = []
start_page = int(input('起始頁:'))
end_page = int(input('結束頁:'))

# Every request goes to the same host, so a single Session is used to reuse
# the underlying connection instead of opening a new one per request.
with requests.Session() as session:
    session.headers.update(headers)

    # Stage 1: walk the listing pages (end page inclusive) and gather IDs.
    for i in range(start_page, end_page + 1):
        data = {
            'on': 'true',
            'page': str(i),
            'pageSize': '15',
            'productName': '',
            'conditionType': '1',
            'applyname': '',
            'applysn': '',
        }
        # requests has no default timeout; set one so a stalled server cannot
        # block the crawl forever, and raise on 4xx/5xx before decoding.
        response = session.post(url=url, data=data, timeout=10)
        response.raise_for_status()
        json_data = response.json()
        id_list.extend(item["ID"] for item in json_data['list'])

    # Stage 2: fetch and print the detail record for every collected ID.
    for id_item in id_list:
        data_id = {
            'id': id_item
        }
        response2 = session.post(url=url2, data=data_id, timeout=10)
        response2.raise_for_status()
        json_data2 = response2.json()
        print(json_data2)