import requests
import os

# Script: run a Sogou web search for a user-supplied keyword and save the
# returned page to ./sougou.html.

# Search keyword, read interactively from the user.
word = input('enter a word you want to search:')

# Custom request headers: present a desktop-browser User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Search endpoint.
url = 'https://www.sogou.com/web'

# Query-string parameters for the GET request.
# (The original bound this dict to `prams` but passed `param` to
# requests.get, which raised NameError before any request was sent.)
param = {
    'query': word,
    'ie': 'utf-8',
}

# Send the request. The headers dict is now actually passed; previously it
# was built but never used.
response = requests.get(url=url, params=param, headers=headers)

# Write the response body to disk as UTF-8 text.
page_text = response.text
with open('./sougou.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
import requests
import os

# Script: POST a login form to Douban and save the returned page to
# ./douban111.html.

url = 'https://accounts.douban.com/login'

# Form fields sent in the POST body.
# NOTE(review): the account name and password are hard-coded literals here;
# consider loading them from the environment or a secrets store instead of
# keeping them in source control.
data = {
    "source": "movie",
    "redir": "https://movie.douban.com/",
    "form_email": "15027900535",
    "form_password": "bobo@15027900535",
    "login": "登陸",
}

# Custom request headers: present a desktop-browser User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Send the form. The headers dict is now actually passed; previously it was
# built but never used, so the request went out with the library default
# User-Agent.
response = requests.post(url=url, data=data, headers=headers)

# Write the response body to disk as UTF-8 text.
page_text = response.text
with open('./douban111.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
import urllib.request

if __name__ == "__main__":
    # Script: query Douban's movie chart AJAX endpoint and print the raw
    # response text.

    # Request headers must be supplied as a dict; only the User-Agent is
    # customized here, but other header fields could be added the same way.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }

    # Query-string parameters (values taken from a packet capture, per the
    # original author's note).
    query = dict(
        type='5',
        interval_id='100:90',
        action='',
        start='0',
        limit='20',
    )

    # AJAX GET endpoint (also obtained via packet capture).
    endpoint = 'https://movie.douban.com/j/chart/top_list?'

    # Issue the GET request and print the body; the original author notes
    # that the response content is a JSON string.
    reply = requests.get(endpoint, params=query, headers=browser_headers)
    print(reply.text)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
import urllib.request

if __name__ == "__main__":
    # Script: query the KFC store-locator AJAX endpoint by keyword and print
    # the raw response text.

    # AJAX POST endpoint (obtained via packet capture, per the original
    # author's note).
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'

    # Request headers must be supplied as a dict; only the User-Agent is
    # customized here, but other header fields could be added the same way.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }

    # Form fields carried in the POST body (values taken from a packet
    # capture).
    data = {
        'cname': '',
        'pid': '',
        'keyword': '北京',
        'pageIndex': '1',
        'pageSize': '10',
    }

    # Send the form as a POST. The original called requests.get(...,
    # data=data), which contradicts the documented intent of this script
    # (an AJAX POST with a form payload).
    response = requests.post(url=url, headers=headers, data=data)

    # Print the body; the original author notes that the response content
    # is a JSON string.
    print(response.text)
import requests
import os

# Script: fetch a range of Sogou-Zhihu search result pages for a
# user-supplied keyword and save each page under ./sougou/.

# Search keyword, read interactively from the user.
word = input('enter a word you want to search:')

# Inclusive page range to fetch.
start_page = int(input('enter start page num:'))
end_page = int(input('enter end page num:'))

# Custom request headers: present a desktop-browser User-Agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

# Search endpoint.
url = 'https://zhihu.sogou.com/zhihu'

# Create the output directory; exist_ok avoids the separate existence check
# and the race between checking and creating.
os.makedirs('./sougou', exist_ok=True)

for page in range(start_page, end_page + 1):
    # Query-string parameters for this page.
    params = {
        'query': word,
        'ie': 'utf-8',
        'page': str(page),
    }

    # Send the GET request. The headers dict is now actually passed;
    # previously it was built but never used.
    response = requests.get(url=url, params=params, headers=headers)

    # Save the page as ./sougou/<word>_<page>.html.
    page_text = response.text
    fileName = f'{word}_{page}.html'
    filePath = './sougou/' + fileName
    with open(filePath, 'w', encoding='utf-8') as fp:
        fp.write(page_text)

    print(f'爬取{page}頁結束')