通用代碼框架:
import requests


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    The response encoding is set from ``apparent_encoding`` (the encoding
    requests detects from the body) before the text is decoded.

    Returns the decoded page text on success.  If requests raises (for
    example a connection error, a timeout after 30 seconds, or a 4xx/5xx
    status reported by ``raise_for_status``), the literal string "產生異常"
    is returned instead of propagating the exception.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the
        # failure path below instead of being returned as page text.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are handled; a bare ``except``
        # would also swallow KeyboardInterrupt and programming errors.
        return "產生異常"
爬取某網頁100次花費的時間:
import time

import requests


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    The response encoding is set from ``apparent_encoding`` before the
    text is decoded.  If requests raises (connection error, 30-second
    timeout, or a 4xx/5xx status via ``raise_for_status``), the literal
    string "產生異常" is returned instead of raising.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Catch only request failures; a bare ``except`` would also hide
        # KeyboardInterrupt, making the 100-request loop hard to abort.
        return "產生異常"


if __name__ == '__main__':
    url = 'http://www.baidu.com'
    # perf_counter is monotonic, so the measured duration is not affected
    # by system clock adjustments during the run.
    start = time.perf_counter()
    for _ in range(100):
        getHTMLText(url)
    elapsed = time.perf_counter() - start
    # %d truncates the elapsed time to whole seconds, as in the original.
    print('爬取100次須要花費的時間爲%d秒' % elapsed)
京東商品頁面的爬取:
import requests

# Fetch a JD product page and print the first 1000 characters of its text.
url = 'https://item.jd.com/5369026.html'
try:
    # A timeout keeps the script from hanging forever on a stalled server.
    r = requests.get(url, timeout=30)
    # Treat 4xx/5xx responses as failures rather than printing an error page.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    print(r.text[:1000])
except requests.RequestException:
    # Only request-related errors are reported as a crawl failure; other
    # exceptions (e.g. KeyboardInterrupt) are no longer silently swallowed.
    print('爬取失敗')
爬取有訪問限制的網頁(修改 User-Agent 請求頭):
import requests

# Fetch a page while sending a custom User-Agent header, then print
# characters 1000-2000 of the decoded text.
url = 'http://yzb.tju.edu.cn/xwzx/tkbs_xw/201609/t20160914_285521.htm'
try:
    # Override the default User-Agent that requests would otherwise send.
    kv = {'user-agent': 'Mozilla/5.0'}
    # A timeout keeps the script from hanging forever on a stalled server.
    r = requests.get(url, headers=kv, timeout=30)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    print(r.text[1000:2000])
except requests.RequestException:
    # Only request-related errors are reported as a crawl failure.
    print('爬取失敗')
百度關鍵詞搜索:
import requests

# Submit a keyword to Baidu search via the ``wd`` query parameter and print
# the final request URL and the length of the returned text.
keyword = 'Python'
try:
    kv = {'wd': keyword}
    # ``params`` lets requests build and encode the query string; the
    # timeout keeps the script from hanging on a stalled server.
    r = requests.get('http://www.baidu.com/s', params=kv, timeout=30)
    # Show the URL that was actually requested, including the query string.
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Only request-related errors are reported as a crawl failure.
    print('爬取失敗')
360關鍵詞搜索全代碼:
import requests

# Submit a keyword to 360 search (so.com) via the ``q`` query parameter and
# print the final request URL and the length of the returned text.
keyword = 'Python'
try:
    kv = {'q': keyword}
    # ``params`` lets requests build and encode the query string; the
    # timeout keeps the script from hanging on a stalled server.
    r = requests.get('http://www.so.com/s', params=kv, timeout=30)
    # Show the URL that was actually requested, including the query string.
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Only request-related errors are reported as a crawl failure.
    print('爬取失敗')
圖片爬取:
import os

import requests

# Download one image into a local directory, naming the file after the last
# path segment of the URL.  An already-downloaded file is not fetched again.
url = 'http://image.nationalgeographic.com.cn/2017/0905/20170905114825283.jpg'
root = 'E://pics//'
path = root + url.split('/')[-1]
try:
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        r = requests.get(url, timeout=30)
        # Without this check an HTTP error page would be written to disk
        # under the image's name and reported as a successful save.
        r.raise_for_status()
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(r.content)
        print('文件保存成功')
    else:
        print('文件已存在')
except (requests.RequestException, OSError):
    # Network failures and filesystem failures (directory creation, file
    # write) are both reported as a crawl failure.
    print('爬取失敗')
IP地址查詢:
import requests

# Query the ip138 lookup page for a fixed IP address and print the last
# 500 characters of the decoded response text.
url = 'http://m.ip138.com/ip.asp?ip='
try:
    # The IP address is appended directly to the query-string prefix above;
    # the timeout keeps the script from hanging on a stalled server.
    r = requests.get(url + '202.204.80.112', timeout=30)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    print(r.text[-500:])
except requests.RequestException:
    # Only request-related errors are reported as a crawl failure.
    print('爬取失敗')