選取一個商品頁面
html
# Fetch a JD.com product page and print its HTML.
import requests

url = "https://item.jd.com/100011333796.html#crumb-wrap"
try:
    # Override the User-Agent header so the request looks like it comes
    # from a browser.
    kv = {'user-agent': 'Mozilla/5.0'}
    # A timeout keeps the script from blocking forever on a stalled server.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
    # Decode the body with the encoding detected from its content.
    r.encoding = r.apparent_encoding
    print(r.text)
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); Ctrl-C and programming errors still propagate.
    print("爬取失敗")
# Submit a keyword search to Baidu and print the final request URL and the
# length of the returned page.
import requests

keyword = "Python"
try:
    # Baidu takes the search term in the "wd" query parameter.
    kv = {'wd': keyword}
    # A timeout keeps the script from blocking forever on a stalled server.
    r = requests.get("http://www.baidu.com/s", params=kv, timeout=10)
    # Show the URL that was actually requested, query string included.
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); Ctrl-C and programming errors still propagate.
    print("爬取失敗")
# Submit a keyword search to so.com and print the final request URL and the
# length of the returned page.
import requests

keyword = "Python"
try:
    # so.com takes the search term in the "q" query parameter.
    kv = {'q': keyword}
    # A timeout keeps the script from blocking forever on a stalled server.
    r = requests.get("http://www.so.com/s", params=kv, timeout=10)
    # Show the URL that was actually requested, query string included.
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); Ctrl-C and programming errors still propagate.
    print("爬取失敗")
爬取 CSDN 上的某張圖片(Python)
# Download one image from the CSDN image CDN and save it to a local folder,
# skipping the download when the target file already exists.
import os

import requests

kv = {'user-agent': 'Mozilla/5.0'}
url = "https://imgconvert.csdnimg.cn/aHR0cHM6Ly9tbWJpei5xcGljLmNuL21tYml6X2pwZy8xaFJlSGFxYWZhZTN5bVlZanN2TWRmRnB4YUZiY3VNRFBOVXNPNzg0NWZST0V1cmVTNGdWWmliYXYyWTIzYlI1WXZmUWRJNjJVTWhLNWJyTWRpYXRDSnJnLzY0MA?x-oss-process=image/format,png"
root = "E://python爬蟲學習//"
# The file extension is whatever follows the last comma in the URL
# ("png" for the "image/format,png" suffix above).
path = root + '不想奮鬥.' + url.split(',')[-1]
try:
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        # A timeout keeps the script from blocking forever on a stalled server.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
        # The payload is binary, so write r.content as-is; no text encoding
        # detection is needed. The with-block closes the file on exit.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("文件保存成功")
    else:
        print("文件已存在")
except (requests.RequestException, OSError):
    # Network/HTTP failures and filesystem errors are reported the same way;
    # Ctrl-C and programming errors still propagate.
    print("爬取失敗")
手動查詢網址:https://www.ip138.com/
查詢中北大學網址(web)
# Query ip138.com for a fixed IP address and print a slice of the result page.
import requests

kv = {'user-agent': 'Mozilla/5.0'}
# The IP address is inserted between these two URL fragments.
url1 = "https://www.ip138.com/iplookup.asp?ip="
url2 = "&action=2"
try:
    # A timeout keeps the script from blocking forever on a stalled server.
    r = requests.get(url1 + '202.207.177.39' + url2, headers=kv, timeout=10)
    r.raise_for_status()
    # Decode the body with the encoding detected from its content.
    r.encoding = r.apparent_encoding
    # Print only characters 1000-1799 of the page rather than the full HTML.
    print(r.text[1000:1800])
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); Ctrl-C and programming errors still propagate.
    print("爬取失敗")