Requests庫入門實例

爬蟲入門5個實例

實例1:京東商品頁面的爬取

import requests

def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    The request is made with a 30-second timeout. Before decoding, the
    response encoding is replaced by the one detected from the content
    (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or the string ``"Something Wrong!!!"`` when
        the request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx answers into an exception so they share the
        # failure path below.
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are expected here; a bare ``except``
        # would also hide KeyboardInterrupt and programming errors.
        return "Something Wrong!!!"
    else:
        r.encoding = r.apparent_encoding
        return r.text

# Download the JD product page and show only its first 1000 characters.
url = "https://item.jd.com/27528447148.html"
page = getHTMLText(url)
print(page[:1000])

實例2:亞馬遜商品頁面的爬取

import requests
# Request headers sent with every fetch: a browser-like User-Agent replaces
# the default one. NOTE(review): presumably the site rejects the library's
# default User-Agent -- confirm.
kv = {'user-agent':'Mozilla/5.0'}


def getHTMLText(url):
    """Fetch ``url`` with the module-level headers ``kv`` and return its text.

    The request is made with a 30-second timeout. Before decoding, the
    response encoding is replaced by the one detected from the content
    (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or the string ``"Something Wrong!!!"`` when
        the request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30, headers=kv)
        # Turn 4xx/5xx answers into an exception so they share the
        # failure path below.
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are expected here; a bare ``except``
        # would also hide KeyboardInterrupt and programming errors.
        return "Something Wrong!!!"
    else:
        r.encoding = r.apparent_encoding
        return r.text

# Amazon product page to download; the query string is kept exactly as given.
url = "https://www.amazon.cn/dp/B0083DP0CY/ref=cngwdyfloorv2_recs_0/460-1382173-5298568?pf_rd_m=A1AJ19PSB66TGU&pf_rd_s=desktop-2&pf_rd_r=07R056YCCZREBTBFN41G&pf_rd_r=07R056YCCZREBTBFN41G&pf_rd_t=36701&pf_rd_p=d2aa3428-dc2b-4cfe-bca6-5e3a33f2342e&pf_rd_p=d2aa3428-dc2b-4cfe-bca6-5e3a33f2342e&pf_rd_i=desktop"
page = getHTMLText(url)
# Show characters 1000 through 1999 of the result.
print(page[1000:2000])

實例3:百度搜索關鍵字提交

import requests
# Search term to submit, and the query-string parameters that carry it
# (sent to the search endpoint as ``wd=<keyword>``).
keyword = "Python"
kv = {'wd':keyword}


def getHTMLText(url):
    """Fetch ``url`` with the module-level query parameters ``kv``.

    ``kv`` is passed as ``params``, so its entries are appended to ``url``
    as a query string. The request is made with a 30-second timeout, and
    the response encoding is replaced by the one detected from the content
    (``apparent_encoding``) before decoding.

    Args:
        url: Address of the endpoint to query, without the query string.

    Returns:
        The decoded page text, or the string ``"Something Wrong!!!"`` when
        the request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30, params=kv)
        # Turn 4xx/5xx answers into an exception so they share the
        # failure path below.
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are expected here; a bare ``except``
        # would also hide KeyboardInterrupt and programming errors.
        return "Something Wrong!!!"
    else:
        r.encoding = r.apparent_encoding
        return r.text

# Query the Baidu search endpoint and show characters 1000 through 1999
# of the result.
url = "http://www.baidu.com/s"
page = getHTMLText(url)
print(page[1000:2000])

實例4:網絡圖片的爬取和存儲

import requests
import os

# Image to download; the original image address is percent-encoded inside
# the ``src`` query parameter.
url = "https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1546773451861&di=7cd06f87b97e5a337e6e60a2986098dd&imgtype=jpg&src=http%3A%2F%2Fimg0.imgtn.bdimg.com%2Fit%2Fu%3D78010880%2C3063559069%26fm%3D214%26gp%3D0.jpg"
# Directory the image is stored in.
root = "D://pics//"
# The file name is whatever follows the last '%' in the URL; for the URL
# above that is "3D0.jpg" (the tail of the escape "%3D" plus "0.jpg").
path = root + url.rsplit('%', 1)[-1]

# Download the image at ``url`` into ``path`` unless that file already
# exists. Any network or file-system failure is reported with a single
# message instead of a traceback.
try:
    # Creates missing parent directories too, and does nothing if the
    # directory is already there.
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        r = requests.get(url, timeout=30)
        # Without this check an HTTP error page would be written to disk
        # as if it were the image.
        r.raise_for_status()
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("Saved!")
    else:
        print("Already Exists")
except (requests.RequestException, OSError):
    print("Something Wrong!!!")

實例5:IP地址歸屬地的自動查詢

import requests

def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    The request is made with a 30-second timeout. Before decoding, the
    response encoding is replaced by the one detected from the content
    (``apparent_encoding``).

    Args:
        url: Full address to request, including any query string.

    Returns:
        The decoded page text, or the string ``"Something Wrong!!!"`` when
        the request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx answers into an exception so they share the
        # failure path below.
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are expected here; a bare ``except``
        # would also hide KeyboardInterrupt and programming errors.
        return "Something Wrong!!!"
    else:
        r.encoding = r.apparent_encoding
        return r.text

# Lookup endpoint; the IP address to query is appended to it.
url = "http://m.ip138.com/ip.asp?ip="
ip = "202.204.80.112"
urls = "".join((url, ip))
page = getHTMLText(urls)
# Show only the last 500 characters of the result.
print(page[-500:])
相關文章
相關標籤/搜索