爬蟲隨筆一

一、requests模塊

# HTTP request methods: requests exposes one helper function per HTTP verb.
# Each call below rebinds `r` to the response of the most recent request.
# The snippets never imported the library, so bring it into scope here.
import requests

# GET
r = requests.get('https://github.com/timeline.json')
# POST
r = requests.post("http://m.ctrip.com/post")
# PUT
r = requests.put("http://m.ctrip.com/put")
# DELETE
r = requests.delete("http://m.ctrip.com/delete")
# HEAD
r = requests.head("http://m.ctrip.com/head")
# OPTIONS
r = requests.options("http://m.ctrip.com/get")

# Reading the response body.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(r.content)  # raw body as bytes (not decoded)
print(r.text)  # body decoded to text

# Passing query-string parameters: the dict given as params= is encoded and
# appended to the URL.
payload = {'keyword': '日本', 'salecityid': '2'}
r = requests.get("http://m.ctrip.com/webapp/tourvisa/visa_list", params=payload)
# The final URL carries the encoded query string, e.g.
# http://m.ctrip.com/webapp/tourvisa/visa_list?keyword=%E6%97%A5%E6%9C%AC&salecityid=2
# (non-ASCII values are percent-encoded; parameter order may differ).
print(r.url)

# Inspecting and overriding the response encoding
r = requests.get('https://github.com/timeline.json')
# Show the encoding currently associated with the response.
print(r.encoding)
# Override it; per the requests documentation, later reads of r.text use the new value.
r.encoding = 'utf-8'

# JSON handling
r = requests.get('https://github.com/timeline.json')
# Response.json() decodes the body itself, so the caller does not need to
# `import json` for this call. Per the requests documentation it raises an
# exception when the body is not valid JSON.
print(r.json())

# Custom request headers: pass a dict as headers= to add or override headers.
url = 'http://m.ctrip.com'
headers = {'User-Agent' : 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19'}
r = requests.post(url, headers=headers)
# r.request is the request that was sent; print its headers to confirm the
# custom User-Agent went out.
print(r.request.headers)

# POST with a JSON body.
# json.dumps is needed below and was never imported by these snippets.
import json

url = 'http://m.ctrip.com'
payload = {'some': 'data'}
# A dict passed as data= is form-encoded by requests; to send the payload as a
# JSON string instead, serialize it with json.dumps first.
r = requests.post(url, data=json.dumps(payload))

# POST a multipart-encoded file
url = 'http://m.ctrip.com'
# Open the file in binary mode inside a with-block so the handle is closed as
# soon as the upload finishes (the original left it open).
with open('report.xls', 'rb') as report:
    files = {'file': report}
    r = requests.post(url, files=files)

# Response status code
r = requests.get('http://m.ctrip.com')
# print(...) with one argument is valid on both Python 2 and Python 3.
print(r.status_code)
    
# Response headers
r = requests.get('http://m.ctrip.com')
# Dump every header returned by the server.
print(r.headers)
# Two ways to read a single header: subscript access and .get().
# The two lookups use different letter case for the same header name.
print(r.headers['Content-Type'])
print(r.headers.get('content-type'))
    
# Cookies: read cookies from a response, then send cookies with a request.
url = 'http://example.com/some/cookie/setting/url'
r = requests.get(url)
r.cookies['example_cookie_name']    # look up a cookie by name; the value is not stored or printed here
    
url = 'http://m.ctrip.com/cookies'
cookies = dict(cookies_are='working')
r = requests.get(url, cookies=cookies) # send the cookies dict along with the request

# Set a request timeout.
# NOTE(review): per the requests documentation the timeout is given in seconds;
# 0.001 is so short that this call will most likely raise a timeout error --
# presumably intentional, to demonstrate the option. Confirm before reusing.
r = requests.get('http://m.ctrip.com', timeout=0.001)

# Route requests through a proxy: the dict maps a URL scheme to the proxy URL
# to use for requests of that scheme. The addresses below are placeholders.
proxies = {
           "http": "http://10.10.10.10:8888",
           "https": "http://10.10.10.100:4444",
          }
r = requests.get('http://m.ctrip.com', proxies=proxies)
相關文章
相關標籤/搜索