1.最簡單用法
urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,...)
1 import urllib2 2 import urllib 3 4 5 response = urllib2.urlopen("http://www.baidu.com") 6 7 print 'getcode():',response.getcode() 8 print 'geturl():',response.geturl() 9 print 'url:',response.url 10 print 'headers:\n',response.headers 11 print 'msg:',response.msg 12 13 #-------------------------------------out-------------------------------------- 14 getcode(): 200 15 geturl(): http://www.baidu.com 16 url: http://www.baidu.com 17 headers: 18 Date: Thu, 29 Dec 2016 06:28:36 GMT 19 Content-Type: text/html; charset=utf-8 20 Transfer-Encoding: chunked 21 Connection: Close 22 Vary: Accept-Encoding 23 Set-Cookie: BAIDUID=9A1E663B4C3AB33D11266F0D865A1F59:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com 24 Set-Cookie: BIDUPSID=9A1E663B4C3AB33D11266F0D865A1F59; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com 25 Set-Cookie: PSTM=1482992916; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com 26 Set-Cookie: BDSVRTM=0; path=/ 27 Set-Cookie: BD_HOME=0; path=/ 28 Set-Cookie: H_PS_PSSID=21858_1464_21112_17001_21553_20930; path=/; domain=.baidu.com 29 P3P: CP=" OTI DSP COR IVA OUR IND COM " 30 Cache-Control: private 31 Cxy_all: baidu+0ba0b09e0fa305471b5e3b42c352570f 32 Expires: Thu, 29 Dec 2016 06:27:54 GMT 33 X-Powered-By: HPHP 34 Server: BWS/1.1 35 X-UA-Compatible: IE=Edge,chrome=1 36 BDPAGETYPE: 1 37 BDQID: 0x889c1bcd00004be7 38 BDUSERID: 0 39 40 msg: OK
獲取html內容
print response.read()       # returns the whole page as one str
print response.readline()   # returns one line per call
print response.readlines()  # returns all lines as a list
2. 構造Request 設置headers
1 def set_headers(): 2 #構造Request,設置headers 3 #__init__(self, url, data=None, headers={},origin_req_host=None, unverifiable=False) 4 import urllib2 5 headers = {'User-Agent':'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} 6 request = urllib2.Request("http://localhost:5000/urllib2testget",headers=headers) 7 8 response = urllib2.urlopen(request) 9 print request.headers 10 #追加一個header 11 request.add_header("addheader","nice") 12 response = urllib2.urlopen(request) 13 print request.headers 14 15 set_headers() 16 17 #--------------------------------輸出: 18 19 {'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} 20 {"a": "1", "": "2"} 21 ------------------------------------------------ 22 {'Addheader': 'nice', 'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} 23 {"a": "1", "": "2"}
3.發送get請求,發送post請求
1 def get_post(): 2 #get方式 3 import urllib2 4 import urllib 5 headers = {'User-Agent':'liu bi'} 6 values = {"username":"diaosir_get","password":"diao123_get"} 7 data = urllib.urlencode(values) 8 print '---------------------get:' 9 url = "http://localhost:5000/urllib2testget" 10 get_url=url+"?"+data 11 request = urllib2.Request(get_url,headers=headers) 12 response = urllib2.urlopen(request) 13 print json.loads(response.read()) 14 print '---------------------post:' 15 url = "http://localhost:5000/urllib2testpost" 16 request = urllib2.Request(url,data,headers=headers) 17 response = urllib2.urlopen(request) 18 print json.loads(response.read()) 19 20 get_post() 21 22 #---------------------------------------------------------輸出: 23 ---------------------get: 24 {u'username': u'diaosir_get', u'password': u'diao123_get'} 25 ---------------------post: 26 {u'username': u'diaosir_get', u'password': u'diao123_get'}
4.代理模式設置
def set_proxies(): #1.proxy_handler #2.建立operner #3.安裝opener[非必須] #4.拿operner去請求url enable_proxy = True proxy_handler = urllib2.ProxyHandler({"http":'http://120.24.73.165:3128'}) null_proxy_handler = urllib2.ProxyHandler({}) if enable_proxy: opener = urllib2.build_opener(proxy_handler)#掛載opener else: opener = urllib2.build_opener(null_proxy_handler) request = urllib2.Request('http://www.baidu.com') print '---------------------不使用代理' response = urllib2.urlopen(request) print response.getcode(),request.host print '---------------------使用代理' response = opener.open(request) print response.getcode(),request.host #----------------------------------------------------------輸出 ---------------------不使用代理 200 www.baidu.com ---------------------使用代理 200 120.24.73.165:3128
5.debug模式, 代碼中urllib2.build_opener中的httpsHandler須要去掉
1 def debug_set(): 2 #代理,調試 3 import urllib2,urllib 4 proxy_handler = urllib2.ProxyHandler({"http":'http://192.168.1.108:89'}) 5 6 #debuglog的使用 7 httpHandler = urllib2.HTTPHandler(debuglevel=1) 8 opener = urllib2.build_opener(httpHandler, httpsHandler,) 9 urllib2.install_opener(opener) 10 request = urllib2.Request('http://127.0.0.1:5000/urllib2testget?a=2&b=3',headers={'User-Agent':'liubi00'}) 11 response = opener.open(request) 12 print response.getcode(),response.read() 13 14 15 16 17 #-------------------------------------------輸出: 18 send: 'GET /urllib2testget?a=2&b=3 HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: 127.0.0.1:5000\r\nConnection: close\r\nUser-Agent: liubi00\r\n\r\n' 19 reply: 'HTTP/1.0 200 OK\r\n' 20 header: Content-Type: text/html; charset=utf-8 21 header: Content-Length: 20 22 header: Server: Werkzeug/0.11.11 Python/2.7.12 23 header: Date: Fri, 30 Dec 2016 15:12:40 GMT 24 200 {"a": "2", "b": "3"}
6.獲取cookie存到cookie.txt
import cookielib import urllib2 def get_cookie(): filename = 'cookie.txt' #聲明一個MozillaCookieJar對象實例來保存cookie,以後寫入文件 cookie = cookielib.MozillaCookieJar(filename) #利用urllib2庫的HTTPCookieProcessor對象來建立cookie處理器 handler = urllib2.HTTPCookieProcessor(cookie) #經過handler來構建opener opener = urllib2.build_opener(handler,) request = urllib2.Request('http://www.baidu.com') request.add_header('User-Agent','fuckyou') response = opener.open(request) #保存cookie到文件 cookie.save(ignore_discard=True, ignore_expires=True) print response.getcode() get_cookie() #----------------------------------------------輸出: 200
7.經過cookie請求,更多查看http://www.cnblogs.com/sysu-blackbear/p/3629770.html
1 import cookielib 2 import urllib2 3 def use_cookie(): 4 #cookie--從cookies.txt讀取cookies,攜帶cookies請求 5 cookie_file = 'cookie.txt' 6 #建立MozillaCookieJar實例對象 7 cookie = cookielib.MozillaCookieJar(cookie_file) 8 #從文件中讀取cookie內容到變量 9 cookie.load( ignore_discard=True, ignore_expires=True) 10 #建立請求的request 11 req = urllib2.Request("http://www.baidu.com") 12 #利用urllib2的build_opener方法建立一個opener 13 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie)) 14 response = opener.open(req) 15 print response.read()
8.異常處理
1 def deal_errors(): 2 #異常處理 3 import urllib2 4 #HTTPError 5 req = urllib2.Request('http://blog.csdn.net/cqcre') 6 try: 7 urllib2.urlopen(req) 8 except urllib2.HTTPError, e: 9 print e.code 10 print e.reason 11 12 #URLError 13 requset = urllib2.Request('http://www.xxxxx.com') 14 try: 15 urllib2.urlopen(requset) 16 except urllib2.URLError, e: 17 print e.reason 18 19 #HTTPERROR&URLERROR 20 req = urllib2.Request('http://blog.csdn.net/cqcre') 21 try: 22 urllib2.urlopen(req) 23 except urllib2.URLError, e: 24 if hasattr(e,"code"): 25 print e.code 26 if hasattr(e,"reason"): 27 print e.reason 28 else: 29 print "OK"