1.最簡單用法
urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,...)
1 import urllib2 2 import urllib 3 4 5 response = urllib2.urlopen("http://www.baidu.com") 6 7 print 'getcode():',response.getcode() 8 print 'geturl():',response.geturl() 9 print 'url:',response.url 10 print 'headers:\n',response.headers 11 print 'msg:',response.msg 12 13 #-------------------------------------out-------------------------------------- 14 getcode(): 200 15 geturl(): http://www.baidu.com 16 url: http://www.baidu.com 17 headers: 18 Date: Thu, 29 Dec 2016 06:28:36 GMT 19 Content-Type: text/html; charset=utf-8 20 Transfer-Encoding: chunked 21 Connection: Close 22 Vary: Accept-Encoding 23 Set-Cookie: BAIDUID=9A1E663B4C3AB33D11266F0D865A1F59:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com 24 Set-Cookie: BIDUPSID=9A1E663B4C3AB33D11266F0D865A1F59; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com 25 Set-Cookie: PSTM=1482992916; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com 26 Set-Cookie: BDSVRTM=0; path=/ 27 Set-Cookie: BD_HOME=0; path=/ 28 Set-Cookie: H_PS_PSSID=21858_1464_21112_17001_21553_20930; path=/; domain=.baidu.com 29 P3P: CP=" OTI DSP COR IVA OUR IND COM " 30 Cache-Control: private 31 Cxy_all: baidu+0ba0b09e0fa305471b5e3b42c352570f 32 Expires: Thu, 29 Dec 2016 06:27:54 GMT 33 X-Powered-By: HPHP 34 Server: BWS/1.1 35 X-UA-Compatible: IE=Edge,chrome=1 36 BDPAGETYPE: 1 37 BDQID: 0x889c1bcd00004be7 38 BDUSERID: 0 39 40 msg: OK
獲取html內容
print response.read()       # returns the whole page as one str
print response.readline()   # returns one line per call
print response.readlines()  # returns all lines as a list
2. 構造Request 設置headers
1 def set_headers(): 2 #構造Request,設置headers 3 #__init__(self, url, data=None, headers={},origin_req_host=None, unverifiable=False) 4 import urllib2 5 headers = {'User-Agent':'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} 6 request = urllib2.Request("http://localhost:5000/urllib2testget",headers=headers) 7 8 response = urllib2.urlopen(request) 9 print request.headers 10 #追加一個header 11 request.add_header("addheader","nice") 12 response = urllib2.urlopen(request) 13 print request.headers 14 15 set_headers() 16 17 #--------------------------------輸出: 18 19 {'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} 20 {"a": "1", "": "2"} 21 ------------------------------------------------ 22 {'Addheader': 'nice', 'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'} 23 {"a": "1", "": "2"}
3.發送get請求,發送post請求
1 def get_post(): 2 #get方式 3 import urllib2 4 import urllib 5 headers = {'User-Agent':'liu bi'} 6 values = {"username":"diaosir_get","password":"diao123_get"} 7 data = urllib.urlencode(values) 8 print '---------------------get:' 9 url = "http://localhost:5000/urllib2testget" 10 get_url=url+"?"+data 11 request = urllib2.Request(get_url,headers=headers) 12 response = urllib2.urlopen(request) 13 print json.loads(response.read()) 14 print '---------------------post:' 15 url = "http://localhost:5000/urllib2testpost" 16 request = urllib2.Request(url,data,headers=headers) 17 response = urllib2.urlopen(request) 18 print json.loads(response.read()) 19 20 get_post() 21 22 #---------------------------------------------------------輸出: 23 ---------------------get: 24 {u'username': u'diaosir_get', u'password': u'diao123_get'} 25 ---------------------post: 26 {u'username': u'diaosir_get', u'password': u'diao123_get'}
4.代理模式設置
def set_proxies(): #1.proxy_handler #2.建立operner #3.安裝opener[非必須] #4.拿operner去請求url enable_proxy = True proxy_handler = urllib2.ProxyHandler({"http":'http://120.24.73.165:3128'}) null_proxy_handler = urllib2.ProxyHandler({}) if enable_proxy: opener = urllib2.build_opener(proxy_handler)#掛載opener else: opener = urllib2.build_opener(null_proxy_handler) request = urllib2.Request('http://www.baidu.com') print '---------------------不使用代理' response = urllib2.urlopen(request) print response.getcode(),request.host print '---------------------使用代理' response = opener.open(request) print response.getcode(),request.host #----------------------------------------------------------輸出 ---------------------不使用代理 200 www.baidu.com ---------------------使用代理 200 120.24.73.165:3128
5.debug模式, 代碼中urllib2.build_opener中的httpsHandler須要去掉
1 def debug_set(): 2 #代理,調試 3 import urllib2,urllib 4 proxy_handler = urllib2.ProxyHandler({"http":'http://192.168.1.108:89'}) 5 6 #debuglog的使用 7 httpHandler = urllib2.HTTPHandler(debuglevel=1) 8 opener = urllib2.build_opener(httpHandler, httpsHandler,) 9 urllib2.install_opener(opener) 10 request = urllib2.Request('http://127.0.0.1:5000/urllib2testget?a=2&b=3',headers={'User-Agent':'liubi00'}) 11 response = opener.open(request) 12 print response.getcode(),response.read() 13 14 15 16 17 #-------------------------------------------輸出: 18 send: 'GET /urllib2testget?a=2&b=3 HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: 127.0.0.1:5000\r\nConnection: close\r\nUser-Agent: liubi00\r\n\r\n' 19 reply: 'HTTP/1.0 200 OK\r\n' 20 header: Content-Type: text/html; charset=utf-8 21 header: Content-Length: 20 22 header: Server: Werkzeug/0.11.11 Python/2.7.12 23 header: Date: Fri, 30 Dec 2016 15:12:40 GMT 24 200 {"a": "2", "b": "3"}
6.獲取cookie存到cookie.txt
import cookielib import urllib2 def get_cookie(): filename = 'cookie.txt' #聲明一個MozillaCookieJar對象實例來保存cookie,以後寫入文件 cookie = cookielib.MozillaCookieJar(filename) #利用urllib2庫的HTTPCookieProcessor對象來建立cookie處理器 handler = urllib2.HTTPCookieProcessor(cookie) #經過handler來構建opener opener = urllib2.build_opener(handler,) request = urllib2.Request('http://www.baidu.com') request.add_header('User-Agent','fuckyou') response = opener.open(request) #保存cookie到文件 cookie.save(ignore_discard=True, ignore_expires=True) print response.getcode() get_cookie() #----------------------------------------------輸出: 200
7.經過cookie請求,更多查看http://www.cnblogs.com/sysu-blackbear/p/3629770.html
1 import cookielib 2 import urllib2 3 def use_cookie(): 4 #cookie--從cookies.txt讀取cookies,攜帶cookies請求 5 cookie_file = 'cookie.txt' 6 #建立MozillaCookieJar實例對象 7 cookie = cookielib.MozillaCookieJar(cookie_file) 8 #從文件中讀取cookie內容到變量 9 cookie.load( ignore_discard=True, ignore_expires=True) 10 #建立請求的request 11 req = urllib2.Request("http://www.baidu.com") 12 #利用urllib2的build_opener方法建立一個opener 13 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie)) 14 response = opener.open(req) 15 print response.read()
8.異常處理
1 def deal_errors(): 2 #異常處理 3 import urllib2 4 #HTTPError 5 req = urllib2.Request('http://blog.csdn.net/cqcre') 6 try: 7 urllib2.urlopen(req) 8 except urllib2.HTTPError, e: 9 print e.code 10 print e.reason 11 12 #URLError 13 requset = urllib2.Request('http://www.xxxxx.com') 14 try: 15 urllib2.urlopen(requset) 16 except urllib2.URLError, e: 17 print e.reason 18 19 #HTTPERROR&URLERROR 20 req = urllib2.Request('http://blog.csdn.net/cqcre') 21 try: 22 urllib2.urlopen(req) 23 except urllib2.URLError, e: 24 if hasattr(e,"code"): 25 print e.code 26 if hasattr(e,"reason"): 27 print e.reason 28 else: 29 print "OK"