urllib2和urllib是Python的兩個內置模塊。要實現HTTP功能,實現方式是以urllib2爲主,urllib爲輔。
import urllib2

# Simplest form of a GET request: hand the URL string straight to urlopen(),
# read the whole response body and print it.
page = urllib2.urlopen('http://www.cnblogs.com/guguobao')
html = page.read()
print(html)
# coding: utf-8
import urllib2

# Same GET as a two-step flow: build the Request object first (the request),
# then pass it to urlopen() to obtain the response.
target = 'http://www.cnblogs.com/guguobao'
request = urllib2.Request(target)

response = urllib2.urlopen(request)
html = response.read()
print(html)
# coding: utf-8
import urllib
import urllib2

# POST request: supplying a data argument to Request turns the GET into a POST.
url = 'http://www.cnblogs.com/login'
postdata = {
    'username': 'qiye',
    'password': 'qiye_pass',
}

# The form fields must be encoded into a format urllib2 understands;
# urllib.urlencode() does that.
data = urllib.urlencode(postdata)

req = urllib2.Request(url, data)
html = urllib2.urlopen(req).read()
爲上面的例子添加User-Agent域和Referer域信息:
# coding: utf-8
# Request-header handling: set the User-Agent and Referer fields of the
# request headers on a POST request.
import urllib
import urllib2

url = 'http://www.xxxxxx.com/login'
postdata = {
    'username': 'qiye',
    'password': 'qiye_pass',
}

# Put user_agent and referer into the header dictionary that is sent
# along with the request.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
referer = 'http://www.xxxxxx.com/'
headers = {
    'User-Agent': user_agent,
    'Referer': referer,
}

# Encode the form fields, then pass the headers as the third argument.
data = urllib.urlencode(postdata)
req = urllib2.Request(url, data, headers)
html = urllib2.urlopen(req).read()
import cookielib
import urllib2

# Let urllib2 manage cookies automatically: HTTPCookieProcessor stores every
# cookie the server sets into the CookieJar passed to it.
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
response = opener.open('http://www.zhihu.com')

# Print each received cookie as "name:value".
# Fix: the original printed item.name on both sides of the colon, so the
# cookie's value was never shown.
for item in cookie:
    print('%s:%s' % (item.name, item.value))
import cookielib
import urllib2

# Set the Cookie header by hand instead of using a CookieJar: append it to the
# opener's default headers so it goes out with the request made through it.
cookie_header = ('Cookie', 'email=' + 'helloguguobao@gmail.com')

opener = urllib2.build_opener()
# Author's note (translated): the Cookie name and the email value may be
# replaced with anything, but the header must not be missing.
opener.addheaders.append(cookie_header)

req = urllib2.Request('http://www.zhihu.com')
response = opener.open(req)
print(response.headers)
retdata = response.read()
import urllib2

# Per-request timeout: the timeout keyword of urlopen() is given in seconds
# (2 seconds here).
request = urllib2.Request('http://www.zhihu.com')
response = urllib2.urlopen(request, timeout=2)

html = response.read()
print(html)
import urllib2

# Fetch a URL and report failures instead of crashing.
#
# urlopen() raises HTTPError when the server answers with an error status and
# URLError when the request never gets that far (DNS failure, refused
# connection, unreachable host). The original caught only HTTPError, so any
# connection-level failure escaped as an unhandled exception.
# HTTPError is a subclass of URLError, so it has to be caught first.
try:
    response = urllib2.urlopen('http://www.google.com')
except urllib2.HTTPError as e:
    # An HTTPError always carries the HTTP status code.
    print('Error code: %s' % e.code)
except urllib2.URLError as e:
    # No HTTP status is available here; report the underlying reason.
    print('Reason: %s' % e.reason)
else:
    # Success path kept out of the try body so only urlopen() is guarded.
    print(response)
import urllib2

# Detect whether the request was redirected: urllib2 follows redirects
# automatically, and geturl() reports the URL that was finally retrieved.
requested_url = 'http://www.zhihu.cn'
response = urllib2.urlopen(requested_url)

# Fix: the original compared with ==, which made isRedirected True exactly
# when NO redirect happened. A redirect occurred when the final URL differs
# from the one that was requested.
isRedirected = response.geturl() != requested_url
import urllib2


class RedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that ignores 301 and annotates followed 302 responses."""

    def http_error_301(self, req, fp, code, msg, headers):
        """Do not follow 301 redirects.

        Returning None means this handler declines the response.
        NOTE(review): urllib2 is then expected to pass it on to its remaining
        handlers, normally ending in an HTTPError -- confirm against the docs.
        """
        return None

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect and record its status and final URL.

        Returns the response produced by the base-class handler, with two
        extra attributes: ``status`` (the redirect code) and ``newurl`` (the
        URL finally retrieved). Returns None when the base class does.
        """
        # Delegate to the stock implementation, called explicitly on the base
        # class exactly as the original did.
        result = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        # Fix: the base handler returns None when the response carries no
        # redirect target; the original then crashed with AttributeError on
        # the attribute assignments below.
        if result is None:
            return None
        result.status = code
        result.newurl = result.geturl()
        return result


# Passing the class makes build_opener() use it in place of the default
# HTTPRedirectHandler.
opener = urllib2.build_opener(RedirectHandler)
opener.open('http://www.zhihu.cn')
import urllib2

# Author's note (translated): when running this, switch off the system-proxy
# mode in the "socketsocks" client and keep it on port 1080, or quit the
# client altogether.
proxy_handler = urllib2.ProxyHandler({'http': '127.0.0.1:1080'})
opener = urllib2.build_opener(proxy_handler)

# install_opener() makes this opener the global default, so the plain
# urllib2.urlopen() call below goes through the proxy.
urllib2.install_opener(opener)

response = urllib2.urlopen('http://www.zhihu.com/')
print(response.read())
這裏要注意一個細節:使用urllib2.install_opener()會設置urllib2的全局opener,之後全部的HTTP訪問都會使用這個代理。這樣很方便,可是若想在程序中使用兩個不一樣的代理,就不能使用install_opener去更改全局的設置,而是直接調用opener的open()方法(即opener.open())代替全局的urllib2.urlopen():
import urllib2

# Per-opener proxy: nothing is installed globally, so only requests made
# through this opener object use the proxy.
proxy_settings = {'http': '127.0.0.1:1080'}
opener = urllib2.build_opener(urllib2.ProxyHandler(proxy_settings))

response = opener.open("http://www.google.com/")
print(response.read())
運行時須要在socketsocks中關閉系統代理。