1. 學習 Python
urlopen(url, data, timeout)
urllib2.Request()
urllib.urlencode()
params = {}  GET:url + "?" + params
# POST a username/password form to the login URL, sending a browser-like
# User-Agent header, and keep the response body in `page`.
import urllib
import urllib2
from contextlib import closing

url = 'http://www.server.com/login'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {'username': 'cqc', 'password': 'XXXX'}
headers = {'User-Agent': user_agent}

# urlencode() serialises the dict as a form-encoded string; supplying it as
# the data argument makes urllib2 send a POST rather than a GET.
data = urllib.urlencode(values)
request = urllib2.Request(url, data, headers)

# closing() ensures the response is closed even if read() raises.
with closing(urllib2.urlopen(request)) as response:
    page = response.read()
應對「防盜鏈」的方式:服務器會檢查 headers 中的 referer 是否指向它自己,如果不是,有的服務器不會響應,所以我們還可以在 headers 中加入 referer。
代理服務器設置
# Install a global urllib2 opener that either uses an HTTP proxy or an
# empty proxy mapping, depending on `enable_proxy`.
import urllib2

enable_proxy = True

proxy_handler = urllib2.ProxyHandler({"http": 'http://some-proxy.com:8080'})
null_proxy_handler = urllib2.ProxyHandler({})  # empty proxy mapping

# Pick the handler first, then build a single opener from it.
opener = urllib2.build_opener(
    proxy_handler if enable_proxy else null_proxy_handler
)

# After this call, plain urllib2.urlopen() goes through `opener`.
urllib2.install_opener(opener)
# Install a global opener whose HTTP and HTTPS handlers are created with
# debuglevel=1, then fetch a page through it.
import urllib2

httpHandler, httpsHandler = (
    urllib2.HTTPHandler(debuglevel=1),
    urllib2.HTTPSHandler(debuglevel=1),
)

opener = urllib2.build_opener(httpHandler, httpsHandler)
urllib2.install_opener(opener)

# urlopen() now uses the opener installed above.
response = urllib2.urlopen('http://www.baidu.com')
import urllib2 import cookielib #聲明一個CookieJar對象實例來保存cookie cookie = cookielib.CookieJar() #利用urllib2庫的HTTPCookieProcessor對象來建立cookie處理器 handler=urllib2.HTTPCookieProcessor(cookie) #經過handler來構建opener opener = urllib2.build_opener(handler) #此處的open方法同urllib2的urlopen方法,也能夠傳入request response = opener.open('http://www.baidu.com') for item in cookie: print 'Name = '+item.name print 'Value = '+item.value
# Fetch a page through a cookie-aware opener and save the collected
# cookies to a file.
import cookielib
import urllib2

# Target file for the saved cookies: cookie.txt, given as a relative path.
filename = 'cookie.txt'

# MozillaCookieJar bound to that file; it is written out by save() below.
cookie = cookielib.MozillaCookieJar(filename)

# Cookie handler for the jar, and an opener built on top of it.
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)

# Issue the request; used the same way as urllib2.urlopen().
response = opener.open("http://www.baidu.com")

# Write the jar to `filename`, passing both ignore_* flags as True.
cookie.save(ignore_discard=True, ignore_expires=True)
import cookielib import urllib2 #建立MozillaCookieJar實例對象 cookie = cookielib.MozillaCookieJar() #從文件中讀取cookie內容到變量 cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True) #建立請求的request req = urllib2.Request("http://www.baidu.com") #利用urllib2的build_opener方法建立一個opener opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie)) response = opener.open(req) print response.read()
遍歷文檔樹
搜索文檔樹