url = "http://zzk.cnblogs.com"
urllib.urlopen(url) -----> get方法
name = urllib.urlencode({"k":"b"})
urllib.urlopen(url, name) -----> post方法(把編碼後的數據做爲第二個參數傳入,纔會以post方式發送)
若開發者工具中form表單的method選項爲post,那麼必須使用post方法。
# Basic urllib (Python 2) usage: fetch a page, save it to disk, and download
# the first .jpg image referenced by the page.
import re
import urllib

# Open the given page and read its status code and body.
reponse = urllib.urlopen("https://www.baidu.com")
reponse_code = reponse.getcode()  # HTTP status code
reponse_body = reponse.read()  # page content
reponse.close()  # release the connection once the body has been read

# Save the content at the URL straight into the given file.
save = urllib.urlretrieve(
    "https://www.baidu.com", filename="/home/guido/python/baidu.html"
)

# Use a regular expression to pull .jpg URLs out of single-quoted src
# attributes.
images = re.findall(r"src='(.*?\.jpg)'", reponse_body)

# The page may contain no matching image; indexing an empty list would raise
# IndexError, so only download when something matched.
if images:
    urllib.urlretrieve(
        images[0], filename="/home/guido/python/baidu_images.html"
    )
# Build a query string with urlencode and append it to a base URL.
import urllib

try:
    urlencode = urllib.urlencode  # Python 2 location
except AttributeError:
    from urllib.parse import urlencode  # Python 3 location

# A sequence of pairs (rather than a dict) keeps the parameter order fixed,
# so the resulting URL is the same on every run.
parament = urlencode([("t", "b"), ("w", "ios")])
url = "http://zzk.cnblogs.com/s?" + parament
print(url)
# Output:
# http://zzk.cnblogs.com/s?t=b&w=ios
# Send a request carrying browser-like headers (Python 2, urllib2) and print
# the response body.
import urllib2
import zlib

url = "http://www.phpno.com"

# Forged browser request headers.
# NOTE(review): "Host" names www.nm3dp.com while `url` points at
# www.phpno.com -- confirm which site is intended.
send_headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, sdch",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Cookie": "ASPSESSIONIDCCTRDBQT=OJNFDDEANPLCEFLECFILODNN; Hm_lvt_39dcd5bd05965dcfa70b1d2457c6dcae=1484820976,1484821014,1484821053; Hm_lpvt_39dcd5bd05965dcfa70b1d2457c6dcae=1484821053",
    "Host": "www.nm3dp.com",
    "Referer": "https://www.baidu.com/link?url=Q_AEn1rb05AX6miw616Tx5bIWILq5K_FpUQl_eyJ7TS&wd=&eqid=cb712bbf00052caf00000003588091e9",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
}

# Bundle the URL and the headers into the request sent to the server.
req = urllib2.Request(url, headers=send_headers)
r = urllib2.urlopen(req)
try:
    body = r.read()
    # The headers advertise gzip support and urllib2 does not decompress
    # responses itself, so inflate the body when the server marks it gzipped.
    # (16 + MAX_WBITS tells zlib to expect a gzip header.)
    if r.info().get("Content-Encoding") == "gzip":
        body = zlib.decompress(body, 16 + zlib.MAX_WBITS)
finally:
    r.close()
print(body)
# Fetch a page and parse it with BeautifulSoup.
# NOTE(review): assumes `urllib` and `BeautifulSoup` (e.g.
# `from bs4 import BeautifulSoup`) are already imported -- confirm.
response = urllib.urlopen("http://www.3jy.com/")
html = response.read()
# Name the parser explicitly so the result does not depend on which parser
# libraries happen to be installed.
soup = BeautifulSoup(html, "html.parser")
# Print the parsed document as an indented tree.
print(soup.prettify())
# Dotted access on the soup object looks a tag up by name
# (per the Beautiful Soup docs, the first matching tag is returned).
soup.title
soup.head
soup.b
soup.a
# Attributes of the <p> tag.
soup.p.attrs
# String content of the <p> tag.
soup.p.string
soup.select('title') 通過標籤名查找
soup.select('.sister') 通過類名查找
soup.select('#link1') 通過ID名查找
soup.select('p #link1') 組合查找
soup.select('head>title') 直接子標籤查找
soup.select('a[class="sister"]') 屬性查找
soup.p['class'] 獲取標籤內某個屬性的值(內容)
通過索引的方式取出select返回值列表中的元素,又轉換成能夠用select方法的對象,能夠進一步操作
# select() returns a list; indexing it yields an element that can be
# queried with select() again, so the search can be narrowed further.
aa = soup.select('body')
bb = aa[0]
cc = bb.select('a[class="sister"]')