1. 打開URL,讀取返回的數據
#!/usr/bin/env python
# Open a URL with urllib and print the returned page as text.
from urllib import request

url = 'https://www.jd.com'

# The with-block closes the HTTP response (and its socket) once the body
# has been read, instead of leaving it open until interpreter exit.
with request.urlopen(url) as req:
    res = req.read()  # raw bytes of the response body

# The body is bytes; decode it as UTF-8 to print it as a string.
print(res.decode('utf-8'))
2. 對post數據進行編碼
#!/usr/bin/env python
# Send form data in a POST request; the payload must be encoded to bytes first.
from urllib import parse
from urllib import request

url = 'http://httpbin.org/post'
payload = {'key1': 'value1', 'key2': 'value2'}

# POST data must be bytes or an iterable of bytes, not str, so the
# urlencoded string is converted with encode().
newpayload = parse.urlencode(payload).encode('utf-8')
print(type(newpayload))  # encode() turns a str into bytes

# Passing data= sends the payload as the request body; the with-block
# closes the response once it has been read.
with request.urlopen(url, data=newpayload) as req:
    res = req.read()
print(res.decode('utf-8'))  # decode() turns bytes back into a str

# Sample output:
# <class 'bytes'>
# {
#   "args": {},
#   "data": "",
#   "files": {},
#   "form": {
#     "key1": "value1",
#     "key2": "value2"
#   },
#   "headers": {
#     "Accept-Encoding": "identity",
#     "Connection": "close",
#     "Content-Length": "23",
#     "Content-Type": "application/x-www-form-urlencoded",
#     "Host": "httpbin.org",
#     "User-Agent": "Python-urllib/3.6"
#   },
#   "json": null,
#   "origin": "183.48.35.148",
#   "url": "http://httpbin.org/post"
# }
#!/usr/bin/env python
# Request a page while sending a browser-style User-Agent header.
from urllib import request

url = 'https://www.qiushibaike.com/'
ua = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"}

# A Request object carries the extra headers; urlopen() accepts it in
# place of a plain URL string.
req = request.Request(url, headers=ua)

# The with-block closes the response once the body has been read.
with request.urlopen(req) as res:
    page = res.read()
print(page.decode('utf-8'))
4. 下載
#!/usr/bin/env python
# Download a remote file straight to a local path with urlretrieve().
from urllib import request

url = 'https://www.baidu.com/img/bd_logo1.png'
target = 'baidu.png'  # local file the image is saved as

request.urlretrieve(url, filename=target)
# Download a file by hand: read the response bytes, then write them to disk.
from urllib import request

url = 'https://www.baidu.com/img/bd_logo1.png'

# The with-block closes the HTTP response once the body has been read.
with request.urlopen(url) as req:
    res = req.read()  # raw image bytes

# The built-in open() in binary mode writes the bytes unchanged; no codec
# is involved, so codecs.open() is not needed here.
with open('1.png', 'wb') as fd:
    fd.write(res)
5. 代理
#!/usr/bin/env python
# Fetch an IP-echo page twice: first directly, then through an HTTP proxy.
from urllib import request

url = 'http://2017.ip138.com/ic.asp'

# Direct request. The page is decoded as gb2312, the charset its <meta>
# tag declares (see the sample output below).
with request.urlopen(url) as direct:
    print(direct.read().decode('gb2312'))

dic = {'http': '125.88.177.128:3128'}
proxy = request.ProxyHandler(dic)     # create a handler with proxy support
opener = request.build_opener(proxy)  # build an opener from that handler
with opener.open(url) as req:         # access the url through the opener
    res = req.read().decode('gb2312')
print(res)

# Sample output (first page from the direct request, second via the opener):
# <html> <head> <meta http-equiv="content-type" content="text/html; charset=gb2312"> <title> 您的IP地址 </title> </head> <body style="margin:0px"><center>您的IP是:[113.68.17.83] 來自:廣東省廣州市 電信</center></body></html>
# <html> <head> <meta http-equiv="content-type" content="text/html; charset=gb2312"> <title> 您的IP地址 </title> </head> <body style="margin:0px"><center>您的IP是:[119.129.229.185] 來自:廣東省廣州市 電信</center></body></html>
6. 構造能攜帶cookie的opener
#!/usr/bin/env python
# Build an opener that collects the cookies a site sets.
import http.cookiejar
from urllib import request

url = 'https://www.github.com'

cookie = http.cookiejar.CookieJar()            # create the cookie jar object
print(cookie)                                  # still empty at this point

handler = request.HTTPCookieProcessor(cookie)  # handler that stores cookies in the jar
opener = request.build_opener(handler)         # build an opener from the handler

# Access the url through the opener; afterwards the jar holds the cookies.
# The body is not needed, so the response is closed right away.
opener.open(url).close()
print(cookie)

# Sample output:
# <CookieJar[]>
# <CookieJar[<Cookie logged_in=no for .github.com/>, <Cookie _gh_sess=eyJzZXNzaW9uX2lkIjoiYzJkNzE0NTA1OWQ4ZDc5MDA1NjM4NWI1ZDIwYjkxNTgiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNjU0MDEzNzE0MCwiX2NzcmZfdG9rZW4iOiJOdTVMYXZjT0xMNDRYS1JmOTh3MFZNQnI0c3ZPOTNCWkFWRUZnN21yUFZ3PSJ9--c47838621e21bba5d72c7050345b40a9c513335a for github.com/>]>
7. 保存cookie信息到文件中
MozillaCookieJar() 這個類繼承了 FileCookieJar() 這個類,FileCookieJar() 的構造函數定義了一個 filename 參數,也就是存儲 cookie 的文件;而它之所以能獲取 cookie,是由於 FileCookieJar() 這個類繼承了 CookieJar() 這個類。
#!/usr/bin/env python
# Visit a site and save the cookies it sets to a file.
import http.cookiejar
from urllib import request

url = 'https://www.github.com'
filename = 'cookie.txt'

# Pass the filename at construction time to get a file-backed cookie jar.
cookie = http.cookiejar.MozillaCookieJar(filename)
handler = request.HTTPCookieProcessor(cookie)
opener = request.build_opener(handler)

# Only the cookies are of interest, so the response is closed without
# reading its body.
opener.open(url).close()

# With no argument, save() writes to the filename given to the constructor.
cookie.save()
8. 從文件中讀取cookie並訪問url
#!/usr/bin/env python
# Load cookies saved in a file and send them along when visiting a URL.
import http.cookiejar
from urllib import request

filename = 'cookie.txt'
url = 'https://www.github.com'

# Fill the jar from the cookie file before any request is made.
cookie = http.cookiejar.MozillaCookieJar(filename)
cookie.load(filename)

handler = request.HTTPCookieProcessor(cookie)
opener = request.build_opener(handler)

# The with-block closes the response once the body has been read.
with opener.open(url) as resp:
    res = resp.read()
print(res.decode('utf-8'))