Python內置的HTTP請求庫:
* urllib.request 請求模塊
* urllib.error 異常處理模塊
* urllib.parse url解析模塊
* urllib.robotparser robots.txt解析模塊
Python2
# Python 2 only: urlopen lives in the urllib2 module.
import urllib2

response = urllib2.urlopen('http://www.baidu.com')
Python3
# Python 3: the same call is made through urllib.request.
import urllib.request

response = urllib.request.urlopen('http://www.baidu.com')
import urllib.request

# Plain GET request. read() returns bytes, so decode before printing.
# The with-block closes the connection once the body has been read.
with urllib.request.urlopen('http://www.baidu.com') as response:
    print(response.read().decode('utf-8'))
import urllib.parse
import urllib.request

# Passing data= turns the request into a POST; the payload must be bytes,
# so the url-encoded form string is encoded as UTF-8 first.
data = bytes(urllib.parse.urlencode({'word': 'hello'}), encoding='utf-8')
response = urllib.request.urlopen('http://httpbin.org/post', data=data)
import urllib.request

# timeout is given in seconds; an exception is raised if the server does
# not answer in time.
with urllib.request.urlopen('http://httpbin.org/get', timeout=1) as response:
    print(response.read())
import socket
import urllib.error
import urllib.request

try:
    # A deliberately tiny timeout so that the request is expected to fail.
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except urllib.error.URLError as e:
    # URLError.reason holds the underlying cause; for a connection timeout
    # it is a socket.timeout instance.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
import urllib.request

# Show the type of the object returned by urlopen for an HTTP(S) URL.
with urllib.request.urlopen('https://www.python.org') as response:
    print(type(response))
<class 'http.client.HTTPResponse'>
import urllib.request

# Inspect the status code and headers of the response.
with urllib.request.urlopen('https://www.python.org') as response:
    print(response.status)               # HTTP status code
    print(response.getheaders())         # all headers as (name, value) pairs
    print(response.getheader('Server'))  # a single header looked up by name
200
[('Server', 'nginx'), ('Content-Type', 'text/html; charset=utf-8'), ('X-Frame-Options', 'SAMEORIGIN'), ('x-xss-protection', '1; mode=block'), ('X-Clacks-Overhead', 'GNU Terry Pratchett'), ('Via', '1.1 varnish'), ('Content-Length', '50069'), ('Accept-Ranges', 'bytes'), ('Date', 'Mon, 26 Nov 2018 10:16:51 GMT'), ('Via', '1.1 varnish'), ('Age', '1872'), ('Connection', 'close'), ('X-Served-By', 'cache-iad2144-IAD, cache-tyo19943-TYO'), ('X-Cache', 'HIT, HIT'), ('X-Cache-Hits', '2, 4331'), ('X-Timer', 'S1543227412.955266,VS0,VE0'), ('Vary', 'Cookie'),
('Strict-Transport-Security', 'max-age=63072000; includeSubDomains')]
nginx
from urllib import request, parse

# Build a Request object so that custom headers, a body and an explicit
# HTTP method can be sent together.
url = 'http://httpbin.org/post'
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    'Host': 'httpbin.org',
}
# Named form_data rather than dict so the builtin dict type is not shadowed.
form_data = {'name': 'Germey'}
data = bytes(parse.urlencode(form_data), encoding='utf-8')

req = request.Request(url=url, data=data, headers=headers, method='POST')
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))
{
"args": {},
"data": "",
"files": {},
"form": {
"name": "Germey"
},
"headers": {
"Accept-Encoding": "identity",
"Connection": "close",
"Content-Length": "11",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
},
"json": null,
"origin": "58.34.235.37",
"url": "http://httpbin.org/post"
}
from urllib import request, parse

# Same POST as before, but the header is attached after construction with
# Request.add_header instead of the headers= argument.
url = 'http://httpbin.org/post'
# Named form_data rather than dict so the builtin dict type is not shadowed.
form_data = {'name': 'Germey'}
data = bytes(parse.urlencode(form_data), encoding='utf8')

req = request.Request(url=url, data=data, method='POST')
req.add_header('User-Agent', 'Mozilla/4.0 (compatible;MSIE 5.5;Windows NT)')
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))
{
"args": {},
"data": "",
"files": {},
"form": {
"name": "Germey"
},
"headers": {
"Accept-Encoding": "identity",
"Connection": "close",
"Content-Length": "11",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "Mozilla/4.0 (compatible;MSIE 5.5;Windows NT)"
},
"json": null,
"origin": "58.34.235.37",
"url": "http://httpbin.org/post"
}
import urllib.request

# Route requests through a proxy: ProxyHandler maps each URL scheme to the
# proxy that should be used for it.
proxy_handler = urllib.request.ProxyHandler({
    'http': 'http://127.0.0.1:9319',
    'https': 'https://127.0.0.1:9319',
})
opener = urllib.request.build_opener(proxy_handler)
with opener.open('http://www.baidu.com') as response:
    print(response.read())
import http.cookiejar
import urllib.request

# Collect the cookies set by the server: HTTPCookieProcessor stores them in
# the CookieJar passed to it.
cookie = http.cookiejar.CookieJar()
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
with opener.open('http://www.baidu.com') as response:
    pass  # only the cookies are of interest, not the body

for item in cookie:
    # f-string instead of '+' so a cookie whose value is None does not
    # raise TypeError.
    print(f'{item.name}={item.value}')
BAIDUID=DF51E1D71641196283719D090EEA14DA:FG=1
BIDUPSID=DF51E1D71641196283719D090EEA14DA
H_PS_PSSID=1433_21086_27508
PSTM=1543232201
delPer=0
BDSVRTM=0
BD_HOME=0
import http.cookiejar
import urllib.request

# Persist cookies to disk in the Mozilla/Netscape cookies.txt format.
filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
with opener.open('http://www.baidu.com') as response:
    pass  # the request is made only to receive cookies

# Keep session ("discard") cookies and already-expired cookies as well.
cookie.save(ignore_discard=True, ignore_expires=True)
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
.baidu.com TRUE / FALSE 3690716228 BAIDUID 3131EAE6351C0F474BF6E477B848A52B:FG=1
.baidu.com TRUE / FALSE 3690716228 BIDUPSID 3131EAE6351C0F474BF6E477B848A52B
.baidu.com TRUE / FALSE H_PS_PSSID 27775_1454_21088_20719
.baidu.com TRUE / FALSE 3690716228 PSTM 1543232581
.baidu.com TRUE / FALSE delPer 0
www.baidu.com FALSE / FALSE BDSVRTM 0
www.baidu.com FALSE / FALSE BD_HOME 0
import http.cookiejar
import urllib.request

# Persist cookies to disk in the libwww-perl (LWP) "Set-Cookie3" format.
filename = 'cookie2.txt'
cookie = http.cookiejar.LWPCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
with opener.open('http://www.baidu.com') as response:
    pass  # the request is made only to receive cookies

# Keep session ("discard") cookies and already-expired cookies as well.
cookie.save(ignore_discard=True, ignore_expires=True)
#LWP-Cookies-2.0
Set-Cookie3: BAIDUID="38C33C024449D6412F80B85996FAA2F8:FG=1"; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2086-12-14 15:01:56Z"; version=0
Set-Cookie3: BIDUPSID=38C33C024449D6412F80B85996FAA2F8; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2086-12-14 15:01:56Z"; version=0
Set-Cookie3: H_PS_PSSID=1462_21088_22157; path="/"; domain=".baidu.com"; path_spec; domain_dot; discard; version=0
Set-Cookie3: PSTM=1543232869; path="/"; domain=".baidu.com"; path_spec; domain_dot; expires="2086-12-14 15:01:56Z"; version=0
Set-Cookie3: delPer=0; path="/"; domain=".baidu.com"; path_spec; domain_dot; discard; version=0
Set-Cookie3: BDSVRTM=0; path="/"; domain="www.baidu.com"; path_spec; discard; version=0
Set-Cookie3: BD_HOME=0; path="/"; domain="www.baidu.com"; path_spec; discard; version=0
import http.cookiejar
import urllib.request

# Load cookies from an LWP-format file and send them with the next request.
# The jar class must match the format the file was saved in.
cookie = http.cookiejar.LWPCookieJar()
cookie.load('cookie2.txt', ignore_discard=True, ignore_expires=True)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
with opener.open('http://www.baidu.com') as response:
    print(response.read().decode('utf-8'))
from urllib import request, error

try:
    # The host name is intentionally bogus, so the request is expected to fail.
    response = request.urlopen('http://cuiiqdkfsj.com/insdfi.htm')
except error.URLError as e:
    # reason describes why the request failed.
    print(e.reason)
[Errno -2] Name or service not known
from urllib import request, error

try:
    response = request.urlopen('http://www.baidu.com/dsjfi.htm')
except error.HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be caught first.
    # It additionally exposes the status code and the response headers.
    print(e.reason, e.code, e.headers, sep='\n')
except error.URLError as e:
    print(e.reason)
else:
    # Runs only when no exception was raised.
    print('Request Successfully')
[Errno -2] Name or service not known
import socket
import urllib.error
import urllib.request

try:
    # A deliberately tiny timeout so that the request is expected to fail.
    response = urllib.request.urlopen('https://www.baidu.com', timeout=0.01)
except urllib.error.URLError as e:
    # reason is an exception object here, not a string; print its type and
    # check whether it signals a timeout.
    print(type(e.reason))
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
<class 'socket.timeout'>
TIME OUT
from urllib.parse import urlparse

# Split a URL into its six components:
# scheme, netloc, path, params, query and fragment.
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
print(type(result), result)
<class 'urllib.parse.ParseResult'> ParseResult(scheme='http', netloc='www.baidu.com', path='/index.html', params='user', query='id=5', fragment='comment')
from urllib.parse import urlparse

# scheme= is only a default, used when the URL itself carries no scheme.
# Without a leading '//' the host name ends up in path, not in netloc.
result = urlparse('www.baidu.com/index.html;user?id=5#comment', scheme='https')
print(result)
ParseResult(scheme='https', netloc='', path='www.baidu.com/index.html', params='user', query='id=5', fragment='comment')
from urllib.parse import urlparse

# With allow_fragments=False the '#...' part is not split off; it stays
# attached to the preceding component (the query here).
result = urlparse(
    'http://www.baidu.com/index.html;user?id=5#comment',
    allow_fragments=False,
)
print(result)
ParseResult(scheme='http', netloc='www.baidu.com', path='/index.html', params='user', query='id=5#comment', fragment='')
from urllib.parse import urlunparse

# Inverse of urlparse: assemble a URL from its six components, in the order
# scheme, netloc, path, params, query, fragment.
data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=6', 'comment']
print(urlunparse(data))
from urllib.parse import urlencode

# Turn a dict into a query string and append it to the base URL.
params = {'name': 'germey', 'age': 22}
base_url = 'http://www.baidu.com?'
url = base_url + urlencode(params)
print(url)