urllib.urlencode()
urllib2.urlopen()
urllib2學習指南:函數
查看源碼可知是這樣的路線。
數據流向:
urllib2.urlopen() -> opener.open() -> Request() -> _open() -> 等等 -> httplib庫
由此大致可以得出,要訪問一個網址,可以使用以下幾種方法。
urlopen()
opener.open()
Request()之後使用urlopen()或者opener.open()
也就是說Request()這個類不提供open的操作,只是對數據的封裝。
class Request:
    """Describe a single URL request without performing it.

    The class only packages data (URL, body, headers, proxy information);
    it offers no ``open`` operation of its own.

    The URL is split lazily: ``get_type()`` and ``get_host()`` compute the
    scheme and host on first use and cache what is left over in the private
    ``__r_type`` / ``__r_host`` attributes.

    Args:
        url: The URL string; a ``<URL:...>`` wrapper is removed by ``unwrap``.
        data: Optional request body. When it is not ``None`` the request
            method reported by ``get_method()`` is ``"POST"``.
        headers: Optional mapping of header name to value. Each entry is
            stored through ``add_header``, so keys are ``capitalize()``d.
        origin_req_host: Host of the originating request; computed with
            ``request_host(self)`` when omitted.
        unverifiable: Stored as-is and returned by ``is_unverifiable()``.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.__original, self.__fragment = splittag(self.__original)
        self.type = None
        # self.__r_type is what's left after doing the splittype
        self.host = None
        self.port = None
        self._tunnel_host = None
        self.data = data
        self.headers = {}
        # ``headers`` defaults to None rather than a shared ``{}`` literal so
        # that no mutable object is reused between calls.
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        """Lazily compute a missing ``__r_XXX`` attribute via ``get_XXX()``.

        Raises:
            AttributeError: For any other missing attribute.
        """
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order.  this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                # The getter stores the attribute as a side effect, so the
                # retried lookup below succeeds without re-entering here.
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        # Call form instead of ``raise AttributeError, attr`` so the
        # statement is valid on both Python 2 and Python 3.
        raise AttributeError(attr)

    def get_method(self):
        """Return ``"POST"`` when the request carries data, else ``"GET"``."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        """Replace the request body with ``data``."""
        self.data = data

    def has_data(self):
        """Return True when a request body is set (``data is not None``)."""
        return self.data is not None

    def get_data(self):
        """Return the request body, or ``None`` when there is none."""
        return self.data

    def get_full_url(self):
        """Return the URL, re-attaching the ``#fragment`` if one was split off."""
        if self.__fragment:
            return '%s#%s' % (self.__original, self.__fragment)
        else:
            return self.__original

    def get_type(self):
        """Return the URL scheme, computing and caching it on first use.

        Raises:
            ValueError: If ``splittype`` finds no scheme in the URL.
        """
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError("unknown url type: %s" % self.__original)
        return self.type

    def get_host(self):
        """Return the unquoted host, computing and caching it on first use."""
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        """Return the part of the URL left over after the host is split off.

        After ``set_proxy`` has taken its non-tunnel branch this is the
        whole original URL.
        """
        return self.__r_host

    def set_proxy(self, host, type):
        """Route the request through the proxy ``host``.

        For an ``https`` request with no tunnel host recorded yet, the
        current host is remembered in ``_tunnel_host``; otherwise the scheme
        is replaced by ``type`` and the selector becomes the original URL.
        In both cases ``self.host`` is set to the proxy host.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.__r_host = self.__original
        self.host = host

    def has_proxy(self):
        """Return True when the selector equals the original URL."""
        return self.__r_host == self.__original

    def get_origin_req_host(self):
        """Return the stored origin request host."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the stored ``unverifiable`` flag."""
        return self.unverifiable

    def add_header(self, key, val):
        """Store a header under ``key.capitalize()``."""
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        """Store a header, under ``key.capitalize()``, in ``unredirected_hdrs``."""
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True when ``header_name`` is in either header dict.

        The lookup is an exact match; keys are stored capitalized, so the
        name must be passed in that same form (e.g. ``"User-agent"``).
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header value, preferring ``headers`` over ``unredirected_hdrs``.

        The lookup is an exact match on the stored (capitalized) key;
        ``default`` is returned when the header is in neither dict.
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return the merged header items; ``headers`` wins on key clashes."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
opener是OpenerDirector的實例。
可以通過build_opener()函數,也可以通過OpenerDirector實例化一個opener,然後調用add_handler方法。正所謂條條大路通羅馬。
install_opener()可以讓opener對urlopen()也生效。
def install_opener(opener):
    """Store ``opener`` in the module-level ``_opener`` variable.

    Args:
        opener: The object to bind to the module global ``_opener``.
    """
    # The ``global`` declaration makes the assignment rebind the module
    # variable instead of creating a function-local name.
    global _opener
    _opener = opener
global語句被用來聲明x是全局的——因此,當我們在函數內把值賦給x的時候,這個變化也反映在我們在主塊中使用x的值的時候。
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, cafile=None, capath=None, cadefault=False, context=None): global _opener
something