python urllib模塊裏面一些函數

urllib 裏面的一些函數，發現挺有用，特記在這裏，備之後使用。

 

# splittype('type:opaquestring') --> 'type', 'opaquestring'

# splithost('//host[:port]/path') --> 'host[:port]', '/path'

# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'

import re

try:
    from urllib.parse import unquote  # Python 3
except ImportError:  # Python 2 keeps unquote in the top-level urllib module
    from urllib import unquote

# Patterns are compiled once at import time.  The private names match the
# ones the helpers historically cached their compiled patterns under.
_typeprog = re.compile(r'^([^/:]+):')
_hostprog = re.compile(r'^//([^/?]*)(.*)$')
_userprog = re.compile(r'^(.*)@(.*)$')
# re.S lets '.' match newlines, so a password containing one still matches.
_passwdprog = re.compile(r'^([^:]*):(.*)$', re.S)
_portprog = re.compile(r'^(.*):([0-9]+)$')
_nportprog = re.compile(r'^(.*):(.*)$')
_queryprog = re.compile(r'^(.*)\?([^?]*)$')
_tagprog = re.compile(r'^(.*)#([^#]*)$')
_valueprog = re.compile(r'^([^=]*)=(.*)$')


def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  When *url* has no leading
    ``scheme:`` prefix, ``(None, url)`` is returned.
    """
    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        # +1 skips the ':' that follows the scheme.
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url


def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns ``(None, url)`` when *url* does not start with ``//``.
    """
    match = _hostprog.match(url)
    if match:
        return match.group(1, 2)
    return None, url


def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are percent-decoded with ``unquote``.  The split happens at
    the last '@' because the first group is greedy.  Returns
    ``(None, host)`` when there is no '@'.
    """
    match = _userprog.match(host)
    if match:
        # Build a real tuple: on Python 3 ``map`` would hand back a lazy
        # iterator, unlike the tuple returned by the no-match branch.
        return tuple(unquote(part) for part in match.group(1, 2))
    return None, host


def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    Splits at the first ':'.  Returns ``(user, None)`` when there is none.
    """
    match = _passwdprog.match(user)
    if match:
        return match.group(1, 2)
    return user, None


def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string and must consist only of digits;
    otherwise ``(host, None)`` is returned with *host* unchanged.
    """
    match = _portprog.match(host)
    if match:
        return match.group(1, 2)
    return host, None


def splitnport(host, defport=-1):
    """Split host and port, returning a numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None as the port if there is a ':' but no valid number after it.
    """
    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            if not port:
                raise ValueError("no digits")
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport


def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Splits at the last '?'.  Returns ``(url, None)`` when there is none.
    """
    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None


def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Splits at the last '#'.  Returns ``(url, None)`` when there is none.
    """
    match = _tagprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None


def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when *url* contains no ';'.
    """
    words = url.split(';')
    return words[0], words[1:]


def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits at the first '='.  Returns ``(attr, None)`` when there is none.
    """
    match = _valueprog.match(attr)
    if match:
        return match.group(1, 2)
    return attr, None
相關文章
相關標籤/搜索