lxml 是一個支持 XPath 的 Python 庫。例子1(解析 HTML 字串):
from lxml import etree

# Example 1: parse an HTML string and query it with XPath.
# etree.HTML() parses the markup and returns the root element of the tree.
tree = etree.HTML("<head><title>hello</title></head>")

# The query must target something present in the sample markup; the sample
# only contains a <title>, so select that (the result is a list of elements).
nodes = tree.xpath("//title")
for node in nodes:
    # .text is the text directly inside the matched element ("hello" here).
    n = node.text
例子2(抓取網頁並用 XPath 取出 node):
# coding: utf-8
# Example 2: download a blog page and print the text of every link found
# under the element with id "HotBlogs".
import sys

import requests
from lxml import etree

if sys.version_info[0] < 3:
    # Python 2 only: make implicit str/unicode conversions use UTF-8 so that
    # printing non-ASCII link text does not fail. reload() is needed because
    # site.py removes sys.setdefaultencoding at startup. Neither name exists
    # on Python 3, hence the version guard.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')


def main():
    """Fetch the blog page and print the text of each "HotBlogs" link.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'http://my.oschina.net/u/2351685/blog/607192'
    headers = {
        "Referer": url,
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/48.0.2564.82 Safari/537.36"
        ),
    }

    # requests applies no timeout by default; without one a stalled server
    # would block this script forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    r.raise_for_status()

    # r.text decodes the body with r.encoding and falls back to a detected
    # encoding when the server did not declare one (r.encoding is None),
    # where a manual r.content.decode(r.encoding) would raise TypeError.
    tree = etree.HTML(r.text)

    nodes = tree.xpath(u"//div[@id='HotBlogs']/ol/li/a")
    for node in nodes:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        # The link target is available as node.attrib["href"] if needed.
        print(node.text)


if __name__ == '__main__':
    main()