xpath使用小結

lxml 是一個支持 XPath 的 Python 庫。例子 1：解析 HTML 字符串，並用 XPath 查詢節點：

from lxml import etree  

# Sample document: it must actually contain the element the XPath below asks
# for, otherwise xpath() returns an empty list and the loop never runs.
html = (
    "<html><head><title>hello</title></head>"
    "<body><table class='n_table'><tr><td>hello</td></tr></table></body></html>"
)
tree = etree.HTML(html)
# Select every <td> inside a <table> whose class attribute is exactly 'n_table'.
nodes = tree.xpath("//table[@class='n_table']//td")
for node in nodes:
    # .text is the text directly inside the element ("hello" here).
    n = node.text

例子 2：抓取網頁，用 XPath 選取節點並輸出其文字

#coding:utf-8
import requests
from lxml import etree
import sys
# Python 2-only workaround: reloading sys makes sys.setdefaultencoding
# callable again, so implicit str/unicode conversions use UTF-8.
# NOTE(review): reload() is not a builtin on Python 3 and this hack is not
# needed there -- confirm the target interpreter before keeping these lines.
reload(sys)
sys.setdefaultencoding('utf-8')


def main():
    """Download a blog page and print the text of each link in its hot-blogs list.

    The page is fetched with browser-like headers, decoded to text, parsed
    with lxml, and every ``<a>`` matched by
    ``//div[@id='HotBlogs']/ol/li/a`` has its text printed on its own line.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    url = 'http://my.oschina.net/u/2351685/blog/607192'
    headers = {
        "Referer": url,
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36"
    }
    # Without a timeout requests may wait forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    # r.encoding is None when the response headers give no hint about the
    # charset; bytes.decode(None) would raise TypeError, so fall back to the
    # encoding guessed from the body and finally to UTF-8.
    encoding = r.encoding or r.apparent_encoding or 'utf-8'
    html = r.content.decode(encoding)
    tree = etree.HTML(html)
    nodes = tree.xpath(u"//div[@id='HotBlogs']/ol/li/a")
    for node in nodes:
        # The link target, if needed, is available as node.attrib["href"].
        print(node.text)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
相關文章
相關標籤/搜索