pyquery的簡單用途:
import pyquery

# Sample document; the pieces concatenate to a single-line HTML string.
html = (
    ' <html> <title>這是標題</title> <body> '
    '<p id="hi">Hello</p> '
    '<ul> <li>list1</li> <li>list2</li> </ul> '
    '</body> </html> '
)

pyq = pyquery.PyQuery(html)  # initialise from a string

# For a tag selector and an id selector ("#" denotes an id), print the
# matched markup first and then the text between the tags via .text().
for selector in ('title', "#hi"):
    matched = pyq(selector)
    print(matched)
    print(matched.text())

# Handle the child elements: each item produced by the loop is wrapped
# with pyq(...) again before .text() is called on it.
for element in pyq('li'):
    print(pyq(element).text())
pyquery初始化方式:
import pyquery
import lxml.etree

# Initialise PyQuery documents from each kind of source.
doc1 = pyquery.PyQuery("<html></html>")  # from a markup string
doc2 = pyquery.PyQuery(lxml.etree.fromstring("<html></html>"))  # from an lxml element
# Load a remote page through the documented `url=` keyword so the address
# is fetched instead of being handled as a plain string argument.
doc3 = pyquery.PyQuery(url="http://www.baidu.com")
doc4 = pyquery.PyQuery(filename="index.html")  # from a local file

print(type(doc1))
# Call .html() to print the inner markup; without the parentheses the
# bound-method object itself would be printed.
print(doc1.html())
print(doc1)
print(doc2)
print(doc3)
print(doc4)
pyquery的css選擇:
#coding:utf-8
import pyquery
import lxml.etree

# Load the document from a local file, then query it with CSS selectors.
doc = pyquery.PyQuery(filename="index.html")
print(type(doc))

print(doc('head'))  # the head tag

head_title = doc('head title')  # the title tag inside the head tag
print(head_title)
print(head_title.text())  # text between the tags

print("---------------------------")

# Id selector; the original note describes the result as a list that may
# hold one element or several.
container = doc("#container")
print(type(container))
print(container[0])
print(container.attr["class"])  # attribute read from the first element of the list

# Further selector examples, left disabled as in the original:
# print(doc("#container .list"))     # select by class: class=list
# print(doc("#container .list li"))