BeautifulSoup庫

BeautifulSoup(markup, 'html.parser')  # Python 標準庫內建的 HTML 解析器

BeautifulSoup(markup, 'lxml')  # lxml 的 HTML 解析器

BeautifulSoup(markup, 'xml')  # lxml 的 XML 解析器

BeautifulSoup(markup, 'html5lib')  # html5lib 解析器，容錯性最好

 

#基本用法
from bs4 import BeautifulSoup
soup = BeautifulSoup(markup, 'lxml')
print(soup.prettify())
print(soup.title.string)

#標籤選擇器
#選擇元素
soup.head 
soup.p

#獲取名稱
soup.title.name

#獲取屬性
soup.p.attrs['name']
soup.p['name']

#獲取內容
soup.p.string

#嵌套選擇
soup.head.title.string

#子節點和子孫節點
soup.p.contents

soup.p.children
for i,child in enumerate(soup.p.children):
    print(i, child)

#父節點和祖先節點
soup.a.parent
soup.a.parents

#兄弟節點
soup.a.next_siblings
soup.a.previous_siblings

#標準選擇器

find_all(name, attrs, recursive, string, limit, **kwargs)
soup.find_all('ul')[0]


soup.find_all(attrs={'id':'list-1'})
soup.find_all(class_='element')

soup.find_all(string='Foo')

find:返回單個元素

find_parents 

#CSS選擇器

soup.select('.panel .pandel_a')
soup.select('ul li')
soup.select('#list-2 .element')

for ul in soup.select('ul'):
    print(ul.select('li'))

#獲取屬性
print(ul.attrs['id'])

#獲取內容
print(ul.get_text())
相關文章
相關標籤/搜索