BeautifulSoup(markup,'html.parser')html
BeautifulSoup(markup,'lxml')html5
BeautifulSoup(markup,'xml')ui
BeautifulSoup(markup,'html5lib')spa
#基本用法
from bs4 import BeautifulSoup
soup=BeautifulSoup(markup,'lxml')
print(soup.prettify())
print(soup.title.string)
#標籤選擇器
#選擇元素
soup.head
soup.p
#獲取名稱
soup.title.name
#獲取屬性
soup.p.attrs['name']
soup.p['name']
#獲取內容
soup.p.string
#嵌套選擇
soup.head.title.string
#子節點和子孫節點
soup.p.contents
soup.p.children
for i,child in enumerate(soup.p.children):
    print(i,child)
#父節點和祖先節點
soup.a.parent
soup.a.parents
#兄弟節點
soup.a.next_siblings
soup.a.previous_siblings
#標準選擇器code
find_all(name, attrs, ...)
soup.find_all('ul')[0]
soup.find_all(attrs={'id':'list-1'})
soup.find_all(class_='element')
soup.find_all(text='Foo')
find:返回單個元素
find_parents
#CSS選擇器xml
soup.select('.panel .panel_a')
soup.select('ul li')
soup.select('#list-2 .element')

for ul in soup.select('ul'):
    print(ul.select('li'))

#獲取屬性
print(li.attrs['id'])

#獲取內容
print(ul.get_text())