若是安裝了anaconda的集成庫,則不須要再次安裝
若是沒有,則使用pip指令安裝
pip install beautifulsoup4 -i https://pypi.tuna.tsinghua.edu.cn/simple/
html
在網頁上按右鍵,選擇「查看網頁源碼」
使用requests庫
# Fetch a page with requests and pretty-print its parsed HTML tree.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so the site does not reject the default client UA.
kv = {'user-agent': 'Mozilla/5.0'}
url = "https://blog.csdn.net/qq_43321732"

try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
except requests.RequestException:
    # Only network/HTTP failures are handled here; anything else propagates.
    print("爬取失敗")
else:
    # Parse only when the download succeeded, so `demo` is always defined.
    demo = r.text
    soup = BeautifulSoup(demo, "html.parser")
    print(soup.prettify())
# Fetch a page and print its <title> tag.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
kv = {'user-agent': 'Mozilla/5.0'}
url = "https://ac.nowcoder.com/acm/contest/5666"

try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
except requests.RequestException:
    # Only network/HTTP failures are handled here; anything else propagates.
    print("爬取失敗")
else:
    # Use the encoding guessed from the body so the text decodes correctly.
    r.encoding = r.apparent_encoding
    demo = r.text
    # Parse only when the download succeeded, so `demo` is always defined.
    soup = BeautifulSoup(demo, "html.parser")
    print(soup.title)
# Fetch a page and print the first <a> tag found in the document.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
kv = {'user-agent': 'Mozilla/5.0'}
url = "https://ac.nowcoder.com/acm/contest/5666"

try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
except requests.RequestException:
    # Only network/HTTP failures are handled here; anything else propagates.
    print("爬取失敗")
else:
    # Use the encoding guessed from the body so the text decodes correctly.
    r.encoding = r.apparent_encoding
    demo = r.text
    # Parse only when the download succeeded, so `demo` is always defined.
    soup = BeautifulSoup(demo, "html.parser")
    # `soup.a` is the first <a> tag in the document (None if there is none).
    tag = soup.a
    print(tag)
# Fetch a page and inspect the attributes of its first <a> tag.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
kv = {'user-agent': 'Mozilla/5.0'}
url = "https://ac.nowcoder.com/acm/contest/5666"

try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
except requests.RequestException:
    # Only network/HTTP failures are handled here; anything else propagates.
    print("爬取失敗")
else:
    # Use the encoding guessed from the body so the text decodes correctly.
    r.encoding = r.apparent_encoding
    demo = r.text
    # Parse only when the download succeeded, so `demo` is always defined.
    soup = BeautifulSoup(demo, "html.parser")
    tag = soup.a
    # All attributes of the tag.
    print(tag.attrs)
    print(tag.attrs['class'])
    # The link (href) attribute.
    print(tag.attrs['href'])
    # Type of the attribute container (a dict).
    print(type(tag.attrs))
    print(type(tag))
# Fetch a page and walk downward from <head>/<body> via `.contents`.
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
kv = {'user-agent': 'Mozilla/5.0'}
url = "https://ac.nowcoder.com/acm/contest/5666"

try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
except requests.RequestException:
    # Only network/HTTP failures are handled here; anything else propagates.
    print("爬取失敗")
else:
    # Use the encoding guessed from the body so the text decodes correctly.
    r.encoding = r.apparent_encoding
    demo = r.text
    # Parse only when the download succeeded, so `demo` is always defined.
    soup = BeautifulSoup(demo, "html.parser")
    print(soup.head)
    # `.contents` is the list of a tag's direct children.
    print(soup.head.contents)
    print(soup.body.contents)
    print(len(soup.head.contents))
    # Second direct child of <head>.
    print(soup.head.contents[1])
平行遍歷必須發生在同一個父親節點下
svg