這是網頁上的 script,我要獲取的是 00914 這個數字,直接使用正則表達式便可。 html
運行結果:python
源碼:正則表達式
"""Fetch a page and print one quoted value from its third inline script.

The script downloads ``url``, takes the text of the third ``<script>``
element under ``<body>``, prints that text, and then prints the sixth
single-quoted literal found in it (presumably the wanted number such as
``00914`` -- which literal is the right one depends on the target page,
so confirm the index against the real markup).
"""
import re
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Zero-based position of the wanted <script> among "body script" matches.
_SCRIPT_POSITION = 2

# The script is only accepted when it contains a `var _url = '...';`
# assignment that ends a line (MULTILINE makes `$` match at line ends).
_URL_ASSIGNMENT = re.compile(r"var _url = '(.*?)';$", re.MULTILINE | re.DOTALL)

# Splitting on "'" leaves quoted literals at the odd indices (as long as
# the script has no escaped quotes), so index 11 is the sixth literal.
_QUOTED_FIELD_INDEX = 11


def _third_script_text(html):
    """Return the text of the third ``<script>`` element inside ``<body>``.

    Args:
        html: Raw markup (bytes or str) of the page to parse.

    Raises:
        ValueError: If the page has fewer than three matching scripts.
    """
    scripts = BeautifulSoup(html, "html.parser").select("body script")
    if len(scripts) <= _SCRIPT_POSITION:
        raise ValueError(
            f"expected at least {_SCRIPT_POSITION + 1} <script> tags in <body>, "
            f"found {len(scripts)}"
        )
    return scripts[_SCRIPT_POSITION].get_text()


def _quoted_field(script_text, index=_QUOTED_FIELD_INDEX):
    """Return piece ``index`` of ``script_text`` split on single quotes.

    The regex is used purely as a guard: the value is taken from the whole
    script text, not from the captured group, which matches what
    ``pattern.search(text).string.split("'")[index]`` evaluates to.

    Raises:
        ValueError: If the ``_url`` assignment is missing or the script
            contains too few single quotes to reach ``index``.
    """
    if _URL_ASSIGNMENT.search(script_text) is None:
        raise ValueError("script has no line-ending `var _url = '...';` assignment")
    pieces = script_text.split("'")
    if index >= len(pieces):
        raise ValueError(
            f"script splits into {len(pieces)} pieces on \"'\"; "
            f"cannot read piece {index}"
        )
    return pieces[index]


url = "你要解析的網頁URL"
# The context manager closes the HTTP response even if reading fails.
with urlopen(url) as response:
    html = response.read()

script_text = _third_script_text(html)
print(script_text)
print(_quoted_field(script_text))