from urllib.error import URLError
from urllib.request import urlopen
from urllib.request import HTTPError

from bs4 import BeautifulSoup


def getTag(url, tager):
    """Download *url* and return every ``tager`` element found in the page.

    Args:
        url: Address of the page to fetch.
        tager: Name of the tag to search for, e.g. ``'title'``.

    Returns:
        The BeautifulSoup result set holding all matching tags, or ``None``
        when the page cannot be fetched or contains no such tag.
    """
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError):
        # HTTPError: the server answered with an error status.
        # URLError: the server could not be reached at all.
        return None

    bsObj = BeautifulSoup(markup, "html.parser")
    print(tager)
    # Calling the soup object is shorthand for find_all(): a missing tag gives
    # an empty result set rather than None or an AttributeError, so translate
    # "no match" into None to honour the caller's `is None` check.
    return bsObj(tager) or None


title = getTag("http://www.pythonscraping.com/pages/page1.html", 'title')
if title is None:
    print("Title could not be found")
else:
    print(title)
實例2、只獲取單個標籤html
from urllib.error import URLError
from urllib.request import urlopen
from urllib.request import HTTPError

from bs4 import BeautifulSoup


def getTitle(url):
    """Download *url* and return the page's ``<title>`` element.

    Args:
        url: Address of the page to fetch.

    Returns:
        The BeautifulSoup tag for the first ``<title>`` element, or ``None``
        when the page cannot be fetched or has no title.
    """
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError):
        # HTTPError: the server answered with an error status.
        # URLError: the server could not be reached at all.
        return None

    bsObj = BeautifulSoup(markup, "html.parser")
    # Attribute access on the soup yields None for a missing tag instead of
    # raising, so no AttributeError handling is needed for a one-level lookup.
    return bsObj.title


title = getTitle("http://www.pythonscraping.com/pages/page1.html")
if title is None:
    print("Title could not be found")
else:
    print(title)