Python 爬蟲入門——抓取 Wikipedia 詞條

from bs4 import BeautifulSoup
import re 
from urllib import request
# Fetch the English mobile Wikipedia main page and print, for every anchor
# whose href starts with "/wiki/", its link text followed by an absolute URL.
# Links whose href ends in ".jpg" / ".JPG" are skipped.

# Patterns are compiled once up front instead of on every loop iteration.
# The suffix pattern is a raw string: a plain "\." is an invalid escape
# sequence and triggers a warning on modern Python versions.
WIKI_LINK_PATTERN = re.compile("^/wiki/")
JPG_SUFFIX_PATTERN = re.compile(r"\.(jpg|JPG)$")

# The context manager closes the HTTP response as soon as the body is read,
# rather than leaving the connection open until garbage collection.
with request.urlopen("https://en.m.wikipedia.org/wiki/Main_Page") as response:
    req = response.read().decode("utf-8")

soup = BeautifulSoup(req, "html.parser")
for tag in soup.find_all("a", href=WIKI_LINK_PATTERN):
    href = tag["href"]
    if not JPG_SUFFIX_PATTERN.search(href):
        print(tag.get_text(), "<--->", "http://en.m.wikipedia.org" + href)
    
相關文章
相關標籤/搜索