"""Collect entry titles from the sanye.cx ``cate=69`` listing pages into ``sanye.md``."""
import urllib.request
from typing import List

from bs4 import BeautifulSoup


def getHtml(url: str) -> str:
    """Fetch *url* and return the response body decoded as UTF-8.

    The request is sent with a ``Mozilla/5.0`` user-agent header, and the
    HTTP status code of the response is printed.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    print("第二種方法")
    request = urllib.request.Request(url)
    request.add_header("user-agent", "Mozilla/5.0")
    # Open the Request object rather than the bare URL; otherwise the
    # user-agent header set above is never transmitted.
    with urllib.request.urlopen(request) as response:
        print(response.getcode())
        return response.read().decode("utf-8")


def urllist() -> List[str]:
    """Return the URLs of listing pages 1 through 10, in ascending order."""
    baseurl = "http://www.sanye.cx/?cate=69"
    return [baseurl + "&page=" + str(page) for page in range(1, 11)]


def logtext(content: str) -> None:
    """Append *content* followed by ``"\\r\\n"`` to ``sanye.md`` (UTF-8).

    Args:
        content: Text to append to the output file.
    """
    # The context manager closes the handle after every call instead of
    # leaking one open file object per logged line.
    with open('sanye.md', 'a+', encoding="utf-8") as f:
        f.write(content)
        f.write("\r\n")


def parsedata(data: str) -> None:
    """Extract entry names from one listing page and log them.

    Prints the page title, then for every ``<li>`` inside the ``<ul>`` of the
    first element with class ``list`` takes the text of its ``<h2><a>`` link,
    prints it, and appends it to ``sanye.md`` prefixed with ``##``.

    Missing elements are skipped rather than raising ``AttributeError``, so a
    page with an unexpected layout does not abort the remaining pages.

    Args:
        data: HTML source of a listing page.
    """
    soup = BeautifulSoup(data, 'html.parser')
    if soup.title is not None:
        print(soup.title.text)

    container = soup.find(class_='list')
    ul = container.find('ul') if container is not None else None
    if ul is None:
        # No recognisable listing on this page: nothing to record.
        return

    for li in ul.find_all('li'):
        heading = li.find('h2')
        link = heading.find('a') if heading is not None else None
        if link is None:
            # Skip list items that do not carry a titled link.
            continue
        name = link.get_text()
        print(name)
        logtext("##" + name)


def run() -> None:
    """Download every listing page in order and record its entry names."""
    urllists = urllist()
    print(urllists)
    for url in urllists:
        parsedata(getHtml(url))


# Script entry point: the crawl starts as soon as the module is executed.
run()