太簡單了,就當作個記錄,直接貼代碼了。
"""Scrape the list entries from a 4399 guide page and store them on disk.

For every ``<li>`` found inside a ``<ul class="kzlist">`` on ``base_url`` the
script creates a directory (named after the entry) below the current working
directory and writes two files into it:

* ``pic.jpg``  - the image referenced by the entry's ``<img src=...>``
* ``info.txt`` - the entry name followed by the text of its ``<p>`` element
"""
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Page to scrape. Defined at module level (under its original name) so that
# send() also works when this file is imported instead of run as a script.
base_url = 'http://news.4399.com/gonglue/lscs/kabei/'

# Seconds to wait on a single HTTP request before giving up; without a
# timeout requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def send():
    """Download ``base_url`` and pass the decoded HTML to ``parseAndSave``.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an HTTP error status.
    """
    response = requests.get(url=base_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Force the encoding so the page text is not decoded into garbage.
    response.encoding = "GBK"
    parseAndSave(response.text)


def _safe_dir_name(name):
    """Return ``name`` reduced to a single safe path component ('' if none)."""
    cleaned = name.strip()
    # The name comes from scraped HTML: never let it address another
    # directory through path separators or the special '.' / '..' entries.
    for separator in ('/', '\\'):
        cleaned = cleaned.replace(separator, '_')
    if cleaned in ('.', '..'):
        return ''
    return cleaned


def _extract_entry(item):
    """Return ``(name, obtain_method, src)`` for one ``<li>``, or ``None``.

    ``None`` is returned when the element lacks any of the pieces the
    script needs, so a single malformed entry cannot abort the whole run.
    """
    name_img = item.find('img')
    link = item.find('a')
    if name_img is None or link is None:
        return None

    description = link.find('p')
    picture = link.find('img')
    if description is None or picture is None or not picture.get('src'):
        return None

    # The entry name is the text node that directly follows the first image;
    # a missing sibling or a nested tag cannot serve as a name.
    sibling = name_img.next_sibling
    if not isinstance(sibling, str):
        return None
    name = str(sibling).strip()
    if not name:
        return None

    return name, description.text, picture['src']


def _save_entry(root_dir, name, obtain_method, src):
    """Write ``pic.jpg`` and ``info.txt`` for one entry below ``root_dir``."""
    dir_name = _safe_dir_name(name)
    if not dir_name:
        print('skipped entry with unusable name: {!r}'.format(name))
        return

    # Build absolute paths instead of os.chdir(): the working directory
    # stays untouched even when a download or a write fails half-way.
    target_dir = os.path.join(root_dir, dir_name)
    os.makedirs(target_dir, exist_ok=True)

    # src may be relative or protocol-relative ("//host/x.jpg"); resolve it
    # against the page URL so requests always receives an absolute URL.
    picture_url = urljoin(base_url, src)
    try:
        picture = requests.get(picture_url, timeout=REQUEST_TIMEOUT)
        picture.raise_for_status()
    except requests.RequestException as exc:
        # Keep going: the text info is still worth recording.
        print('image download failed for {}: {}'.format(name, exc))
    else:
        with open(os.path.join(target_dir, 'pic.jpg'), 'wb') as fw:
            fw.write(picture.content)

    # Explicit encoding: the locale default may be unable to encode the
    # scraped text. The trailing newline keeps records from repeated runs
    # (append mode) from running into each other.
    info_path = os.path.join(target_dir, 'info.txt')
    with open(info_path, 'a', encoding='utf-8') as fw:
        fw.write(name + '\n')
        fw.write(obtain_method + '\n')


def parseAndSave(html):
    """Parse the page HTML and save every list entry to disk.

    Args:
        html: Markup of the page as text.

    Each ``<li>`` inside a ``<ul class="kzlist">`` is stored in its own
    directory below the current working directory. Entries that lack the
    expected ``<img>``, ``<a>`` or ``<p>`` elements are skipped.
    """
    soup = BeautifulSoup(html, 'lxml')
    root_dir = os.getcwd()
    for ul in soup.find_all('ul', attrs={'class': 'kzlist'}):
        for item in ul.find_all('li'):
            entry = _extract_entry(item)
            if entry is None:
                continue
            name, obtain_method, src = entry
            _save_entry(root_dir, name, obtain_method, src)


def main():
    """Run the scraper once and print the elapsed wall-clock time."""
    # perf_counter() is monotonic, unlike time.time(), so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    send()
    elapsed = time.perf_counter() - start_time
    print('程序用時:', elapsed)


if __name__ == '__main__':
    main()