本文爬取了貓眼電影top100電影信息。
網址:https://maoyan.com/board/4
# Scrape the Maoyan "board/4" ranking, ten entries per page, and append the
# extracted records to a text file.
import sys
import time

import requests
from bs4 import BeautifulSoup


def get_content(url):
    """Download *url* and return the response body as text.

    Returns:
        The decoded response text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: on connection problems or timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    # Without a timeout a stalled connection would block the crawl forever.
    r = requests.get(url, headers=headers, timeout=10)
    if r.status_code == 200:
        return r.text
    return None


def get_info(html):
    """Extract one record per ``<dd>`` entry of the board list in *html*.

    Returns:
        A list of ``[rank, name, actor, release_time, score]`` lists, all
        strings, in page order.

    Raises:
        ValueError: if the page has no ``<dl class="board-wrapper">``
            element (for example when the site served a different page).
    """
    soup = BeautifulSoup(html, 'lxml')
    board = soup.find('dl', class_='board-wrapper')
    if board is None:
        raise ValueError('board-wrapper list not found in page')

    records = []
    for item in board.find_all('dd'):
        rank = item.find('i').text
        name = item.find('p', class_='name').text
        # The slices drop a fixed-length label at the start of each field
        # (3 and 5 characters respectively) -- presumably the "starring" and
        # "release time" captions; verify against the live page markup.
        actor = item.find('p', class_='star').text.strip()[3:]
        # Named release_time so the imported ``time`` module is not shadowed.
        release_time = item.find('p', class_='releasetime').text.strip()[5:]
        score = item.find('p', class_='score').text
        records.append([rank, name, actor, release_time, score])
    return records


def write_to_file(l_, path='result.txt'):
    """Append every record in *l_* to *path*, one ``str(record)`` per line.

    The file is opened in append mode with UTF-8 encoding, so repeated calls
    (and repeated runs of the script) accumulate lines.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(str(record) + '\n' for record in l_)


def main(i):
    """Fetch the board page at offset *i*, parse it and append the records.

    Raises:
        RuntimeError: if the page could not be downloaded (non-200 status).
        ValueError: if the downloaded page does not contain the board list.
        requests.RequestException: on network failure.
    """
    url = 'https://maoyan.com/board/4?offset={}'.format(i)
    html = get_content(url)
    if html is None:
        # Fail with a clear message instead of handing None to the parser.
        raise RuntimeError('failed to download {}'.format(url))
    write_to_file(get_info(html))


if __name__ == '__main__':
    for offset in range(0, 100, 10):
        try:
            main(offset)
        except Exception as exc:
            # Retry the page once, as the original script did. Catching
            # Exception (not a bare except) lets Ctrl-C stop the crawl, and
            # a second failure propagates with its traceback.
            print('offset {} failed ({!r}); retrying once'.format(offset, exc),
                  file=sys.stderr)
            time.sleep(1)
            main(offset)
        # Pause between pages to avoid hammering the site.
        time.sleep(1)