import json
import re
from multiprocessing import Pool

import requests
def get_one_page(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, connection failure or timeout).
    """
    # Browser-like User-Agent sent with every request.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    }
    try:
        res = requests.get(url, headers=header, timeout=timeout)
    except requests.RequestException:
        # Report network problems the same way as a bad status code so the
        # caller only has to handle a single failure value.
        return None
    if res.status_code == 200:
        return res.text
    return None
def parse_html(html):
    """Yield one dict per movie entry found in a listing page.

    Each entry is located with a regular expression anchored on a
    ``<div class="cover g-playicon">`` element and produces these keys:

    * ``img``    - ``src`` attribute of the ``<img>`` tag
    * ``time``   - text of ``<span class="hint">``
    * ``title``  - text of ``<span class="s1">``
    * ``pinfen`` - text of ``<span class="s2">`` (presumably the rating;
      confirm against the live page)
    * ``direct`` - content of ``<p class="star">``

    Args:
        html: Page source as a string. ``None`` or an empty string yields
            nothing, so a failed download can be passed straight through.

    Yields:
        dict: The five captured fields for every match, in document order.
    """
    if not html:
        return

    # str.replace returns a new string; the result has to be kept for the
    # newline stripping to take effect.
    html = html.replace('\n', '')

    # Non-greedy ``.*?`` between the anchors skips arbitrary markup; a bare
    # ``.?`` would allow at most one character and never match a real page.
    pattern = (
        r'<div class="cover g-playicon">.*?<img src="(.*?)">'
        r'.*?<span class="hint">(.*?)</span>'
        r'.*?<span class="s1">(.*?)</span>'
        r'.*?<span class="s2">(.*?)</span>'
        r'.*?<p class="star">(.*?)</p>'
    )
    for img, hint, s1, s2, star in re.findall(pattern, html, re.S):
        yield {
            'img': img,
            'time': hint,
            'title': s1,
            'pinfen': s2,
            'direct': star,
        }
def write_to_file(content, path='2.txt'):
    """Append *content* to a text file as one JSON document per line.

    Args:
        content: JSON-serialisable object to store.
        path: File to append to; defaults to ``2.txt`` in the current
            working directory.
    """
    # ensure_ascii=False keeps non-ASCII text readable in the output instead
    # of escaping it as \uXXXX sequences. The ``with`` block closes the file,
    # so no explicit close() is needed.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
def main(offset):
    """Download one listing page and append every parsed entry to the output file.

    Args:
        offset: Page number appended to the ``pageno`` query parameter of
            the listing URL.
    """
    url = 'https://www.360kan.com/dianying/list.php?cat=100&area=all&act=all&year=all&pageno=' + str(offset)
    html = get_one_page(url)
    if html is None:
        # get_one_page returns None when the page could not be fetched;
        # there is nothing to parse for this page.
        return
    for item in parse_html(html):
        write_to_file(item)
# Run the scraper only when the file is executed as a script, not on import.
if __name__ == '__main__':
    # Process listing pages 0 through 9, one after another.
    for i in range(10):
        main(i)