# -*- coding: utf-8 -*-
"""Crawl the mm.taobao.com model search listing and save one folder per model.

For every entry returned by the search endpoint the script creates a directory
named after the entry's ``realName`` (under the current working directory) and
writes into it:

* ``<realName>.txt``  - name, city, profile URL, height and weight;
* ``<HH-MM-SS><letter>.jpg`` - the avatar image;
* ``聯繫方式.txt`` - the result of searching the profile page for a phone number.

Pages are fetched concurrently through a gevent pool.
"""
from gevent import monkey

# Patch the standard library before any networking module is imported so that
# urllib / requests calls cooperate with gevent.
monkey.patch_all()

import datetime
import json
import os
import random
import re
import time
import urllib.parse
import urllib.request
from urllib.request import Request

import requests
from gevent.pool import Pool

# Wall-clock time at import; not read anywhere in the visible code.
starttime = datetime.datetime.now()

# Pool of User-Agent headers; one is picked at random per search request.
user_agent = [
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
]

# Phone-number pattern searched for in each profile page. Compiled once here
# instead of once per model inside the loop.
_PHONE_PATTERN = re.compile(r'(13\d|14[579]|15[^4\D]|17[^49\D]|18\d)\d{8}')


def getMMData(url, currentPage=0, timeout=10):
    """POST a search request and return the list of model records.

    Args:
        url: Search endpoint to POST the form to.
        currentPage: Page index sent in the ``currentPage`` form field.
        timeout: Socket timeout in seconds for the request, so a stalled
            connection cannot block a worker forever.

    Returns:
        The value stored under ``['data']['searchDOList']`` in the decoded
        JSON response.

    Raises:
        urllib.error.URLError: On network failure or timeout.
        UnicodeDecodeError: If the response body is not valid GBK.
        ValueError: If the response body is not valid JSON.
        KeyError: If the expected keys are missing from the response.
    """
    formdata = {
        'q': '',
        'viewFlag': 'A',
        'sortType': 'default',
        'searchStyle': '',
        'searchRegion': 'city:',
        'searchFansNum': '',
        'currentPage': currentPage,
        'pageSize': '100'
    }
    payload = urllib.parse.urlencode(formdata)
    headers = {'User-Agent': random.choice(user_agent)}
    requ = Request(url, data=bytes(payload, 'utf-8'), headers=headers)
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(requ, timeout=timeout) as response:
        raw = response.read()
    # The body is decoded as GBK, as in the original code.
    dataToDict = json.loads(raw.decode('gbk'))
    return dataToDict['data']['searchDOList']


def getMMID(data):
    """Build one identifier string per record.

    Each identifier is the concatenation of the record's ``userId``,
    ``avatarUrl``, ``realName``, ``city``, ``height`` and ``weight``. Every
    field is converted with ``str`` first, because ``userId`` is formatted
    with ``%d`` elsewhere in this file and adding an int to a string raises
    ``TypeError``.

    Args:
        data: Iterable of record mappings containing the six keys above.

    Returns:
        A list of identifier strings, in input order.
    """
    fields = ('userId', 'avatarUrl', 'realName', 'city', 'height', 'weight')
    return [''.join(str(record[key]) for key in fields) for record in data]


def _save_profile(folder, model, homepage):
    """Write the model's basic details to ``<realName>.txt`` inside *folder*."""
    profile_path = os.path.join(folder, model['realName'] + ".txt")
    # Explicit UTF-8 so the Chinese text does not depend on the platform's
    # default encoding; ``with`` closes the file the original left open.
    with open(profile_path, "w+", encoding="utf-8") as f:
        f.write("姓名:" + model['realName'] + "\n" + "城市: " + model['city'] + "\n"
                + "我的主頁:" + homepage + "\n" + "身高:" + model['height'] + "\n"
                + "體重:" + model['weight'])


def _save_avatar_and_contact(folder, model, homepage):
    """Download the avatar image and record the phone-number search result."""
    with urllib.request.urlopen("http:" + model['avatarUrl'], timeout=10) as fwws:
        image = fwws.read()
    image_name = (time.strftime('%H-%M-%S')
                  + random.choice('qwertyuiopasdfghjklzxcvbnm') + ".jpg")
    with open(os.path.join(folder, image_name), 'wb') as code:
        code.write(image)

    # Same 10-second timeout as the avatar fetch, so one stalled profile page
    # cannot hang the worker.
    page = requests.get(homepage, timeout=10)
    tel = _PHONE_PATTERN.search(page.text)
    # The string form of the search result (a match object or None) is what
    # gets written, as in the original.
    with open(os.path.join(folder, "聯繫方式.txt"), "w+", encoding="utf-8") as fz:
        fz.write("mathch內就是聯繫電話" + str(tel))


def getgirl(page):
    """Fetch one result page and save every model listed on it.

    Failures while downloading a single model's avatar or profile page are
    reported and skipped. Any other failure aborts the rest of this page but
    is reported instead of propagating to the pool, so the remaining pages
    still run.

    Args:
        page: Page index to request from the search endpoint.
    """
    url = 'https://mm.taobao.com/tstar/search/tstar_model.do?_input_charset=utf-8'
    try:
        for model in getMMData(url, page):
            homepage = ("https://mm.taobao.com/self/aiShow.htm?spm=719.7763510.1998643336.2.2WOZLp&userId=%d" % model['userId'])
            # Output goes under the current working directory (not necessarily
            # the directory containing this script).
            # NOTE(review): realName comes from the remote response and is
            # used as a directory name unsanitised - confirm it can never
            # contain path separators.
            folder = os.path.join(os.getcwd(), model['realName'])
            print(model['realName'])
            # exist_ok avoids the check-then-create race between pool workers.
            os.makedirs(folder, exist_ok=True)
            _save_profile(folder, model, homepage)
            try:
                _save_avatar_and_contact(folder, model, homepage)
            except Exception:
                # Best effort per model; unlike a bare ``except`` this lets
                # KeyboardInterrupt and gevent's GreenletExit through.
                print("拋出頁面%s" % homepage)
    except Exception as exc:
        # The original caught only EOFError, which none of the network,
        # decode or lookup failures here raise, so any error aborted the run.
        print("程序出現錯誤")
        print("page %d: %r" % (page, exc))
    finally:
        print("如今跑到%d" % page)


def run():
    """Process result pages 1 through 45 using a pool of two greenlets."""
    pool = Pool(2)
    pool.map(getgirl, range(1, 46))


if __name__ == '__main__':
    print("開始啓動程序")
    run()