"""Scrape job postings listed on https://careers.tencent.com/search.html.

Each result page of the site's JSON query API is fetched in its own thread.
For every posting on a page, the posting's detail endpoint is queried for the
requirement text, and one JSON object per posting is appended as a line to
``騰訊招聘.json`` in the current working directory.
"""
import json
import threading
import time

import requests

# Millisecond timestamp, computed once at import time and sent as the
# `timestamp` query parameter of every API request.
timestamp = '%d' % (time.time() * 1000)

# Number of result pages to fetch (one thread per page).
num = 10

# Seconds to wait on a request before giving up. Without a timeout a single
# stalled connection would block its thread, and the final join(), forever.
_REQUEST_TIMEOUT = 10

# Serializes appends to the shared output file so that lines written by
# different page threads cannot interleave.
_write_lock = threading.Lock()


def _get_json(url):
    """Fetch *url* and return the decoded JSON body.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def parse_url(json_url):
    """Fetch one result page and append each of its postings to the output file.

    Args:
        json_url: URL of the query endpoint for a single result page.

    For every entry under ``Data.Posts`` in the page response, a second
    request is made to the ``ByPostId`` endpoint to obtain the requirement
    text. The combined record is printed and appended as one JSON line to
    ``騰訊招聘.json``.

    Raises:
        requests.RequestException: if any request fails or times out.
        KeyError: if a posting or detail response lacks an expected field.
    """
    res = _get_json(json_url)
    # Treat a missing or null `Data`/`Posts` as "no postings on this page"
    # instead of failing while trying to iterate over None.
    posts = (res.get('Data') or {}).get('Posts') or []
    for post in posts:
        # Job title
        title = post['RecruitPostName']
        # Job responsibilities
        resbity = post['Responsibility']
        # Posting ID
        post_id = post['PostId']
        # Public link to the posting
        posi_url = 'https://careers.tencent.com/jobdesc.html?postId=' + post_id
        # The requirement text is read from the detail endpoint, looked up by ID
        id_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp={}&postId={}&language=zh-cn'.format(
            timestamp, post_id)
        res_ment = _get_json(id_url)
        # Job requirements
        rement = res_ment['Data']['Requirement']
        # Last update time of the posting
        posi_time = post['LastUpdateTime']
        item = {
            '職位': title,
            '職責': resbity,
            '要求': rement,
            '連接': posi_url,
            '時間': posi_time
        }
        print('正在寫入 → ', item)
        line = json.dumps(item, ensure_ascii=False) + '\n'
        # Several page threads append to the same file; hold the lock so each
        # record lands on its own complete line.
        with _write_lock:
            with open('騰訊招聘.json', 'a', encoding='utf-8') as f:
                f.write(line)


def main():
    """Start one thread per result page and wait for all of them to finish."""
    t_list = []
    for count in range(1, num + 1):
        print('加載第{}頁數據'.format(count))
        # JSON data source for this page
        json_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'.format(
            timestamp, count)
        t_list.append(threading.Thread(target=parse_url, args=(json_url,)))

    for t in t_list:
        t.start()
    for t in t_list:
        t.join()


if __name__ == '__main__':
    main()