爬騰訊招聘

 1 import requests
 2 import time
 3 import json
 4 import threading
 5 #   採集https://careers.tencent.com/search.html 網站的招聘信息
 6 
 7 #   時間戳
 8 timestamp = '%d' % (time.time() * 1000)
 9 
10 
# parse_url is used as the target of several threads that all append to the
# same output file; this lock keeps each JSON line intact.
_write_lock = threading.Lock()


def parse_url(json_url, timeout=10):
    """Fetch one page of job postings and append each posting to the output file.

    For every posting in the page, a second request is made to the
    ``ByPostId`` endpoint to obtain the requirement text. Each posting is
    then printed and appended as one JSON line to ``騰訊招聘.json``.

    Args:
        json_url: URL of the posting-list (``Query``) endpoint for one page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        KeyError: If a response lacks one of the expected fields.
    """
    # Without a timeout a stalled connection would block this worker (and
    # the caller's join()) forever.
    response = requests.get(json_url, timeout=timeout)
    response.raise_for_status()
    res = response.json()

    # NOTE(review): guards against a null/empty 'Posts' value (e.g. a page
    # past the last result) — confirm what the API actually returns there.
    posts = res['Data']['Posts'] or []

    for post in posts:
        # Job title
        title = post['RecruitPostName']
        # Job responsibilities
        resbity = post['Responsibility']
        # Posting ID (named post_id to avoid shadowing the builtin `id`)
        post_id = post['PostId']
        # Public link to the posting
        posi_url = 'https://careers.tencent.com/jobdesc.html?postId={}'.format(post_id)
        # Detail endpoint for this posting, looked up by ID
        id_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp={}&postId={}&language=zh-cn'.format(
            timestamp, post_id)
        detail_response = requests.get(id_url, timeout=timeout)
        detail_response.raise_for_status()
        res_ment = detail_response.json()
        # Job requirements
        rement = res_ment['Data']['Requirement']
        # Last update time of the posting
        posi_time = post['LastUpdateTime']
        item = {
            '職位': title,
            '職責': resbity,
            '要求': rement,
            '連接': posi_url,
            '時間': posi_time
        }
        print('正在寫入 → ', item)
        # Serialize the append so concurrent workers cannot interleave lines.
        with _write_lock:
            with open('騰訊招聘.json', 'a', encoding='utf-8') as f:
                f.write(json.dumps(item, ensure_ascii=False) + '\n')
42 
43 
# Number of result pages to fetch (one worker thread per page)
num = 10
t_list = []

# Posting-list endpoint; the two placeholders are the timestamp and the
# 1-based page index.
_query_template = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'

# Create (but do not yet start) one thread per page.
for count in range(1, num + 1):
    print('加載第{}頁數據'.format(count))
    json_url = _query_template.format(timestamp, count)
    t = threading.Thread(target=parse_url, args=(json_url,))
    t_list.append(t)

# Start every worker first, then wait for all of them to finish.
for t in t_list:
    t.start()
for t in t_list:
    t.join()
騰訊招聘
相關文章
相關標籤/搜索