import requests
import re
# One shared Session for the whole script, so cookies set by earlier
# responses are sent automatically with the later requests.
session = requests.Session()
# 第一步:訪問登錄頁,拿到X_Anti_Forge_Token,X_Anti_Forge_Code
# 一、請求url:https://passport.lagou.com/login/login.html
# 二、請求方法:GET 由於是get請求不須要請求體
# 三、請求頭:User-agent
# 代碼如下:
# Step 1: GET the login page and pull the two anti-forgery values out of the
# response body. They are sent back as request headers by the login POST.
r1 = session.get(
    'https://passport.lagou.com/login/login.html',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    },
)

# re.findall returns a list, so take the first match. An IndexError here
# means the page did not contain the expected assignment.
X_Anti_Forge_Token = re.findall("X_Anti_Forge_Token = '(.*?)'", r1.text, re.S)[0]
X_Anti_Forge_Code = re.findall("X_Anti_Forge_Code = '(.*?)'", r1.text, re.S)[0]
# 第二步:登錄
# 一、請求url:https://passport.lagou.com/login/login.json
# 二、請求方法:POST
# 三、請求頭:包含:cookie,User-agent,Referer,X-Anit-Forge-Code,X-Anit-Forge-Token
# 四、請求體包含以下:
# isValidate:true
# username:18611453110
# password:70621c64832c4d4d66a47be6150b4a8e
# request_form_verifyCode:''
# submit:''
# 代碼如下:
# Step 2: log in by POSTing the form to login.json. The anti-forgery values
# scraped from the login page go in the request headers; the session keeps
# whatever cookies the response sets.
# NOTE(review): the account name and password hash are hard-coded below;
# consider loading them from configuration or the environment instead.
r2 = session.post(
    'https://passport.lagou.com/login/login.json',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
        'X-Anit-Forge-Code': X_Anti_Forge_Code,
        'X-Anit-Forge-Token': X_Anti_Forge_Token,
        'X-Requested-With': 'XMLHttpRequest'
    },
    data={
        "isValidate": True,
        'username': '18611453110',  # login user name
        'password': '70621c64832c4d4d66a47be6150b4a8e',  # the encrypted password
        'request_form_verifyCode': '',
        'submit': ''
    }
)
# 第三步:授權
# 一、請求url:https://passport.lagou.com/grantServiceTicket/grant.html
# 二、請求方法:GET
# 三、請求頭:包含:User-agent,Referer
# Step 3: request the service-ticket grant page, sending the login page as
# the Referer. Cookies from the login response travel with the session.
r3 = session.get(
    'https://passport.lagou.com/grantServiceTicket/grant.html',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Referer': 'https://passport.lagou.com/login/login.html',
    },
)
# 第四步:驗證是否登錄成功:
# Step 4: fetch the "my resume" page with the same session so the response
# can be inspected to confirm the login worked.
r4 = session.get(
    'https://www.lagou.com/resume/myresume.html',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    },
)
# print('18611453110' in r4.text)  # check whether the login succeeded
# 第五步:篩選職位信息
# 請求url:https://www.lagou.com/jobs/list_java%E9%AB%98%E7%BA%A7%E5%BC%80%E5%8F%91
# 請求方法:GET
# 請求頭:
# User-Agent
# 請求參數:
# gj:3年及如下
# px:default
# yx:25k-50k
# city:北京
from urllib.parse import urlencode

# Build the search-results page URL. urlencode yields "k=<percent-encoded
# keyword>"; only the encoded keyword (the part after "=") is appended.
# The keyword is written in Simplified Chinese so that the result matches the
# request URL documented for this step
# (.../list_java%E9%AB%98%E7%BA%A7%E5%BC%80%E5%8F%91).
res = urlencode({'k': 'java高级开发'}, encoding='utf-8').split('=')[-1]
url = 'https://www.lagou.com/jobs/list_' + res
# r5 = session.get(url,
#                  headers={
#                      'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
#                  },
#                  params={
#                      'gj': '3年及如下',
#                      'px': 'default',
#                      'yx': '25k-50k',
#                      'city': '北京'
#                  }
#                  )
#
# print(r5.text)
# 沒有取到數據,因為數據是通過ajax發送的,所以我們換另一種方法解決:
#請求url:https://www.lagou.com/jobs/positionAjax.json
#請求方法:POST
#請求頭
# Referer
# User-Agent
#請求體:
# first:true
# pn:1
# kd:java高級開發
#請求參數
# params={
# 'gj': '3年及如下',
# 'px': 'default',
# 'yx': '25k-50k',
# 'city': '北京',
# 'needAddtionalResult':False,
# 'isSchoolJob':0
# }
# Step 6: query the position-list AJAX endpoint. The search-results page URL
# (`url`, built earlier) is sent as the Referer. The form body carries the
# paging flags and keyword; the filters go in the query string.
r6 = session.post(
    'https://www.lagou.com/jobs/positionAjax.json',
    headers={
        'Referer': url,
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    },
    data={
        'first': True,
        'pn': 1,
        # Simplified Chinese keyword, matching the search URL documented in
        # this script (.../list_java%E9%AB%98%E7%BA%A7%E5%BC%80%E5%8F%91).
        'kd': 'java高级开发'
    },
    params={
        # NOTE(review): '3年及如下' looks like a garbled '3年及以下'
        # ("3 years or less") -- confirm against the site's filter values.
        'gj': '3年及如下',
        'px': 'default',
        'yx': '25k-50k',
        'city': '北京',
        'needAddtionalResult': False,
        'isSchoolJob': 0
    }
)
# Walk every position returned by the list request. For each one: print a
# summary, open its detail page to get fresh anti-forgery values (step 7),
# then submit the resume (step 8). Steps 7 and 8 run inside the loop so they
# apply to every position, and so that nothing runs (and no name is left
# undefined) when the result list is empty.
comapines_list = r6.json()['content']['positionResult']['result']
for comapiny in comapines_list:
    positionId = comapiny['positionId']
    company_link = 'https://www.lagou.com/jobs/{pos_id}.html'.format(pos_id=positionId)
    companyShortName = comapiny['companyShortName']
    positionName = comapiny['positionName']
    salary = comapiny['salary']
    print(''' 詳情鏈接:%s 公司名:%s 職位名:%s 薪資:%s ''' % (company_link, companyShortName, positionName, salary))

    # Step 7: visit the detail page and read X_Anti_Forge_Token and
    # X_Anti_Forge_Code from it.
    #   Request URL:    the detail page address
    #   Request method: GET
    #   Headers:        User-Agent
    r7 = session.get(
        company_link,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        },
    )
    # re.findall returns a list; [0] raises IndexError if the page lacks the
    # expected assignment.
    X_Anti_Forge_Token = re.findall("X_Anti_Forge_Token = '(.*?)'", r7.text, re.S)[0]
    X_Anti_Forge_Code = re.findall("X_Anti_Forge_Code = '(.*?)'", r7.text, re.S)[0]

    # Step 8: deliver the resume.
    #   Request URL:    https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json
    #   Request method: POST
    #   Headers:
    #     Referer: the detail page address
    #     User-agent
    #     X-Anit-Forge-Code:  e.g. 53165984
    #     X-Anit-Forge-Token: e.g. 3b6a2f62-80f0-428b-8efb-ef72fc100d78
    #     X-Requested-With:   XMLHttpRequest
    #   Body:
    #     positionId: the position ID
    #     type: 1
    #     force: true
    session.post(
        'https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json',
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
            'Referer': company_link,
            'X-Anit-Forge-Code': X_Anti_Forge_Code,
            'X-Anit-Forge-Token': X_Anti_Forge_Token,
            'X-Requested-With': 'XMLHttpRequest'
        },
        data={
            'positionId': positionId,
            'type': 1,
            'force': True
        }
    )
    # NOTE(review): the response is not inspected, so this message is printed
    # whether or not the server accepted the delivery.
    print('%s 投遞成功' % (companyShortName))
# 第七步和第八步是並列的,都放在第六步的循環裏面:第六步每找到一個公司,就進入其詳情頁,然後投遞簡歷。