# passhtml  (stray extraction residue; not part of the script)
import os
import re

import requests

# Browser User-Agent sent with every request of the login flow.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
)
LOGIN_PAGE_URL = 'https://passport.lagou.com/login/login.html'
# Seconds to wait for each response; requests applies no timeout by default,
# so without this a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 10

# Credentials can be supplied through the environment. The literals are kept
# only as backward-compatible defaults.
# NOTE(review): hard-coded credentials should be removed from source control.
LOGIN_USERNAME = os.environ.get('LAGOU_USERNAME', '18611453110')
LOGIN_PASSWORD = os.environ.get('LAGOU_PASSWORD', '70621c64832c4d4d66a47be6150b4a8e')


def _extract_anti_forge(html):
    """Return the ``(token, code)`` anti-forgery pair embedded in *html*.

    Args:
        html: Text of a page that assigns ``X_Anti_Forge_Token`` and
            ``X_Anti_Forge_Code`` to quoted string values.

    Returns:
        A ``(token, code)`` tuple of strings.

    Raises:
        RuntimeError: If either assignment is missing from *html*.
    """
    token = re.search(r"X_Anti_Forge_Token = '(.*?)'", html, re.S)
    code = re.search(r"X_Anti_Forge_Code = '(.*?)'", html, re.S)
    if token is None or code is None:
        raise RuntimeError(
            'X_Anti_Forge_Token / X_Anti_Forge_Code not found in page'
        )
    return token.group(1), code.group(1)


# A single session is reused so cookies set by one step are sent on the next.
session = requests.session()

# Step 1: visit the login page to obtain X_Anti_Forge_Token and
# X_Anti_Forge_Code.
#   Request URL: https://passport.lagou.com/login/login.html
#   Method:      GET
#   Headers:     User-Agent
r1 = session.get(
    LOGIN_PAGE_URL,
    headers={'User-Agent': USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)
r1.raise_for_status()
X_Anti_Forge_Token, X_Anti_Forge_Code = _extract_anti_forge(r1.text)
# print(X_Anti_Forge_Token, X_Anti_Forge_Code)

# Step 2: log in.
#   Request URL: https://passport.lagou.com/login/login.json
#   Method:      POST
#   Headers:     cookie (carried by the session), User-Agent,
#                Referer: https://passport.lagou.com/login/login.html,
#                X-Anit-Forge-Code, X-Anit-Forge-Token (values from step 1),
#                X-Requested-With: XMLHttpRequest
#   Body:        isValidate, username, password,
#                request_form_verifyCode (empty), submit (empty)
r2 = session.post(
    'https://passport.lagou.com/login/login.json',
    headers={
        'User-Agent': USER_AGENT,
        'Referer': LOGIN_PAGE_URL,
        # The "Anit" spelling is the header name this script has always sent;
        # it is kept unchanged on purpose.
        'X-Anit-Forge-Code': X_Anti_Forge_Code,
        'X-Anit-Forge-Token': X_Anti_Forge_Token,
        'X-Requested-With': 'XMLHttpRequest',
    },
    data={
        "isValidate": True,
        'username': LOGIN_USERNAME,
        'password': LOGIN_PASSWORD,
        'request_form_verifyCode': '',
        'submit': '',
    },
    timeout=REQUEST_TIMEOUT,
)
r2.raise_for_status()

# Step 3: authorization (grant the service ticket).
#   Request URL: https://passport.lagou.com/grantServiceTicket/grant.html
#   Method:      GET
#   Headers:     User-Agent,
#                Referer: https://passport.lagou.com/login/login.html
r3 = session.get(
    'https://passport.lagou.com/grantServiceTicket/grant.html',
    headers={
        'User-Agent': USER_AGENT,
        'Referer': LOGIN_PAGE_URL,
    },
    timeout=REQUEST_TIMEOUT,
)
r3.raise_for_status()
# pprint and urlencode are not used below; the imports are kept in case code
# elsewhere in the file relies on them.
from pprint import pprint
from urllib.parse import quote_plus, urlencode

# Browser User-Agent sent with every request in this section.
BROWSER_UA = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
)
# Seconds to wait for each response; requests applies no timeout by default.
HTTP_TIMEOUT = 10
# Search keyword, used both for the listing URL and the search form body.
# NOTE(review): the documented listing URL below percent-encodes the keyword
# in Simplified Chinese, while this literal is in Traditional script - confirm
# which form the site expects.
SEARCH_KEYWORD = 'java高級開發'

# Step 4: verification - request the resume page with the logged-in session.
r4 = session.get(
    'https://www.lagou.com/resume/myresume.html',
    headers={'User-Agent': BROWSER_UA},
    timeout=HTTP_TIMEOUT,
)
# print('18611453110' in r4.text)

# Step 5: filter job postings.
#   Request URL: https://www.lagou.com/jobs/list_java%E9%AB%98%E7%BA%A7%E5%BC%80%E5%8F%91
#   Method:      GET
#   Headers:     User-Agent
#   Query:       gj (experience), px=default, yx=25k-50k, city
# The GET itself is not performed; only the URL is built, because step 6
# sends it as the Referer header.
res = quote_plus(SEARCH_KEYWORD, encoding='utf-8')
url = 'https://www.lagou.com/jobs/list_' + res

# Step 6: query the position search endpoint.
#   Request URL: https://www.lagou.com/jobs/positionAjax.json
#   Method:      POST
#   Headers:     Referer, User-Agent
#   Body:        first, pn (page number), kd (keyword)
#   Query:       gj, px, yx, city, needAddtionalResult, isSchoolJob
# NOTE(review): 'gj' value '3年及如下' looks like a character-conversion
# artifact of '3年及以下' - confirm against the site's filter values.
# NOTE(review): the documented body uses pn:1 while the code requests page 2
# together with first=True - confirm this is intentional.
r6 = session.post(
    'https://www.lagou.com/jobs/positionAjax.json',
    headers={
        'Referer': url,
        'User-Agent': BROWSER_UA,
    },
    data={
        'first': True,
        'pn': 2,
        'kd': SEARCH_KEYWORD,
    },
    params={
        'gj': '3年及如下',
        'px': 'default',
        'yx': '25k-50k',
        'city': '北京',
        'needAddtionalResult': False,
        'isSchoolJob': 0,
    },
    timeout=HTTP_TIMEOUT,
)
r6.raise_for_status()
# print(r6.json())

# Fail with a readable message (instead of a bare KeyError) when the response
# is not JSON or lacks the expected nesting.
try:
    positions = r6.json()['content']['positionResult']['result']
except (KeyError, TypeError, ValueError) as exc:
    raise RuntimeError(
        'unexpected positionAjax.json response: %r' % r6.text[:200]
    ) from exc

for position in positions:
    position_id = position['positionId']
    company_link = 'https://www.lagou.com/jobs/{pos_id}.html'.format(pos_id=position_id)
    company_short_name = position['companyShortName']
    position_name = position['positionName']
    salary = position['salary']
    print(''' 詳情鏈接:%s 公司名:%s 職位名:%s 薪資:%s ''' % (company_link, company_short_name, position_name, salary))

    try:
        # Step 7: visit the detail page to obtain a fresh X_Anti_Forge_Token
        # and X_Anti_Forge_Code.
        #   Request URL: the detail page address
        #   Method:      GET
        #   Headers:     User-Agent
        r7 = session.get(
            company_link,
            headers={'User-Agent': BROWSER_UA},
            timeout=HTTP_TIMEOUT,
        )
        r7.raise_for_status()
        token_match = re.search(r"X_Anti_Forge_Token = '(.*?)'", r7.text, re.S)
        code_match = re.search(r"X_Anti_Forge_Code = '(.*?)'", r7.text, re.S)
        if token_match is None or code_match is None:
            # Without both values the delivery request cannot be built;
            # skip this position rather than abort the whole run.
            print('%s 投遞失敗: 詳情頁缺少 X_Anti_Forge_Token/X_Anti_Forge_Code' % company_short_name)
            continue
        X_Anti_Forge_Token = token_match.group(1)
        X_Anti_Forge_Code = code_match.group(1)
        # print(X_Anti_Forge_Token, X_Anti_Forge_Code)

        # Step 8: deliver the resume.
        #   Request URL: https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json
        #   Method:      POST
        #   Headers:     Referer (detail page address), User-Agent,
        #                X-Anit-Forge-Code, X-Anit-Forge-Token,
        #                X-Requested-With: XMLHttpRequest
        #   Body:        positionId, type=1, force=true
        r8 = session.post(
            'https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json',
            headers={
                'User-Agent': BROWSER_UA,
                'Referer': company_link,
                'X-Anit-Forge-Code': X_Anti_Forge_Code,
                'X-Anit-Forge-Token': X_Anti_Forge_Token,
                'X-Requested-With': 'XMLHttpRequest',
            },
            data={
                'positionId': position_id,
                'type': 1,
                'force': True,
            },
            timeout=HTTP_TIMEOUT,
        )
        r8.raise_for_status()
    except requests.RequestException as exc:
        # One failed position must not stop the remaining deliveries.
        print('%s 投遞失敗: %s' % (company_short_name, exc))
        continue

    # Success is reported only after the delivery request returned without an
    # HTTP error.
    # NOTE(review): the JSON body presumably carries its own success flag;
    # only the HTTP status is checked here - confirm the response schema.
    print('%s 投遞成功' % (company_short_name))