爬取拉勾網招聘信息,能夠自定義搜索關鍵字,並把搜索結果保存在 Excel 表格中。
# -*- coding:utf-8 -*-
"""Scrape Lagou job postings for a search keyword and save them to an .xls file."""
import json

import requests
import xlwt

# Search keyword, sent to Lagou as the ``kd`` form field.
kd = 'linux'
# Accumulates one row per job posting across every get_content() call.
items = []

# The URL and form fields were taken from the browser dev tools:
# F12 -> Network -> XHR -> Headers -> Request URL / Form Data.
_SEARCH_URL = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'

# Keys copied from every job entry, in spreadsheet column order. They were
# taken from F12 -> Network -> XHR -> Preview -> content -> positionResult
# -> result.
_FIELDS = (
    'positionName',
    'companyFullName',
    'salary',
    'city',
    'positionAdvantage',
    'companyLabelList',
    'firstType',
)

# Seconds to wait for the server; without a timeout requests can block forever.
_REQUEST_TIMEOUT = 10


def _cell_value(value):
    """Return *value* in a form that fits into a single spreadsheet cell.

    ``companyLabelList`` arrives as a JSON list. Handing a list to
    ``Worksheet.write`` is either rejected or rendered as rich text with the
    entries run together, depending on the xlwt version (NOTE(review): confirm
    against the installed xlwt), so lists are flattened to a comma-separated
    string. Assumes the list entries are strings.
    """
    if isinstance(value, (list, tuple)):
        return ','.join(value)
    return value


def _extract_rows(payload):
    """Build one row (list of cell values) per job entry in *payload*.

    Raises KeyError when *payload* lacks ``content.positionResult.result`` or
    an entry lacks one of the expected fields.
    """
    results = payload['content']['positionResult']['result']
    # Iterate over what the server actually returned instead of a fixed index
    # range: the original comment says a page holds 15 postings, but the old
    # loop read only 14 and crashed on pages with fewer entries.
    return [[_cell_value(job[field]) for field in _FIELDS] for job in results]


def get_content(pn):
    """Fetch result page *pn* for keyword ``kd`` and append its rows to ``items``.

    Args:
        pn: page number, sent as the ``pn`` form field.

    Returns:
        The module-level ``items`` list, which now also contains this page.

    Raises:
        requests.exceptions.RequestException: the request failed or timed out.
        ValueError: the response body is not valid JSON.
        KeyError: the JSON does not have the expected structure.
    """
    data = {'first': 'true', 'pn': pn, 'kd': kd}
    # POST the form data and decode the JSON response text into a dict.
    html = requests.post(_SEARCH_URL, data=data, timeout=_REQUEST_TIMEOUT).text
    html = json.loads(html)
    items.extend(_extract_rows(html))
    return items


def excel_write(items):
    """Write *items* to sheet ``test1`` of ``test.xls``, below a bold header row.

    Args:
        items: iterable of rows; each row is a sequence of cell values in the
            same order as the header columns.
    """
    newTable = 'test.xls'
    wb = xlwt.Workbook(encoding='utf-8')  # create the workbook
    ws = wb.add_sheet('test1')  # create the sheet
    # Header row labels.
    headData = ['招聘職位','公司','薪資','地區','福利','提供條件','工做類型']
    # Build the bold style once rather than once per header cell.
    bold = xlwt.easyxf('font: bold on')
    for col, title in enumerate(headData):
        ws.write(0, col, title, bold)  # row 0, column col

    # Data starts on the second row, directly below the header.
    for row_index, item in enumerate(items, 1):
        for col, value in enumerate(item):
            value = _cell_value(value)
            print(value)
            ws.write(row_index, col, value)
    wb.save(newTable)  # save the workbook


if __name__ == "__main__":
    # Crawl pages 1-5; range() excludes its upper bound, hence 6.
    for pn in range(1, 6):
        items = get_content(pn)
    excel_write(items)
執行後,會在腳本同目錄下生成一個 test.xls 表格,表格內容如下:
說明:需要安裝三個模塊:
一、requests:請求頁面
二、xlwt:寫入表格(讀取表格需要 xlrd 模塊)
三、pyopenssl:不安裝會報如下警告:
C:\Python27\lib\requests\packages\urllib3\util\ssl_.py:335: SNIMissingWarning: An HTTPS request has been made, but the SNI (Subject Name Indication) extension to TLS is not available on this platform. This may cause the server to present an incorrect TLS certificate, which can cause validation failures. You can upgrade to a newer version of Python to solve this. For more information, see https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  SNIMissingWarning
C:\Python27\lib\requests\packages\urllib3\util\ssl_.py:133: InsecurePlatformWarning: A true SSLContext object is not available. This prevents urllib3 from configuring SSL appropriately and may cause certain SSL connections to fail. You can upgrade to a newer version of Python to solve this. For more information, see https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecurePlatformWarning