1、閒着無聊,天天都是那麼無聊,感覺我算是廢了,真坑,想出去實習也那麼坑。
2、直接放源代碼(寫得爛不要噴我):
import re  # regular expressions
import time
import requests
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import urllib

# Script overview: fetch pages 1-5 of a sou.zhaopin.com job search, pull four
# fields out of each page's HTML with regular expressions, and append the
# resulting rows to a CSV file (no header row, no index column).

# NOTE(review): `header` is defined but never passed to requests.get below.
# Its Host/Origin/Referer values point at hotel.elong.com and it hard-codes a
# Content-Length, so it must not be sent as-is to sou.zhaopin.com. It is kept
# only so the name stays defined for any later code that may reference it.
header = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Length': '1599',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Host': 'hotel.elong.com',
    'Origin': 'http://hotel.elong.com',
    'Pragma': 'no-cache',
    'Referer': 'http://hotel.elong.com/beijing/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?'

# Upper bound on rows written per page (the original script took exactly 20).
ROWS_PER_PAGE = 20
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
CSV_PATH = 'C:\\Users\\你若成風618\\Desktop\\aa\\2.csv'

# Patterns are compiled once, outside the page loop. The pattern text is
# unchanged from the original script.
# Link text of anchors pointing at jobs.zhaopin.com (presumably the job title;
# the captured text may still contain inline markup such as <b>...</b>).
JOB_LINK_RE = re.compile(
    'href="http://jobs.zhaopin.com/.*?.htm" target="_blank">(.*?)</a>')
# Link text of the company.zhaopin.com anchor inside a class="gsmc" element
# (presumably the company name).
COMPANY_RE = re.compile(
    'class="gsmc"><a href="http://company.zhaopin.com/.*?.htm" target="_blank">(.*?)</a>')
# Content of the class="zwyx" table cell (presumably the salary - confirm).
SALARY_RE = re.compile('class="zwyx">(.*?)</td>')
# First <span> inside a class="newlist_deatil_two" element.
DETAIL_RE = re.compile('class="newlist_deatil_two"><span>(.*?)</span>')

for n in range(1, 6):
    # Query parameters for result page `n`; the values are copied verbatim
    # from the original script.
    dat = {
        'jl': '北京+上海+廣州+深圳+西安',
        'kw': 'GIS開發',
        'p': n,
        'sm': '0',
        'sg': 'a20b0e245eac4aa1b4844dca099fc75a',
    }
    print(n)

    response = requests.get(url, params=dat, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    html = response.text

    # Parse the page with the regular expressions.
    pattern1 = JOB_LINK_RE.findall(html)
    pattern2 = COMPANY_RE.findall(html)
    pattern3 = SALARY_RE.findall(html)
    pattern4 = DETAIL_RE.findall(html)

    print(pattern1)
    print('***********************')
    print(len(pattern1))

    # zip() stops at the shortest list, so a page with fewer than
    # ROWS_PER_PAGE matches no longer raises IndexError; the slice keeps the
    # original 20-row cap when a page has more matches.
    data = list(zip(pattern1, pattern2, pattern3, pattern4))[:ROWS_PER_PAGE]
    if not data:
        print('page', n, 'produced no complete rows; nothing written')
        continue

    data2 = pd.DataFrame(data)
    data2.to_csv(CSV_PATH, header=False, index=False, mode='a+')
3、感覺很爛,後面準備優化一下,整點圖表,做一下數據分析。
4、忘了配個圖。