本文主要介紹如何從前程無憂(51job)上爬取「數據分析」崗位的職位信息,主要包含五個字段:職位名稱、公司名稱、工作地點、薪資和發佈時間。同時把爬取下來的數據保存到 MySQL 數據庫中。
# Scrape job postings from 51job search-result pages and store five fields
# (position name, company name, working place, salary, release time) in the
# MySQL table ``DataAnalyst`` of the ``lookforjob`` database.
import re

import pymysql
import requests


# Connect to the database and (re)create the result table.
# Keyword arguments are used because PyMySQL 1.0+ no longer accepts positional
# connection parameters. utf8mb4 is requested explicitly so that the Chinese
# text scraped from the site can be encoded when it is sent to the server.
db = pymysql.connect(
    host='localhost',
    user='root',
    password='password',
    database='lookforjob',
    charset='utf8mb4',
)
cursor = db.cursor()
cursor.execute('drop table if exists DataAnalyst')
# The trailing ``#...`` fragments inside the statement are MySQL end-of-line
# comments labelling each column; they need the line breaks to terminate.
sql = """
create table DataAnalyst
(
    PositionName VARCHAR(40),   #職位名稱
    CompanyName VARCHAR(40),    #公司名稱
    WorkingPlace VARCHAR(40),   #工做地點
    Salary VARCHAR(40),         #薪資
    ReleaseTime VARCHAR(40)     #發佈時間
)
"""

cursor.execute(sql)

# One search-result row: captures, in order, the two ``title`` attributes
# (position, company) and the text of the t3, t4 and t5 spans.
# Compiled once here instead of on every call to ``get``.
_JOB_PATTERN = re.compile(
    r'class="t1 ">.*?<a target="_blank" title="(.*?)".*? '
    r'<span class="t2"><a target="_blank" title="(.*?)".*?'
    r'<span class="t3">(.*?)</span>.*?'
    r'<span class="t4">(.*?)</span>.*? '
    r'<span class="t5">(.*?)</span>',
    re.S)

# The table name matches the CREATE TABLE statement exactly; MySQL table names
# are case-sensitive on many servers, so ``dataanalyst`` would not be found.
_INSERT_SQL = "INSERT INTO DataAnalyst VALUES (%s, %s, %s, %s, %s)"


def getHTMLText(page):
    """Download one page of 51job search results.

    Args:
        page: Page number placed into the search URL.

    Returns:
        The decoded HTML of the page, or an empty string if the request
        fails (connection error, timeout or non-2xx status).
    """
    url = (
        "https://search.51job.com/list/080200,000000,0000,00,9,99,"
        "%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590%25E5%25B8%2588,2,"
        + str(page) + '.html'
    )
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: a failed page is skipped, but the failure is reported
        # instead of being silently swallowed by a bare ``except``.
        print('Failed to fetch page %s: %s' % (page, exc))
        return ""
    # Let requests guess the charset from the body rather than the headers.
    r.encoding = r.apparent_encoding
    return r.text


def get(html):
    """Extract job rows from *html* and insert them into ``DataAnalyst``.

    Each regex match is a 5-tuple that is inserted as one row and echoed to
    stdout. The transaction is committed once per page. An empty *html*
    (e.g. after a failed download) yields no rows.

    Args:
        html: HTML text of one search-result page.
    """
    for item in _JOB_PATTERN.findall(html):
        cursor.execute(_INSERT_SQL, item)
        print(item)
    db.commit()


if __name__ == "__main__":
    try:
        for each in range(1, 7):
            get(getHTMLText(each))
    finally:
        # Release the cursor and the connection even if a page fails.
        cursor.close()
        db.close()