Python爬蟲(一)——開封市58同城租房信息

 

 

代碼:url

# coding=utf-8
import csv
import os
import sys

import requests
from bs4 import BeautifulSoup

 7 reload(sys)  8 sys.setdefaultencoding('utf-8')  9 # 請求頭設置
10 
def _open_csv_for_append(path):
    """Open *path* for appending CSV rows on either Python 2 or Python 3."""
    if sys.version_info[0] < 3:
        # Python 2's csv module writes byte strings, so it needs a binary file.
        return open(path, 'ab')
    # Python 3's csv module writes text; newline='' keeps it from doubling
    # the line terminator on platforms that translate '\n'.
    return open(path, 'a', newline='', encoding='utf-8')


def download(url):
    """Fetch one listing page and append one CSV row per listing to ``kf.csv``.

    The page at *url* is downloaded with ``requests`` and parsed with
    BeautifulSoup using the ``lxml`` parser.  Each row holds six fields:
    the title link text, the first and the last space-separated token of
    the ``p.room`` text, the first and second address link texts, and the
    price text.  Spaces and newlines are stripped from every field.

    Nothing is written (and the file is not created) when no listing
    matches the selectors.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            does not complete within the timeout.
    """
    def clean(text):
        # Drop the padding spaces and line breaks found around the page text.
        return text.replace(' ', '').replace('\n', '')

    # Without a timeout, requests waits indefinitely on a stalled server.
    db_data = requests.get(url, timeout=30)
    soup = BeautifulSoup(db_data.text, 'lxml')

    # Every selector shares the same path down to one listing <li>.
    item = 'body > div.mainbox > div.main > div.content > div.listBox > ul > li'
    titles = soup.select(item + ' > div.des > h2 > a:nth-of-type(1)')
    houses = soup.select(item + ' > div.des > p.room')
    oneaddresss = soup.select(item + ' > div.des > p.add > a:nth-of-type(1)')
    twoaddresss = soup.select(item + ' > div.des > p.add > a:nth-of-type(2)')
    prices = soup.select(item + ' > div.listliright > div.money > b')

    # NOTE(review): zip() stops at the shortest list, so if one selector
    # matches fewer elements than the others (e.g. a listing without a second
    # address link) the fields of later rows may come from different listings.
    rows = []
    for title, house, oneaddress, twoaddress, price in zip(
            titles, houses, oneaddresss, twoaddresss, prices):
        room_parts = house.get_text().split(' ')
        rows.append((
            clean(str(title.string)),
            clean(room_parts[0]),
            clean(room_parts[-1]),
            clean(oneaddress.get_text()),
            clean(twoaddress.get_text()),
            clean(price.get_text()),
        ))

    if not rows:
        return

    # Open the file once for the whole page; the context manager closes it
    # even if a write fails part-way through.
    with _open_csv_for_append('kf.csv') as csvfile:
        writer = csv.writer(csvfile)
        for row in rows:
            print('write one house')
            writer.writerow(row)


# Initialise the CSV file.
def info():
    """Write the column header row to ``kf.csv`` when the file is new.

    The header is ``title, house, area, address1, address2, price``.  If
    ``kf.csv`` already exists and is non-empty the function returns without
    writing, so running the script repeatedly does not scatter duplicate
    header rows through the data.
    """
    path = 'kf.csv'
    if os.path.exists(path) and os.path.getsize(path) > 0:
        return

    if sys.version_info[0] < 3:
        # Python 2's csv module writes byte strings, so it needs a binary file.
        csvinfo = open(path, 'ab')
    else:
        # Python 3's csv module writes text; newline='' keeps it from
        # doubling the line terminator on platforms that translate '\n'.
        csvinfo = open(path, 'a', newline='', encoding='utf-8')

    # The context manager closes the file even if the write raises.
    with csvinfo:
        begcsv = csv.writer(csvinfo)
        begcsv.writerow(['title', 'house', 'area', 'address1', 'address2', 'price'])


if __name__ == '__main__':
    # No ``url`` is assigned anywhere in the visible listing, so the original
    # ``download(url)`` call fails with NameError.  Take the listing-page URL
    # from the command line instead.
    if len(sys.argv) != 2:
        sys.exit('usage: python <script> <listing-page-url>')
    info()
    download(sys.argv[1])

 

 

 

相關文章
相關標籤/搜索