# Python 爬蟲:鏈家租房信息獲取 (Python crawler: fetching Lianjia rental listings)

# Import the modules the crawler needs

import importlib
import sys
import time
import tkinter as tk

import pymysql
import requests
import xlwt
from bs4 import BeautifulSoup
from PIL import Image, ImageTk

importlib.reload(sys)

#背景圖片
def resize(w_box, h_box, pil_image):
    """Scale an image to fit inside a box while keeping its aspect ratio.

    Args:
        w_box: Width of the bounding box.
        h_box: Height of the bounding box.
        pil_image: Image object exposing ``size`` and ``resize`` (a PIL image
            at the call site).

    Returns:
        The value returned by ``pil_image.resize`` for the scaled
        ``(width, height)``.
    """
    w, h = pil_image.size  # original size of the image
    # Take the smaller of the two ratios so the result fits in both dimensions.
    factor = min(w_box / w, h_box / h)
    width = int(w * factor)
    height = int(h * factor)
    # Image.LANCZOS is the filter the old ANTIALIAS alias pointed to; the
    # alias itself was removed in Pillow 10.
    return pil_image.resize((width, height), Image.LANCZOS)


#獲取url下的頁面內容,返回soup對象
def get_page(url):
    """Download *url* and return its body parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=10)
    return BeautifulSoup(response.text, 'html.parser')

#封裝成函數,做用是獲取列表頁下面的全部租房頁面的連接,返回一個鏈接列表
def get_links(link_url):
    """Collect the detail-page links found on one listing page.

    Args:
        link_url: URL of the listing page.

    Returns:
        A list with the ``href`` of the first ``<a>`` inside every
        ``<div class="pic-panel">`` on the page. Panels without an anchor
        or without an ``href`` are skipped.
    """
    soup = get_page(link_url)
    links_div = soup.find_all('div', class_="pic-panel")
    links = []
    for div in links_div:
        anchor = div.a
        # A panel without a usable anchor would otherwise raise or yield None.
        if anchor is None:
            continue
        href = anchor.get('href')
        if href:
            links.append(href)
    return links

def get_house_info(house_url):
    """Scrape one rental detail page into a dict of listing attributes.

    Args:
        house_url: URL of the detail page.

    Returns:
        A dict with the keys '價格', '單位', '面積', '戶型', '樓層', '朝向',
        '地鐵', '小區' and '位置'; every value is a string cut out of the
        page text.

    Raises:
        AttributeError: If the price or unit ``<span>`` is not on the page.
        IndexError: If the page has fewer than seven ``<p>`` elements.
    """
    soup = get_page(house_url)
    price = soup.find('span', class_='total').text  # price
    unit = soup.find('span', class_='unit').text.strip()  # unit, whitespace removed

    # The fields are read positionally from the page's <p> elements.
    # NOTE(review): the slice offsets presumably drop a leading label from
    # each paragraph; they depend on the page layout, so confirm against a
    # live page.
    house_info = soup.find_all('p')
    area = house_info[0].text[3:]  # area
    layout = house_info[1].text[5:]  # layout
    floor = house_info[2].text[3:]  # floor
    towards = house_info[3].text[5:]  # orientation
    subway = house_info[4].text[3:]  # subway
    uptown = house_info[5].text[3:-8].strip()  # residential community
    location = house_info[6].text[3:]  # location

    info = {
        '價格': price,
        '單位': unit,
        '面積': area,
        '戶型': layout,
        '樓層': floor,
        '朝向': towards,
        '地鐵': subway,
        '小區': uptown,
        '位置': location,
    }
    return info

# Connection settings; get_db() unpacks them into pymysql.connect().
DATABASE = dict(
    host='localhost',  # for a remote database, put the remote server's IP address here
    database='examination',
    user='root',
    password='123456',
    charset='utf8mb4',
)
def get_db(setting):
    """Open a MySQL connection.

    Args:
        setting: Mapping of keyword arguments forwarded to
            ``pymysql.connect`` (for example the ``DATABASE`` dict).

    Returns:
        The connection object returned by ``pymysql.connect``.
    """
    return pymysql.connect(**setting)

def insert(db, house):
    """Insert one scraped listing into the table of the selected area.

    The table name is ``<city>_<district>``, built from the current text of
    the ``cityEntry`` and ``localEntry`` widgets.

    Args:
        db: Open database connection (as returned by ``get_db``).
        house: Dict with the keys '價格', '單位', '面積', '戶型', '樓層',
            '朝向', '地鐵', '小區' and '位置'.
    """
    table_name = cityEntry.get() + '_' + localEntry.get()
    # Identifiers cannot be bound as query parameters, so the table name is
    # backtick-quoted and any embedded backtick is doubled.
    quoted_table = '`{}`'.format(table_name.replace('`', '``'))
    sql = (
        "insert into {}(price,unit,area,layout,floor,towards,subway,uptown,location) "
        "values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    ).format(quoted_table)
    # The values are scraped text and may contain quotes; let the driver
    # escape them instead of formatting them into the statement.
    params = (
        house['價格'], house['單位'], house['面積'], house['戶型'],
        house['樓層'], house['朝向'], house['地鐵'], house['小區'],
        house['位置'],
    )
    with db.cursor() as cursor:
        cursor.execute(sql, params)
    db.commit()

def creatTable(db):
    """Create the listing table for the selected city and district.

    The table name is ``<city>_<district>``, built from the current text of
    the ``cityEntry`` and ``localEntry`` widgets. The statement is a plain
    ``CREATE TABLE``, so the database error raised for an already existing
    table propagates to the caller.

    Args:
        db: Open database connection (as returned by ``get_db``).
    """
    table_name = cityEntry.get() + '_' + localEntry.get()
    # Double any backtick so the user-typed name stays inside the quoted
    # identifier.
    quoted_name = table_name.replace('`', '``')
    sql = """
    CREATE TABLE `{}` (
    `price` varchar(80) DEFAULT NULL,
    `unit` varchar(80) DEFAULT NULL,
    `area` varchar(80) DEFAULT NULL,
    `layout` varchar(80) DEFAULT NULL,
    `floor` varchar(80) DEFAULT NULL,
    `towards` varchar(80) DEFAULT NULL,
    `subway` varchar(80) DEFAULT NULL,
    `uptown` varchar(80) DEFAULT NULL,
    `location` varchar(80) DEFAULT NULL
    );""".format(quoted_name)
    with db.cursor() as cursor:
        cursor.execute(sql)
    db.commit()

def main():
    """Scrape the requested listing pages and store every listing in MySQL.

    Reads the city, district and page count from the ``cityEntry``,
    ``localEntry`` and ``numberEntry`` widgets, creates the target table,
    walks the listing pages, and reports the outcome on ``lableInit``.
    Invalid input is reported on ``lableInit`` instead of raising.
    """
    city = cityEntry.get()
    local = localEntry.get()

    # Validate the input before touching the database.
    try:
        num = int(numberEntry.get())
    except ValueError:
        lableInit.config(text="請輸入有效的頁數")
        return
    try:
        city_code = dict_loc[city]
        local_code = dict_loc[local]
    except KeyError:
        lableInit.config(text="暫不支持該城市或地區")
        return

    db = get_db(DATABASE)
    try:
        try:
            creatTable(db)
        except pymysql.MySQLError:
            # Most likely the table is left over from an earlier run; keep
            # going and append to it.
            print("數據庫已存在")

        page_url = "https://" + city_code + ".lianjia.com/zufang/" + local_code + "/pg{}/"
        # NOTE(review): the listing pages look 1-based (pg1, pg2, ...); the
        # original started at pg0 -- confirm against the live site.
        for page in range(1, num + 1):
            for link in get_links(page_url.format(page)):
                time.sleep(0.1)  # small pause between detail requests
                house = get_house_info(link)
                insert(db, house)
    finally:
        # Release the connection even when scraping fails half-way.
        db.close()

    lableInit.config(text="{}市{}區數據獲取成功".format(city, local))
    print('DONE')


def quitw():
    """Close the application by destroying the root window ``top``."""
    top.destroy()


def export():
    """Dump the table of the selected city/district into an .xls workbook.

    The table name is ``<city>_<district>``, built from the current text of
    the ``cityEntry`` and ``localEntry`` widgets. The first sheet row holds
    the column names; every following row holds one record. The number of
    exported rows is reported on ``lableInit``.
    """
    table_name = cityEntry.get() + '_' + localEntry.get()
    # Backtick-quote the user-typed name (doubling embedded backticks) so it
    # is always treated as a single identifier.
    quoted_table = '`{}`'.format(table_name.replace('`', '``'))

    db = get_db(DATABASE)
    try:
        with db.cursor() as cursor:
            count = cursor.execute('select * from ' + quoted_table)
            # Fetch all result rows
            results = cursor.fetchall()
            # Column metadata of the result set; item 0 of each entry is the name
            fields = cursor.description
    finally:
        # The connection is only needed for this one query.
        db.close()

    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('table_' + table_name, cell_overwrite_ok=True)

    # Header row with the column names
    for col, field in enumerate(fields):
        sheet.write(0, col, field[0])

    # Data rows start right below the header
    for row, record in enumerate(results, start=1):
        for col in range(len(fields)):
            sheet.write(row, col, u'%s' % record[col])

    # NOTE(review): the output directory is hard-coded to one user's desktop.
    workbook.save(r'C:\Users\Lenovo\Desktop\{}.xls'.format(table_name))
    lableInit.config(text="共{}條數據導出成功!".format(count))


#構造字典
# Maps the Chinese city/district names typed into the UI to the slugs used
# when building listing URLs (city -> sub-domain, district -> path segment).
dict_loc = {
    # Beijing
    '北京': 'bj',
    '東城': 'dongcheng', '西城': 'xicheng', '朝陽': 'chaoyang',
    '海淀': 'haidian', '豐臺': 'fengtai',
    # Shanghai
    '上海': 'sh',
    '浦東': 'pudong', '寶山': 'baoshan',
    # Hangzhou
    '杭州': 'hz',
    '西湖': 'xihu', '下城': 'xiacheng', '餘杭': 'yuhang', '富陽': 'fuyang',
    # Zhengzhou
    '鄭州': 'zz',
    '金水': 'jinshui', '中原': 'zhongyuan', '二七': 'erqi',
    '高新': 'gaoxin', '新鄭市': 'xinzhengshi',
    # Luoyang
    '洛陽': 'luoyang',
    '嵩縣': 'songxian',
    # Xinxiang
    '新鄉': 'xinxiang',
    '牧野': 'muye',
}

if __name__ == "__main__":
    # Build the single-window Tk UI: area and page-count inputs, three action
    # buttons, and a feedback label that main() and export() write to.
    top = tk.Tk()
    top.title("鏈家")
    top.resizable(width=False, height=False)  # fixed-size window
    top.geometry("410x510")  # window size

    # Background image drawn on a canvas that fills the window
    canvas = tk.Canvas(top)
    pil_image = Image.open('lianjiabg2.png')  # open the background picture
    pil_image_resize = resize(410, 510, pil_image)  # scale it to the window
    # Keep a module-level reference; Tk does not keep the image alive itself.
    im = ImageTk.PhotoImage(pil_image_resize)
    canvas.create_image(205, 255, image=im)  # put the image on the canvas
    canvas.place(x=0, y=0, height=510, width=410, anchor='nw')

    # Logo picture (raw string: the backslashes are path separators, not escapes)
    photo = tk.PhotoImage(file=r"F:\CodeWorkspace\lianjia.png")
    imgLabel = tk.Label(top, image=photo, bg='#fbfbfb', width=410)
    imgLabel.grid(row=0, column=0, columnspan=2)

    # Prompt for the area
    Label = tk.Label(top, fg='#589e6e', bg='#f9f7ba', font='隸書 -20 ', text="請輸入您要查詢的地區")
    Label.grid(row=1, column=0, columnspan=2, pady=5)

    # City
    cityEntry = tk.Entry(top, width=12)
    cityEntry.grid(row=2, column=0, padx=5, pady=10, sticky="E")

    cityLabel = tk.Label(top, fg='#589e6e', font='隸書 -20 ', bg='#d9f3e1', text="市")
    cityLabel.grid(row=2, column=1, sticky="W")

    # District
    localEntry = tk.Entry(top, width=12)
    localEntry.grid(row=3, column=0, padx=5, pady=10, sticky="E")

    localLabel = tk.Label(top, fg='#589e6e', font='隸書 -20 ', bg='#d9f3e1', text="區")
    localLabel.grid(row=3, column=1, sticky="W")

    # Prompt for the number of pages
    Label2 = tk.Label(top, fg='#589e6e', bg='#f9f7ba', font='隸書 -20 ', text="請輸入您要查詢的頁數\n(每頁30條數據)")
    Label2.grid(row=4, column=0, columnspan=2)

    # Page count
    numberEntry = tk.Entry(top, width=12)
    numberEntry.grid(row=5, column=0, padx=5, pady=10, sticky="E")

    numberLabel = tk.Label(top, fg='#589e6e', font='隸書 -20 ', bg='#abe1c1', text="頁")
    numberLabel.grid(row=5, column=1, sticky="W")

    # Submit: start scraping
    submit = tk.Button(top, bg='#589e6e', fg='white', width=12, height=1, font='Helvetica -15 bold', text="數據獲取", command=main)
    submit.grid(row=6, column=0, columnspan=2, padx=3, pady=5)

    # Prompt for the Excel export
    Label3 = tk.Label(top, fg='#589e6e', bg='#f9f7ba', font='隸書 -20 ', text="將數據導出爲Excel格式")
    Label3.grid(row=7, column=0, columnspan=2)

    # Export to Excel (named exportB so the export() function is not rebound)
    exportB = tk.Button(top, bg='#f9a33f', fg='white', width=12, height=1, font='Helvetica -15 bold', text="導出數據", command=export)
    exportB.grid(row=8, column=0, columnspan=2, padx=3, pady=5)

    # Quit
    quitB = tk.Button(top, bg='#ff5757', fg='white', width=12, height=1, font='Helvetica -15 bold', text="退出", command=quitw)
    quitB.grid(row=9, column=0, columnspan=2, padx=3, pady=3)

    # Feedback area
    lableInitTitle = tk.Label(top, font='正楷 -12', text="* * * 提 示 信 息 * * *", width=40, fg="#f9a33f")
    lableInitTitle.grid(row=10, column=0, columnspan=2, ipady=5)

    lableInit = tk.Label(top, bg='#d9f3e1', font='正楷 -12 ', text="請在上方輸入您要查詢的信息", width=40, fg="red")
    lableInit.grid(row=11, column=0, columnspan=2, ipady=5)

    # Enter the Tk event loop; without it the script ends right after
    # building the widgets.
    top.mainloop()

# 相關文章
# 相關標籤/搜索