7,城市氣候與海洋的關係研究

導入包

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

import matplotlib.pyplot as plt
%matplotlib inline

# Set up the environment so Chinese text can be displayed.
# Python 2 only: force UTF-8 as the default string encoding. `reload` and
# `sys.setdefaultencoding` do not exist on Python 3, so the hack is skipped
# there instead of raising NameError.
import sys
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')


from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['FangSong']  # set the default font
mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from showing as a box in saved figures

  

2,導入各個海濱城市的數據

# File-name suffixes of the three CSV snapshots that exist for every city.
_SNAPSHOT_SUFFIXES = ('150715', '250715', '270615')


def _load_city(name):
    """Read the three CSV snapshots of ``name`` and combine them.

    Files are read from the current directory as ``./<name>_<suffix>.csv``.

    Returns a 4-item list ``[part1, part2, part3, combined]`` where
    ``combined`` is the three frames stacked with a fresh 0..n-1 index.
    """
    parts = [pd.read_csv('./{}_{}.csv'.format(name, suffix))
             for suffix in _SNAPSHOT_SUFFIXES]
    return parts + [pd.concat(parts, ignore_index=True)]


# Every city keeps its three per-snapshot frames plus the combined frame.
# Each combined frame is built from all three snapshots.
ferrara1, ferrara2, ferrara3, ferrara = _load_city('ferrara')
torino1, torino2, torino3, torino = _load_city('torino')
mantova1, mantova2, mantova3, mantova = _load_city('mantova')
milano1, milano2, milano3, milano = _load_city('milano')
ravenna1, ravenna2, ravenna3, ravenna = _load_city('ravenna')
asti1, asti2, asti3, asti = _load_city('asti')
bologna1, bologna2, bologna3, bologna = _load_city('bologna')
piacenza1, piacenza2, piacenza3, piacenza = _load_city('piacenza')
cesena1, cesena2, cesena3, cesena = _load_city('cesena')
faenza1, faenza2, faenza3, faenza = _load_city('faenza')

# Preview the first rows of one combined frame.
faenza.head()

 

4,去除沒用的列

# All combined city frames, in the order used by the later steps.
city_list = [ferrara, torino, mantova, milano, ravenna, asti, bologna, piacenza, cesena, faenza]
for city in city_list:
    # Remove the 'Unnamed: 0' column in place (presumably the index column
    # written into the CSV -- confirm). errors='ignore' lets this cell be
    # re-run after the column is already gone instead of raising KeyError.
    city.drop(labels='Unnamed: 0', axis=1, inplace=True, errors='ignore')

5,顯示最高溫度與離海遠近的關係(觀察多個城市)

# Per-city summaries, in city_list order:
#   city_max_temp -- the largest value of each frame's 'temp' column
#   city_dist     -- the 'dist' value of the row labelled 0
city_max_temp = [city['temp'].max() for city in city_list]
city_dist = [city['dist'][0] for city in city_list]

# Show the maximum temperature of every city
city_max_temp

 

# Scatter plot of distance (x) against maximum temperature (y),
# with the labels and title set through the current axes.
plt.scatter(x=city_dist, y=city_max_temp)
axes = plt.gca()
axes.set_xlabel('距離')
axes.set_ylabel('最高溫度')
axes.set_title('距離和溫度之間的關係圖')

觀察發現,離海近的城市可以連成一條直線,離海遠的城市也可以連成一條直線。

- 分別以100千米和50千米爲分界點,劃分爲離海近和離海遠的兩組數據(近海:小於100  遠海:大於50)
# Select the near-sea cities (distance and maximum temperature):
# convert both lists to arrays, then keep the entries whose distance is below 100.
np_city_dist = np.array(city_dist)
np_city_max_temp = np.array(city_max_temp)

near_condition = np.less(np_city_dist, 100)
near_city_dist, near_city_max_temp = (
    np_city_dist[near_condition],
    np_city_max_temp[near_condition],
)

plt.scatter(x=near_city_dist, y=near_city_max_temp)

機器學習

- 算法模型對象:特殊的對象,在該對象中已經集成好了一個方程(尚未求出解的方程)。
- 模型對象的作用:通過方程實現預測或者分類。
- 樣本數據(df,np):
    - 特徵數據:自變量
    - 目標(標籤)數據:因變量
- 模型對象的分類:
    - 有監督學習:模型需要的樣本數據中存在特徵和目標
    - 無監督學習:模型需要的樣本數據中只存在特徵
    - 半監督學習:模型需要的樣本數據部分需要有特徵和目標,部分只需要特徵數據
- sklearn模塊:封裝了多種模型對象。

導入sklearn,創建線性迴歸算法模型對象

# 1. Import the estimator
from sklearn.linear_model import LinearRegression
# 2. Instantiate the model object
linner = LinearRegression()
# 3. Sample data: the near-sea distances are the single feature (reshaped to
#    one column because the estimator takes a 2-D feature matrix); the
#    near-sea maximum temperatures are the target.
# 4. Train the model
linner.fit(near_city_dist.reshape(-1, 1), near_city_max_temp)
# 5. Predict for a distance of 38. predict() takes the same 2-D
#    (n_samples, n_features) layout as fit(); a bare scalar is rejected by
#    current scikit-learn, so the value is wrapped as a 1x1 array.
linner.predict(np.array([[38]]))
# recorded output: array([33.16842645])

# Score the model (R^2) on the data it was trained on
linner.score(near_city_dist.reshape(-1, 1), near_city_max_temp)
# recorded output: 0.77988083971852

# Draw the regression line: 100 evenly spaced distances from 10 to 70,
# passed to the fitted model as a single-column matrix.
x = np.linspace(10, 70, num=100)
y = linner.predict(x[:, np.newaxis])

# Observed near-sea points, then the predicted points as small dots
plt.scatter(x=near_city_dist, y=near_city_max_temp)
plt.scatter(x=x, y=y, s=0.2)

  

# Show the near-sea and far-sea scatter plots together, each with its own
# regression line. This block builds every name it plots that was not
# defined earlier in the file.

# Near-sea group: reuse the arrays selected earlier (distance < 100).
near_city_dists = near_city_dist
near_max_temps = near_city_max_temp

# Far-sea group: cities whose distance is greater than 50.
far_condition = np_city_dist > 50
far_city_dists = np_city_dist[far_condition]
far_max_temps = np_city_max_temp[far_condition]

# Fit a separate line for the far-sea group and sample it across the
# observed far-sea distance range.
far_linner = LinearRegression()
far_linner.fit(far_city_dists.reshape(-1, 1), far_max_temps)
x1 = np.linspace(far_city_dists.min(), far_city_dists.max(), num=100)
y1 = far_linner.predict(x1.reshape(-1, 1))

# x, y hold the near-sea regression line computed earlier in the file.
plt.scatter(near_city_dists, near_max_temps)
plt.plot(x, y)
plt.scatter(far_city_dists, far_max_temps)
plt.plot(x1, y1)
plt.title('最高溫度和距海洋距離的關係圖',fontsize=20)
plt.xlabel('距海洋距離',fontsize=15)
plt.ylabel('最高溫度',fontsize=15)

  

相關文章
相關標籤/搜索