This article explains how to use an LSTM for time-series forecasting, focusing on the practical application. For the underlying theory, see the following two articles:
Understanding LSTM Networks and LSTM Study Notes (LSTM 學習筆記)
Programming environment: Python 3.5, TensorFlow 1.0
The dataset comes from the kesci platform and was provided by the Cloud Brain (雲腦) machine-learning bootcamp: a time-series forecasting challenge on real industry data.
The dataset consists of several related industry time series (about 40) together with external feature time series (about 5). This article builds a model on just one of them.
Load the common libraries:
- # load the usual data-analysis libraries
- import pandas as pd
- import numpy as np
- import tensorflow as tf
- from sklearn.metrics import mean_absolute_error, mean_squared_error
- from sklearn.preprocessing import MinMaxScaler
- import matplotlib.pyplot as plt
- %matplotlib inline
- import warnings
- warnings.filterwarnings('ignore')
Preview the data:
- path = '../input/industry/industry_timeseries/timeseries_train_data/11.csv'
- # columns: year, month, day, daily max/min/mean temperature, daily mean humidity, output (target)
- data11 = pd.read_csv(path, names=['年','月','日','當日最高氣溫','當日最低氣溫','當日平均氣溫','當日平均溼度','輸出'])
- data11.head()
|   | 年 | 月 | 日 | 當日最高氣溫 | 當日最低氣溫 | 當日平均氣溫 | 當日平均溼度 | 輸出 |
|---|------|---|---|------|------|--------|--------|------------|
| 0 | 2015 | 2 | 1 | 1.9  | -0.4 | 0.7875 | 75.000 | 814.155800 |
| 1 | 2015 | 2 | 2 | 6.2  | -3.9 | 1.7625 | 77.250 | 704.251112 |
| 2 | 2015 | 2 | 3 | 7.8  | 2.0  | 4.2375 | 72.750 | 756.958978 |
| 3 | 2015 | 2 | 4 | 8.5  | -1.2 | 3.0375 | 65.875 | 640.645401 |
| 4 | 2015 | 2 | 5 | 7.9  | -3.6 | 1.8625 | 55.375 | 631.725130 |
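Before modeling, it helps to eyeball the target series. A minimal sketch, assuming the data11 frame loaded above (輸出 is the target column):
- # plot the raw target series to get a feel for trend and seasonality
- plt.figure(figsize=(12, 4))
- plt.plot(data11['輸出'].values)
- plt.title('target series (輸出)')
- plt.show()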
Load the data:
- ## load data (this article uses the first table as an example; the other tables are handled the same way)
- df = pd.read_csv(path, header=None)   # the CSV has no header row, so read it without one
- data = df.iloc[:, 3:8].values         # take columns 3-7: the four weather features plus the target
Define constants and initialize the weights:
- # constants
- rnn_unit = 10        # hidden-layer units
- input_size = 4       # four input features
- output_size = 1      # one target
- lr = 0.0006          # learning rate
- tf.reset_default_graph()
- # weights and biases for the input and output projections
- weights = {
-     'in': tf.Variable(tf.random_normal([input_size, rnn_unit])),
-     'out': tf.Variable(tf.random_normal([rnn_unit, 1]))
- }
- biases = {
-     'in': tf.Variable(tf.constant(0.1, shape=[rnn_unit,])),
-     'out': tf.Variable(tf.constant(0.1, shape=[1,]))
- }
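The shape bookkeeping is worth pausing on: 'in' maps each 4-dimensional feature vector to rnn_unit dimensions, and 'out' maps each rnn_unit-dimensional LSTM output back to a single prediction. A throwaway numpy sketch of the same algebra (illustrative only; the batch of 80 windows of 15 steps is an assumption matching the training settings below):
- # numpy stand-in for the two projections defined above
- x_2d = np.random.rand(80 * 15, input_size)                    # flattened batch: (batch*time_step, 4)
- h_in = x_2d.dot(np.random.rand(input_size, rnn_unit))         # input projection  -> (1200, 10)
- y_hat = np.random.rand(80 * 15, rnn_unit).dot(np.random.rand(rnn_unit, 1))  # output projection -> (1200, 1)
- print(h_in.shape, y_hat.shape)                                # (1200, 10) (1200, 1)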
Split the data into a training set and a validation set (the last 90 days for validation, the rest for training):
- def get_data(batch_size=60, time_step=20, train_begin=0, train_end=487):
-     batch_index = []
-
-     scaler_for_x = MinMaxScaler(feature_range=(0, 1))   # min-max scale each column
-     scaler_for_y = MinMaxScaler(feature_range=(0, 1))
-     scaled_x_data = scaler_for_x.fit_transform(data[:, :-1])
-     # MinMaxScaler expects a 2-D array, so reshape the target before scaling
-     scaled_y_data = scaler_for_y.fit_transform(data[:, -1].reshape(-1, 1)).reshape(-1)
-
-     label_train = scaled_y_data[train_begin:train_end]
-     label_test = scaled_y_data[train_end:]
-     normalized_train_data = scaled_x_data[train_begin:train_end]
-     normalized_test_data = scaled_x_data[train_end:]
-
-     train_x, train_y = [], []   # training inputs and labels
-     for i in range(len(normalized_train_data) - time_step):
-         if i % batch_size == 0:
-             batch_index.append(i)
-         x = normalized_train_data[i:i + time_step, :4]
-         y = label_train[i:i + time_step, np.newaxis]
-         train_x.append(x.tolist())
-         train_y.append(y.tolist())
-     batch_index.append((len(normalized_train_data) - time_step))
-
-     size = (len(normalized_test_data) + time_step - 1) // time_step   # number of test windows
-     test_x, test_y = [], []
-     for i in range(size - 1):
-         x = normalized_test_data[i * time_step:(i + 1) * time_step, :4]
-         y = label_test[i * time_step:(i + 1) * time_step]
-         test_x.append(x.tolist())
-         test_y.extend(y)
-     test_x.append((normalized_test_data[(i + 1) * time_step:, :4]).tolist())
-     test_y.extend((label_test[(i + 1) * time_step:]).tolist())
-
-     return batch_index, train_x, train_y, test_x, test_y, scaler_for_y
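A quick way to confirm the windowing is to call get_data with the same arguments train_lstm will use and inspect the shapes (a sketch; the counts assume the 577-row file used here, i.e. 487 training rows plus 90 validation rows):
- # sanity-check the windowed shapes (illustrative)
- batch_index, train_x, train_y, test_x, test_y, scaler_for_y = get_data(batch_size=80, time_step=15)
- print(np.array(train_x).shape)    # (472, 15, 4): 487 - 15 sliding windows, each 15 steps x 4 features
- print(np.array(train_y).shape)    # (472, 15, 1): one scaled label per step
- print(len(test_x), len(test_y))   # 6 windows covering the 90 validation points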
Define the LSTM network structure:
- # —————————————— define the network ——————————————
- def lstm(X):
-     batch_size = tf.shape(X)[0]
-     time_step = tf.shape(X)[1]
-     w_in = weights['in']
-     b_in = biases['in']
-     input_2d = tf.reshape(X, [-1, input_size])   # flatten to 2-D for the matmul; the result feeds the hidden layer
-     input_rnn = tf.matmul(input_2d, w_in) + b_in
-     input_rnn = tf.reshape(input_rnn, [-1, time_step, rnn_unit])   # back to 3-D as input to the LSTM cell
-     cell = tf.contrib.rnn.BasicLSTMCell(rnn_unit)
-     # on some TF versions: cell = tf.contrib.rnn.core_rnn_cell.BasicLSTMCell(rnn_unit)
-     init_state = cell.zero_state(batch_size, dtype=tf.float32)
-     # output_rnn holds the output at every time step; final_states is the state after the last step
-     output_rnn, final_states = tf.nn.dynamic_rnn(cell, input_rnn, initial_state=init_state, dtype=tf.float32)
-     output = tf.reshape(output_rnn, [-1, rnn_unit])   # flatten as input to the output layer
-     w_out = weights['out']
-     b_out = biases['out']
-     pred = tf.matmul(output, w_out) + b_out
-     return pred, final_states
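A quick shape check confirms that pred has one row per (sample, time step) pair. A sketch; note that lstm() adds variables to the current graph, so after running it you would need to re-run the constants/weights cell above (which calls tf.reset_default_graph()) before train_lstm:
- # feed a dummy batch of 2 windows x 15 steps through the network
- X_demo = tf.placeholder(tf.float32, shape=[None, None, input_size])
- pred_demo, _ = lstm(X_demo)
- with tf.Session() as sess:
-     sess.run(tf.global_variables_initializer())
-     out = sess.run(pred_demo, feed_dict={X_demo: np.zeros((2, 15, 4), np.float32)})
-     print(out.shape)   # (30, 1): batch_size * time_step rows, one prediction each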
Train the model and generate predictions:
- # —————————————— train the model ——————————————
- def train_lstm(batch_size=80, time_step=15, train_begin=0, train_end=487):
-     X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
-     Y = tf.placeholder(tf.float32, shape=[None, time_step, output_size])
-     batch_index, train_x, train_y, test_x, test_y, scaler_for_y = get_data(batch_size, time_step, train_begin, train_end)
-     pred, _ = lstm(X)
-     # loss function: mean squared error between predictions and labels
-     loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(Y, [-1])))
-     train_op = tf.train.AdamOptimizer(lr).minimize(loss)
-     with tf.Session() as sess:
-         sess.run(tf.global_variables_initializer())
-         # train for 5000 iterations
-         iter_time = 5000
-         for i in range(iter_time):
-             for step in range(len(batch_index) - 1):
-                 _, loss_ = sess.run([train_op, loss], feed_dict={X: train_x[batch_index[step]:batch_index[step + 1]], Y: train_y[batch_index[step]:batch_index[step + 1]]})
-             if i % 100 == 0:
-                 print('iter:', i, 'loss:', loss_)
-         #### predict ####
-         test_predict = []
-         for step in range(len(test_x)):
-             prob = sess.run(pred, feed_dict={X: [test_x[step]]})
-             predict = prob.reshape((-1))
-             test_predict.extend(predict)
-
-         # undo the min-max scaling before computing the errors
-         test_predict = scaler_for_y.inverse_transform(np.array(test_predict).reshape(-1, 1)).reshape(-1)
-         test_y = scaler_for_y.inverse_transform(np.array(test_y).reshape(-1, 1)).reshape(-1)
-         rmse = np.sqrt(mean_squared_error(y_true=test_y, y_pred=test_predict))
-         mae = mean_absolute_error(y_true=test_y, y_pred=test_predict)
-         print('mae:', mae, ' rmse:', rmse)
-         return test_predict
Call train_lstm() to train the model, run prediction, and report the validation errors (MAE and RMSE):
- test_predict = train_lstm(batch_size=80, time_step=15, train_begin=0, train_end=487)
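To inspect the fit visually, one option is to recover the true validation labels through get_data and plot them against the predictions (a sketch; train_lstm as written returns only test_predict):
- # plot predicted vs. true values on the 90-day validation window (illustrative)
- _, _, _, _, test_y, scaler_for_y = get_data(batch_size=80, time_step=15, train_begin=0, train_end=487)
- test_y = scaler_for_y.inverse_transform(np.array(test_y).reshape(-1, 1)).reshape(-1)
- plt.figure(figsize=(12, 4))
- plt.plot(test_y, label='true')
- plt.plot(test_predict, label='predicted')
- plt.legend()
- plt.show()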