**對時間序列模型進行優化
1.首先將時序數據分解爲趨勢份量,季節週期份量和隨機份量
2.對趨勢份量使用ARIMA模型進行擬合
3.季節週期份量則使用歷史同期份量
4.隨機份量則是使用歷史同類的平均值進行預測
5.使用面向對象的方式,構造模型的類,自動選取最優的模型參數**css
import numpy as np import pandas as pd from datetime import datetime import matplotlib.pylab as plt from statsmodels.tsa.stattools import adfuller import pandas as pd import matplotlib.pyplot as plt import numpy as np from statsmodels.graphics.tsaplots import plot_acf,plot_pacf import sys from dateutil.relativedelta import relativedelta from copy import deepcopy from statsmodels.tsa.arima_model import ARMA import warnings warnings.filterwarnings("ignore")```
class arima_model:函數
def __init__(self,ts,maxLag = 9): self.data_ts = ts self.resid_ts = None self.predict_ts = None self.forecast_ts = None self.maxLag = maxLag self.p = maxLag self.q = maxLag self.properModel = None self.bic = sys.maxsize #計算最優的ARIMA模型,將相關結果賦給相應的屬性 def get_proper_model(self): self._proper_model() self.predict_ts = deepcopy(self.properModel.predict()) self.resid_ts = deepcopy(self.properModel.resid) self.forecast_ts = deepcopy(self.properModel.forecast()) #對於給定範圍內的p,q計算擬合得最好的arima模型,這裏是對差分好的數據進行擬合,故差分恆爲0 def _proper_model(self): for p in np.arange(self.maxLag): for q in np.arange(self.maxLag): model = ARMA(self.data_ts, order = (p,q)) try: results_ARMA = model.fit(disp = -1, method = "css") except: continue bic = results_ARMA.bic if bic < self.bic: self.p = p self.q = q self.properModel = results_ARMA self.bic = bic self.resid_ts = deepcopy(self.properModel.resid) self.predict_ts = self.properModel.predict() #參數肯定模型 def certain_model(self,p,q): model = ARMA(self.data_ts,order = (p,q)) try: self.properModel = model.fit(disp = -1,method = "css") self.p = p self.q = q self.bic = self.properModel.bic self.predict_ts = self.properModel.predict() self.resid_ts = deepcopy(self.properModel.resid) self.forecast_ts = self.properModel.forecast() except: print ("You can not fit the model with this parameter p,q")```
dateparse = lambda dates:pd.datetime.strptime(dates,'%Y-%m') #paese_dates指定日期在哪列 index_dates將年月日的哪一個做爲索引,date_parser將字符串轉爲日期 f = open("D:\福建\AirPassengers.csv") data = pd.read_csv(f, parse_dates=["Month"],index_col="Month",date_parser=dateparse) ts = data["#Passengers"]
def draw_ts(timeSeries,title): f = plt.figure(facecolor = "white") timeSeries.plot(color = "blue") plt.title(title) plt.show() def seasonal_decompose(ts): from statsmodels.tsa.seasonal import seasonal_decompose decomposition = seasonal_decompose(ts, model = "multiplicative") trend = decomposition.trend seasonal = decomposition.seasonal residual = decomposition.resid draw_ts(ts,'origin') draw_ts(trend,'trend') draw_ts(seasonal,'seasonal') draw_ts(residual,'residual') return trend,seasonal,residual def testStationarity(ts): dftest = adfuller(ts) # 對上述函數求得的值進行語義描述 dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) for key,value in dftest[4].items(): dfoutput['Critical Value (%s)'%key] = value # print ("dfoutput",dfoutput) return dfoutput
ts_log = np.log(ts) trend,seasonal,residual = seasonal_decompose(ts_log) seasonal_arr = seasonal residual = residual.dropna() residual_mean = np.mean(residual.values) trend = trend.dropna()
代碼運行以下:
優化
#將原始數據分解爲趨勢份量,季節週期和隨機份量 #對trend進行平穩定檢驗 testStationarity(trend)
#對序列進行平穩定處理 trend_diff_1 = trend.diff(1) trend_diff_1 = trend_diff_1.dropna() draw_ts(trend_diff_1,'trend_diff_1') testStationarity(trend_diff_1) trend_diff_2 = trend_diff_1.diff(1) trend_diff_2 = trend_diff_2.dropna() draw_ts(trend_diff_2,'trend_diff_2') testStationarity(trend_diff_2)
#使用模型擬合趨勢份量 #使用模型參數的自動識別 model = arima_model(trend_diff_2) model.get_proper_model() predict_ts = model.properModel.predict() #還原數據,由於使用的是乘法模型,將趨勢份量還原以後須要乘以對應的季節週期份量和隨機份量 diff_shift_ts = trend_diff_1.shift(1) diff_recover_1 = predict_ts.add(diff_shift_ts) rol_shift_ts = trend.shift(1) diff_recover = diff_recover_1.add(rol_shift_ts) recover = diff_recover['1950-1':'1960-6'] * seasonal_arr['1950-1':'1960-6'] * residual_mean log_recover = np.exp(recover) draw_ts(log_recover,'log_recover')
#模型評價 ts_quantum = ts['1950-1':'1960-6'] plt.figure(facecolor = "white") log_recover.plot(color = "blue",label = "Predict") ts_quantum.plot(color = "red", label = "Original") plt.legend(loc = "best") plt.title("RMSE %.4f" % np.sqrt(sum((ts_quantum - log_recover) ** 2) / ts_quantum.size)) plt.show()