基於X11-ARIMA模型的時間序列分析

**對時間序列模型進行優化
1.首先將時序數據分解爲趨勢份量,季節週期份量和隨機份量
2.對趨勢份量使用ARIMA模型進行擬合
3.季節週期份量則使用歷史同期份量
4.隨機份量則是使用歷史同類的平均值進行預測
5.使用面向對象的方式,構造模型的類,自動選取最優的模型參數**css

import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pylab  as plt
from statsmodels.tsa.stattools import adfuller
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf 
import sys
from dateutil.relativedelta import relativedelta
from  copy import deepcopy
from statsmodels.tsa.arima_model import ARMA
import warnings
warnings.filterwarnings("ignore")```

定義ARIMA的類

class arima_model:函數

def __init__(self,ts,maxLag = 9):
    self.data_ts = ts
    self.resid_ts = None
    self.predict_ts = None
    self.forecast_ts = None
    self.maxLag = maxLag
    self.p = maxLag
    self.q = maxLag
    self.properModel = None
    self.bic = sys.maxsize

#計算最優的ARIMA模型,將相關結果賦給相應的屬性
def get_proper_model(self):
    self._proper_model()
    self.predict_ts = deepcopy(self.properModel.predict())
    self.resid_ts = deepcopy(self.properModel.resid)
    self.forecast_ts = deepcopy(self.properModel.forecast())

#對於給定範圍內的p,q計算擬合得最好的arima模型,這裏是對差分好的數據進行擬合,故差分恆爲0
def _proper_model(self):
    for p in np.arange(self.maxLag):
        for q in np.arange(self.maxLag):
            model = ARMA(self.data_ts, order = (p,q))
            try:
                results_ARMA = model.fit(disp = -1, method = "css")
            except:
                continue
            bic = results_ARMA.bic
            
            if  bic < self.bic:
                self.p = p
                self.q = q
                self.properModel = results_ARMA
                self.bic = bic
                self.resid_ts = deepcopy(self.properModel.resid)
                self.predict_ts = self.properModel.predict()

#參數肯定模型
def certain_model(self,p,q):
    model = ARMA(self.data_ts,order = (p,q))
    try:
        self.properModel = model.fit(disp = -1,method = "css")
        self.p = p
        self.q = q
        self.bic = self.properModel.bic
        self.predict_ts = self.properModel.predict()
        self.resid_ts = deepcopy(self.properModel.resid)
        self.forecast_ts = self.properModel.forecast()
    except:
        print ("You can not fit the model with this parameter p,q")```
dateparse = lambda dates:pd.datetime.strptime(dates,'%Y-%m')
#paese_dates指定日期在哪列 index_dates將年月日的哪一個做爲索引,date_parser將字符串轉爲日期
f = open("D:\福建\AirPassengers.csv")
data = pd.read_csv(f, parse_dates=["Month"],index_col="Month",date_parser=dateparse)
ts = data["#Passengers"]
def draw_ts(timeSeries,title):
    f = plt.figure(facecolor = "white")
    timeSeries.plot(color = "blue")
    plt.title(title)
    plt.show()

def seasonal_decompose(ts):
    from statsmodels.tsa.seasonal import seasonal_decompose
    decomposition = seasonal_decompose(ts, model = "multiplicative")
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid
    draw_ts(ts,'origin')
    draw_ts(trend,'trend')
    draw_ts(seasonal,'seasonal')
    draw_ts(residual,'residual')
    return trend,seasonal,residual
def testStationarity(ts):
    dftest = adfuller(ts)
    # 對上述函數求得的值進行語義描述
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
#     print ("dfoutput",dfoutput)        
    return dfoutput
ts_log = np.log(ts)
trend,seasonal,residual = seasonal_decompose(ts_log)
seasonal_arr = seasonal
residual = residual.dropna()
residual_mean = np.mean(residual.values)
trend = trend.dropna()

代碼運行以下:
圖片描述
圖片描述
圖片描述
圖片描述優化

#將原始數據分解爲趨勢份量,季節週期和隨機份量
#對trend進行平穩定檢驗
testStationarity(trend)

圖片描述

#對序列進行平穩定處理
trend_diff_1 = trend.diff(1)
trend_diff_1 = trend_diff_1.dropna()
draw_ts(trend_diff_1,'trend_diff_1')
testStationarity(trend_diff_1)
trend_diff_2 = trend_diff_1.diff(1)
trend_diff_2 = trend_diff_2.dropna()
draw_ts(trend_diff_2,'trend_diff_2')
testStationarity(trend_diff_2)
#使用模型擬合趨勢份量
#使用模型參數的自動識別
model = arima_model(trend_diff_2)
model.get_proper_model()
predict_ts = model.properModel.predict()

#還原數據,由於使用的是乘法模型,將趨勢份量還原以後須要乘以對應的季節週期份量和隨機份量
diff_shift_ts = trend_diff_1.shift(1)
diff_recover_1 = predict_ts.add(diff_shift_ts)
rol_shift_ts = trend.shift(1)
diff_recover = diff_recover_1.add(rol_shift_ts)
recover = diff_recover['1950-1':'1960-6'] * seasonal_arr['1950-1':'1960-6'] * residual_mean
log_recover = np.exp(recover)
draw_ts(log_recover,'log_recover')

圖片描述

#模型評價
ts_quantum = ts['1950-1':'1960-6']
plt.figure(facecolor = "white")
log_recover.plot(color = "blue",label = "Predict")
ts_quantum.plot(color = "red", label = "Original")
plt.legend(loc = "best")
plt.title("RMSE %.4f" % np.sqrt(sum((ts_quantum - log_recover) ** 2) / ts_quantum.size))
plt.show()

圖片描述

相關文章
相關標籤/搜索