LSTM Time Series Prediction

About Time Series Prediction

You have probably run into this kind of problem: given a dataset, predict the value at the next time step. Such data often shows no obvious regularity, and a simple n-th order model has no hope of fitting it. Honestly, whenever I met this kind of problem before I would just reach for a grey model, but after using it a lot something started to feel off. There are other models, such as ARMA and ARIMA, that I have not tried. This article explains how to do time series prediction with an LSTM.

Data

The data is placed directly in the code, which saves the trouble of downloading and parsing a file. It is the classic international airline passenger series, covering 1949-01 through 1960-12, i.e. 12 years or 12 × 12 = 144 monthly values. We take the first 9 years of data (75%) as the training set and the last 3 years (25%) as the test set. Normally we would also want a validation set, but the dataset is small and there is no need to complicate things, so we skip validation. Since the passenger flow clearly follows a one-year cycle, I add year and month as two extra feature dimensions.

def load_data():
    # monthly international airline passenger numbers, 1949-01 ~ 1960-12
    seq_number = np.array(
        [112., 118., 132., 129., 121., 135., 148., 148., 136., 119., 104.,
         118., 115., 126., 141., 135., 125., 149., 170., 170., 158., 133.,
         114., 140., 145., 150., 178., 163., 172., 178., 199., 199., 184.,
         162., 146., 166., 171., 180., 193., 181., 183., 218., 230., 242.,
         209., 191., 172., 194., 196., 196., 236., 235., 229., 243., 264.,
         272., 237., 211., 180., 201., 204., 188., 235., 227., 234., 264.,
         302., 293., 259., 229., 203., 229., 242., 233., 267., 269., 270.,
         315., 364., 347., 312., 274., 237., 278., 284., 277., 317., 313.,
         318., 374., 413., 405., 355., 306., 271., 306., 315., 301., 356.,
         348., 355., 422., 465., 467., 404., 347., 305., 336., 340., 318.,
         362., 348., 363., 435., 491., 505., 404., 359., 310., 337., 360.,
         342., 406., 396., 420., 472., 548., 559., 463., 407., 362., 405.,
         417., 391., 419., 461., 472., 535., 622., 606., 508., 461., 390.,
         432.], dtype=np.float32)
    # plt.plot(seq_number)
    # plt.ion()
    seq_number = seq_number[:, np.newaxis] # add a new dimension
    
    # 12 years
    seq_year = np.arange(12)
    # 12 months
    seq_month = np.arange(12)
    seq_year_month = np.transpose(
        [np.repeat(seq_year, len(seq_month)),
         np.tile(seq_month, len(seq_year))],
    )
    seq = np.concatenate((seq_number, seq_year_month), axis=1)

    # normalization
    seq = (seq - seq.mean(axis=0)) / seq.std(axis=0)
    return seq

The overall data shape is (144, 3): for each of the 144 months we have [passenger count, year, month]. Each column is then z-score normalized.
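One caveat: the model's outputs live in these normalized units, so to read a prediction as an actual passenger count you must keep the per-column mean and standard deviation around and invert the transform. A minimal sketch (the helper names are mine, not part of the original code):

def normalize(seq):
    # keep the statistics so predictions can be mapped back later
    mean, std = seq.mean(axis=0), seq.std(axis=0)
    return (seq - mean) / std, mean, std

def denormalize_passengers(pred, mean, std):
    # invert the z-score on the passenger-count column (column 0)
    return pred * std[0] + mean[0]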

Model

We want to feed in the first 9 years of data and have the LSTM predict the passenger flow of the last 3 years. So we first train the LSTM on the monthly data of the first 9 years, teaching it to predict the next month from the preceding months. Once training is done, we let the LSTM predict the month that follows the first 9 years, append that prediction to the input, and iterate to obtain the remaining 3 years of passenger flow.

Note that the first dimension of a tensor is usually the batch size, but by default PyTorch expects recurrent-network inputs with the sequence length seq_len as the first dimension and the batch size only as the second.
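If you would rather keep batch first, nn.LSTM also accepts batch_first=True, which swaps the first two dimensions of the input and output tensors (the hidden and cell states keep the shape (num_layers, batch, hidden) either way). A quick check:

import torch
from torch import nn

lstm = nn.LSTM(input_size=3, hidden_size=8, num_layers=1, batch_first=True)
x = torch.randn(4, 108, 3)       # (batch, seq_len, input_dim)
out, (h, c) = lstm(x)
assert out.shape == (4, 108, 8)  # (batch, seq_len, hidden)
assert h.shape == (1, 4, 8)      # (num_layers, batch, hidden), unaffected by batch_first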

For this passenger data, seq_len is the length of the time series: the first 9 years span roughly 108 months, so seq_len = 108 (in the full code below it works out to int(143 × 0.75) = 107, because the last month is held out as a prediction target). input_dim is the dimensionality of the input; each time step carries three features, so input_dim = 3.

With that, we can fix the structure of the LSTM:

lstm = nn.LSTM(input_dim, mid_dim, mid_layers)
# input_dim: dimensionality of the LSTM input tensor; our data fixes this to 3
# mid_dim: width of the LSTM's gates, and also the dimensionality of the LSTM output tensor
# mid_layers: number of stacked LSTM layers, usually set to 1 or 2

x = torch.randn(seq_len, batch_size, input_dim)
output, (hn, cn) = lstm(x)  # nn.LSTM returns (output, (h_n, c_n)), not a single tensor
assert output.size() == (seq_len, batch_size, mid_dim)

mid_layers is usually set to 1 or 2: in theory, a two-layer fully connected network that is wide enough (has enough neurons) and contains at least one layer with a "squashing" activation function can already approximate any continuous function.
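For reference, with a stacked LSTM the hidden state h and cell state c both have the shape (num_layers, batch, hidden_size); this is exactly the shape of the zero tensors we will pass as the initial state during evaluation. A quick sanity check:

import torch
from torch import nn

rnn = nn.LSTM(input_size=3, hidden_size=8, num_layers=2)
x = torch.randn(108, 1, 3)              # (seq_len, batch, input_dim)
out, (h, c) = rnn(x)
assert out.shape == (108, 1, 8)         # outputs come from the top layer only
assert h.shape == c.shape == (2, 1, 8)  # (num_layers, batch, hidden_size)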

For time series prediction we attach two fully connected layers after the LSTM (one would also work) to map the LSTM output tensor to the desired dimensionality. We only need to predict a single value, the passenger count, so out_dim = 1.

fc = nn.Sequential(
    nn.Linear(mid_dim, mid_dim),
    nn.ReLU(),
    nn.Linear(mid_dim, out_dim),
)

x = output_of_LSTM
seq_len, batch_size, mid_dim = x.shape
x = x.view(-1, mid_dim)  # merge the seq_len and batch axes before the fully connected layers
output_of_fc = fc(x)
output_of_fc = output_of_fc.view(seq_len, batch_size, out_dim)  # restore the sequence layout
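Strictly speaking, the view is optional: nn.Linear operates on the last dimension and broadcasts over all leading dimensions, so the fc stack can consume the (seq_len, batch, mid_dim) tensor directly. The reshape above just makes the intent explicit. For example:

import torch
from torch import nn

fc = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 1))
y = torch.randn(108, 4, 8)  # (seq_len, batch, mid_dim), straight from the LSTM
out = fc(y)                 # Linear applies to the last dimension only
assert out.shape == (108, 4, 1)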

Training

When sequences in the same batch have different lengths, you need from torch.nn.utils.rnn import pad_sequence.

We only have one training series: the passenger flow of the first 9 years. Within a single batch we can still train the LSTM to predict the flow of different months: at every time step, the input up to month t is paired with the flow of month t+1 as its target. Because the training sequences in one batch then have different lengths, padding them with zeros by hand is inelegant, so we enlist PyTorch's built-in utility pad_sequence, as follows.

var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
var_y = torch.tensor(train_y, dtype=torch.float32, device=device)

batch_var_x = list()
batch_var_y = list()

for i in range(batch_size):
    j = train_size - i - 1  # suffix starting at month j; the -1 avoids an empty slice at i == 0
    batch_var_x.append(var_x[j:])
    batch_var_y.append(var_y[j:])

from torch.nn.utils.rnn import pad_sequence
batch_var_x = pad_sequence(batch_var_x)
batch_var_y = pad_sequence(batch_var_y)

A note on ordering: pad_sequence itself does not care about the order of the sequences; it simply zero-pads every sequence to the length of the longest one. It is pack_padded_sequence that, by default (enforce_sorted=True), expects sequences sorted from longest to shortest; see the PyTorch documentation for details.
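A minimal demonstration of what pad_sequence actually does (the input order does not matter, and it zero-pads along a new time-major axis by default):

import torch
from torch.nn.utils.rnn import pad_sequence

a = torch.ones(3, 2)   # a length-3 sequence with 2 features
b = torch.ones(1, 2)   # a length-1 sequence
padded = pad_sequence([b, a])      # unsorted input is fine
assert padded.shape == (3, 2, 2)   # (max_len, batch, features)
assert (padded[1:, 0] == 0).all()  # the short sequence was padded with zeros

With the padded batch in hand, training is a plain regression loop: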

criterion = nn.MSELoss()  # L2_loss

for e in range(epochs):
    out = net(batch_var_x)

    loss = criterion(out, batch_var_y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
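Note that with this setup the padded positions also contribute to the MSE (a zero target against the network's output on zero inputs). If you want to exclude them, one option is to mask the loss manually; below is a sketch that assumes you kept the original Python list of variable-length sequences around (seq_list is a hypothetical name, since the code above overwrites the list):

import torch

# seq_list: the list of variable-length sequences passed to pad_sequence (assumption)
lens = torch.tensor([s.shape[0] for s in seq_list], device=batch_var_x.device)
max_len = batch_var_x.shape[0]

# mask[t, b] is True where step t is a real (non-padded) step of sequence b
mask = torch.arange(max_len, device=batch_var_x.device).unsqueeze(1) < lens

out = net(batch_var_x)
loss = ((out - batch_var_y) ** 2)[mask].mean()  # average over real steps only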

Evaluation

As described above: we use the first 9 years of data as input, predict the next month's passenger flow, append that prediction to the input sequence, and repeat to obtain the last 3 years step by step. It is like building a road: you keep advancing over the surface you have just paved.
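In code, the scheme boils down to the loop below (a simplified sketch of the evaluation section in the full listing; known_inputs, h0, c0, n_future_steps and build_next_input are illustrative names, with build_next_input standing for "combine the predicted flow with the next month's year/month features"):

# warm up on the known history, keeping the hidden state
pred, hc = net.output_y_hc(known_inputs, (h0, c0))
next_value = pred[-1]

# then roll forward one month at a time, feeding each prediction back in
for step in range(n_future_steps):
    x_t = build_next_input(next_value, step)  # hypothetical helper
    pred, hc = net.output_y_hc(x_t, hc)
    next_value = pred[-1]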

Final Complete Code

import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt

def run_train_lstm():
    inp_dim = 3
    out_dim = 1
    mid_dim = 8
    mid_layers = 1
    batch_size = 12 * 4
    mod_dir = '.'

    '''load data'''
    data = load_data()
    data_x = data[:-1, :]  # months 0~142
    data_y = data[1:, 0]   # months 1~143: the next-month passenger count for each row of data_x
    assert data_x.shape[1] == inp_dim

    train_size = int(len(data_x) * 0.75)

    train_x = data_x[:train_size]
    train_y = data_y[:train_size]
    train_x = train_x.reshape((train_size, inp_dim))
    train_y = train_y.reshape((train_size, out_dim))

    '''build model'''
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = RegLSTM(inp_dim, out_dim, mid_dim, mid_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

    '''train'''
    var_x = torch.tensor(train_x, dtype=torch.float32).to(device)
    var_y = torch.tensor(train_y, dtype=torch.float32).to(device)

    batch_var_x = list()
    batch_var_y = list()

    for i in range(batch_size):
        j = train_size - i - 1  # suffix starting at month j; the -1 avoids an empty slice at i == 0
        batch_var_x.append(var_x[j:])
        batch_var_y.append(var_y[j:])
    
    from torch.nn.utils.rnn import pad_sequence
    batch_var_x = pad_sequence(batch_var_x)
    batch_var_y = pad_sequence(batch_var_y)

    print("Training Start")
    for e in range(500):
        out = net(batch_var_x)
    
        loss = criterion(out, batch_var_y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
        if e % 64 == 0:
            print('Epoch: {:4}, Loss: {:.5f}'.format(e, loss.item()))
    torch.save(net.state_dict(), '{}/net.pth'.format(mod_dir))
    print("Save in:", '{}/net.pth'.format(mod_dir))

    '''eval'''
    net.load_state_dict(torch.load('{}/net.pth'.format(mod_dir), map_location=lambda storage, loc: storage))
    net = net.eval()

    test_x = data_x.copy()
    test_x[train_size:, 0] = 0  # hide the true passenger counts of the test months
    test_x = test_x[:, np.newaxis, :]
    test_x = torch.tensor(test_x, dtype=torch.float32, device=device)

    eval_size = 1
    zero_ten = torch.zeros((mid_layers, eval_size, mid_dim), dtype=torch.float32, device=device)
    with torch.no_grad():
        # warm up on the training months, then roll forward one month at a time,
        # feeding each prediction back in as the passenger feature of the next input
        test_y, hc = net.output_y_hc(test_x[:train_size], (zero_ten, zero_ten))
        test_x[train_size, 0, 0] = test_y[-1]
        for i in range(train_size, len(data) - 2):
            test_y, hc = net.output_y_hc(test_x[i:i + 1], hc)
            test_x[i + 1, 0, 0] = test_y[-1]
    pred_y = test_x[1:, 0, 0]
    pred_y = pred_y.cpu().numpy()

    diff_y = pred_y[train_size:] - data_y[train_size:-1]
    l1_loss = np.mean(np.abs(diff_y))
    l2_loss = np.mean(diff_y ** 2)
    print("L1: {:.3f}    L2: {:.3f}".format(l1_loss, l2_loss))

    plt.plot(pred_y, 'r', label='pred')
    plt.plot(data_y, 'b', label='real', alpha=0.3)
    plt.plot([train_size, train_size], [-1, 2], color='k', label='train | pred')
    plt.legend(loc='best')
    plt.savefig('lstm_reg.png')


class RegLSTM(nn.Module):
    def __init__(self, inp_dim, out_dim, mid_dim, mid_layers):
        super(RegLSTM, self).__init__()

        self.lstm = nn.LSTM(inp_dim, mid_dim, mid_layers)  # lstm
        self.fc = nn.Sequential(
            nn.Linear(mid_dim, mid_dim),
            nn.ReLU(),
            nn.Linear(mid_dim, out_dim),
        )

    def forward(self, x):
        y = self.lstm(x)[0]  # y, (h, c) = self.lstm(x)

        seq_len, batch_size, hid_dim = y.shape
        y = y.view(-1, hid_dim)
        y = self.fc(y)
        y = y.view(seq_len, batch_size, -1)
        return y

    """
    Examples::
        >>> rnn = nn.LSTM(10, 20, 2)
        >>> input = torch.randn(5, 3, 10)
        >>> h0 = torch.randn(2, 3, 20)
        >>> c0 = torch.randn(2, 3, 20)
        >>> output, (hn, cn) = rnn(input, (h0, c0))
    """

    def output_y_hc(self, x, hc):
        # like forward, but also takes and returns the (hidden, cell) state,
        # so autoregressive evaluation can resume from where it left off
        y, hc = self.lstm(x, hc)

        seq_len, batch_size, hid_dim = y.shape
        y = y.view(-1, hid_dim)
        y = self.fc(y)
        y = y.view(seq_len, batch_size, -1)
        return y, hc

def load_data():
    # monthly international airline passenger numbers, 1949-01 ~ 1960-12
    seq_number = np.array(
        [112., 118., 132., 129., 121., 135., 148., 148., 136., 119., 104.,
         118., 115., 126., 141., 135., 125., 149., 170., 170., 158., 133.,
         114., 140., 145., 150., 178., 163., 172., 178., 199., 199., 184.,
         162., 146., 166., 171., 180., 193., 181., 183., 218., 230., 242.,
         209., 191., 172., 194., 196., 196., 236., 235., 229., 243., 264.,
         272., 237., 211., 180., 201., 204., 188., 235., 227., 234., 264.,
         302., 293., 259., 229., 203., 229., 242., 233., 267., 269., 270.,
         315., 364., 347., 312., 274., 237., 278., 284., 277., 317., 313.,
         318., 374., 413., 405., 355., 306., 271., 306., 315., 301., 356.,
         348., 355., 422., 465., 467., 404., 347., 305., 336., 340., 318.,
         362., 348., 363., 435., 491., 505., 404., 359., 310., 337., 360.,
         342., 406., 396., 420., 472., 548., 559., 463., 407., 362., 405.,
         417., 391., 419., 461., 472., 535., 622., 606., 508., 461., 390.,
         432.], dtype=np.float32)
    # plt.plot(seq_number)
    # plt.ion()
    seq_number = seq_number[:, np.newaxis] # add a new dimension
    
    # 12 years
    seq_year = np.arange(12)
    # 12 months
    seq_month = np.arange(12)
    seq_year_month = np.transpose(
        [np.repeat(seq_year, len(seq_month)),
         np.tile(seq_month, len(seq_year))],
    )
    seq = np.concatenate((seq_number, seq_year_month), axis=1)

    # normalization
    seq = (seq - seq.mean(axis=0)) / seq.std(axis=0)
    return seq


if __name__ == '__main__':
    torch.manual_seed(1)
    run_train_lstm()
