4. LSTM¶
3μ₯μμλ Johns Hopkins Universityμμ μ 곡νλ μκ³μ΄ λ°μ΄ν°λ₯Ό μ§λνμ΅μ© λ°μ΄ν°λ‘ λ³ννλ κ³Όμ μ μ€μ΅νμ΅λλ€. μ΄λ² μ₯μμλ LSTMμ μ¬μ©νμ¬ μμΌλ‘μ λνλ―Όκ΅ μ½λ‘λ νμ§μ μλ₯Ό μμΈ‘ν΄λ³΄λ λͺ¨λΈμ ꡬμΆν΄λ³΄κ² μ΅λλ€.
4.1μ κ³Ό 4.2μ μμλ λνλ―Όκ΅ μ½λ‘λ λμ νμ§μ μ λ°μ΄ν°λ₯Ό λΆλ¬μ¨ ν, λ°μ΄ν°λ₯Ό νλ ¨μ©, κ²μ¦μ©, μνμ© λ°μ΄ν°λ‘ λλμ΄ λ³΄κ² μ΅λλ€. 4.3μ μμλ LSTM λͺ¨λΈμ μ μνκ³ 4.4μ μμλ μ μν λͺ¨λΈμ νμ΅μμΌ λ³΄κ² μ΅λλ€. λ§μ§λ§μΌλ‘ μ½λ‘λ νμ§μ μμ λν μμΈ‘κ°μ νμΈν΄λ³΄κ² μ΅λλ€.
μ°μ κΈ°λ³Έμ μΈ λͺ¨λλ€μ import ν΄μ€λλ€.
%matplotlib inline
μ notebookμ μ€νν λΈλΌμ°μ μμ λ°λ‘ κ·Έλ¦Όμ λ³Ό μ μκ² ν΄μ£Όλ κ², %config InlineBackend.figure_format='retina'
λ κ·Έλνμ ν΄μλλ₯Ό λμ¬μ€λλ€.
import torch
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.preprocessing import MinMaxScaler
from pandas.plotting import register_matplotlib_converters
from torch import nn, optim
%matplotlib inline
%config InlineBackend.figure_format='retina'
# Plotting style: seaborn whitegrid theme, muted palette, larger fonts,
# and a 14x10-inch default figure size.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
rcParams['figure.figsize'] = 14, 10
# Let matplotlib plot pandas datetime indexes directly.
register_matplotlib_converters()

# Fix the NumPy and PyTorch RNG seeds so runs are reproducible.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
<torch._C.Generator at 0x7f773ce2bb88>
4.1 데이터 다운로드¶
λͺ¨λΈλ§ μ€μ΅μ μν΄ λνλ―Όκ΅ μ½λ‘λ λμ νμ§μ λ°μ΄ν°λ₯Ό λΆλ¬μ€κ² μ΅λλ€. 2.1μ μ λμ¨ μ½λλ₯Ό νμ©νκ² μ΅λλ€.
!git clone https://github.com/Pseudo-Lab/Tutorial-Book-Utils
!python Tutorial-Book-Utils/PL_data_loader.py --data COVIDTimeSeries
!unzip -q COVIDTimeSeries.zip
Cloning into 'Tutorial-Book-Utils'...
remote: Enumerating objects: 24, done.
remote: Counting objects: 100% (24/24), done.
remote: Compressing objects: 100% (20/20), done.
remote: Total 24 (delta 6), reused 14 (delta 3), pack-reused 0
Unpacking objects: 100% (24/24), done.
COVIDTimeSeries.zip is done!
4.2 데이터 전처리¶
3μ₯μμ μ€μ΅ν μ½λλ₯Ό νμ©ν΄ μκ³μ΄ λ°μ΄ν°λ₯Ό μ§λνμ΅μ© λ°μ΄ν°λ‘ λ³νν ν νμ΅μ©, κ²μ¦μ©, μνμ© λ°μ΄ν°λ‘ λΆλ¦¬νκ² μ΅λλ€. κ·Έλ¦¬κ³ νλ ¨μ© λ°μ΄ν°μ ν΅κ³λμ νμ©ν΄ μ€μΌμΌλ§μ μ§ννκ² μ΅λλ€.
# Extract the South Korea rows and convert cumulative totals to daily new cases.
confirmed = pd.read_csv('time_series_covid19_confirmed_global.csv')
confirmed[confirmed['Country/Region']=='Korea, South']
# Drop the first four metadata columns (province, country, lat, long) and
# transpose so that dates become the row index.
korea = confirmed[confirmed['Country/Region']=='Korea, South'].iloc[:,4:].T
korea.index = pd.to_datetime(korea.index)
# diff() turns cumulative counts into daily deltas; the first row has no
# previous value, so fill it with the first cumulative count itself.
daily_cases = korea.diff().fillna(korea.iloc[0]).astype('int')
def create_sequences(data, seq_length):
    """Turn a time series into (window, next-value) supervised pairs.

    Args:
        data: pandas object indexable with ``.iloc`` (e.g. a DataFrame).
        seq_length: number of past time steps in each input window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays where ``X[i]`` holds
        ``data[i : i+seq_length]`` and ``y[i]`` is ``data[i+seq_length]``.
    """
    n_windows = len(data) - seq_length
    windows = [data.iloc[i:i + seq_length] for i in range(n_windows)]
    targets = [data.iloc[i + seq_length] for i in range(n_windows)]
    return np.array(windows), np.array(targets)
# Convert the series into supervised-learning samples: each input is a
# 5-day window and the target is the following day's case count.
seq_length = 5
X, y = create_sequences(daily_cases, seq_length)

# Split into train / validation / test sets (80% train, then 33 + 33 days).
# NOTE(review): 327 is presumably len(X) at the time this was written;
# recomputing it as len(X) would be safer if the dataset grows — confirm.
train_size = int(327 * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+33], y[train_size:train_size+33]
X_test, y_test = X[train_size+33:], y[train_size+33:]

# Scaling statistics come from the training split only, so no information
# from the validation/test sets leaks into preprocessing.
MIN = X_train.min()
MAX = X_train.max()
def MinMaxScale(array, min_val, max_val):
    """Min-max scale *array* into [0, 1] using the given bounds.

    Args:
        array: scalar or numpy array to scale.
        min_val: lower bound used for scaling (maps to 0).
        max_val: upper bound used for scaling (maps to 1).

    Returns:
        ``(array - min_val) / (max_val - min_val)``, same shape as input.

    Note:
        Parameters were renamed from ``min``/``max`` so the Python builtins
        are not shadowed; all call sites in this file pass positionally,
        so the change is backward-compatible.
    """
    return (array - min_val) / (max_val - min_val)
# Apply min-max scaling (using train-set statistics) to every split.
X_train = MinMaxScale(X_train, MIN, MAX)
y_train = MinMaxScale(y_train, MIN, MAX)
X_val = MinMaxScale(X_val, MIN, MAX)
y_val = MinMaxScale(y_val, MIN, MAX)
X_test = MinMaxScale(X_test, MIN, MAX)
y_test = MinMaxScale(y_test, MIN, MAX)
def make_Tensor(array):
    """Convert a NumPy array into a float32 PyTorch tensor."""
    tensor = torch.from_numpy(array)
    return tensor.to(dtype=torch.float32)
# Convert every split to float32 tensors for PyTorch, then sanity-check shapes.
X_train = make_Tensor(X_train)
y_train = make_Tensor(y_train)
X_val = make_Tensor(X_val)
y_val = make_Tensor(y_val)
X_test = make_Tensor(X_test)
y_test = make_Tensor(y_test)
print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)
torch.Size([261, 5, 1]) torch.Size([33, 5, 1]) torch.Size([33, 5, 1])
torch.Size([261, 1]) torch.Size([33, 1]) torch.Size([33, 1])
4.3 LSTM 모델 정의¶
LSTM λͺ¨λΈμ μμ±ν΄λ³΄κ² μ΅λλ€. CovidPredictor
ν΄λμ€λ κΈ°λ³Έ λ³μ, layerλ₯Ό μ΄κΈ°ν ν΄μ£Όλ μμ±μ, νμ΅ μ΄κΈ°νλ₯Ό μν reset_hidden_state
ν¨μ, κ·Έλ¦¬κ³ μμΈ‘μ μν forward
ν¨μλ‘ κ΅¬μ±λΌ μμ΅λλ€.
class CovidPredictor(nn.Module):
    """LSTM regressor that predicts the next value of a scaled time series."""

    def __init__(self, n_features, n_hidden, seq_len, n_layers):
        """
        Args:
            n_features: values per time step (1 for this univariate series).
            n_hidden: LSTM hidden-state size.
            seq_len: length of each input sequence.
            n_layers: number of stacked LSTM layers.
        """
        super(CovidPredictor, self).__init__()
        self.n_hidden = n_hidden
        self.seq_len = seq_len
        self.n_layers = n_layers
        # batch_first is left at its default (False), so nn.LSTM treats
        # dimension 0 of its input as time and dimension 1 as batch.
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers
        )
        # Map the final hidden activation to a single predicted value.
        self.linear = nn.Linear(in_features=n_hidden, out_features=1)

    def reset_hidden_state(self):
        # Zero (h_0, c_0) so a new sequence is not influenced by hidden
        # state left over from the previous one.
        # NOTE(review): nn.LSTM expects hidden shape (n_layers, batch,
        # n_hidden). Using seq_len in the batch slot only works because
        # callers feed batches of size 1, which the view() in forward()
        # presents to the LSTM as seq_len parallel length-1 sequences —
        # confirm this is the intended axis layout.
        self.hidden = (
            torch.zeros(self.n_layers, self.seq_len, self.n_hidden),
            torch.zeros(self.n_layers, self.seq_len, self.n_hidden)
        )

    def forward(self, sequences):
        # Reshape input to (len(sequences), seq_len, n_features); with
        # batch_first=False the LSTM reads dim 0 as the time axis.
        lstm_out, self.hidden = self.lstm(
            sequences.view(len(sequences), self.seq_len, -1),
            self.hidden
        )
        # Take the activations at the last position and regress to one value.
        last_time_step = lstm_out.view(self.seq_len, len(sequences), self.n_hidden)[-1]
        y_pred = self.linear(last_time_step)
        return y_pred
4.4 학습¶
4.3μ μμ μ μν CovidPredictor
ν΄λμ€λ₯Ό νμ΅μν€κΈ° μν΄ train_model
ν¨μλ₯Ό μ μν©λλ€. νμ΅μ© λ°μ΄ν°μ κ²μ¦μ© λ°μ΄ν°λ₯Ό μ
λ ₯μΌλ‘ λ°μΌλ©°, num_epochs
λ νμ΅μν¬ epoch νμλ₯Ό μλ―Έν©λλ€. verbose
λ epochμ verbose
λ²μ§Έ λ§λ€ μΆλ ₯νλ€λ λ»μ
λλ€. patience
λ κ²μ¦μ© μμ€κ°(validation loss)μ patience
λ²μ§Έ epochλ§λ€ patience
λ§νΌμ μ΄μ μμ€κ°κ³Ό λΉκ΅ν΄ μ€μ΄λ€μ§ μμΌλ©΄ νμ΅μ μ’
λ£ μν¬ λ μ¬μ©νλ μΈμμ
λλ€. PyTorchμμλ hidden_state
λ₯Ό 보쑴νκΈ° λλ¬Έμ μλ‘μ΄ μνμ€κ° μ
λ ₯λ λλ§λ€ hidden_state
λ₯Ό μ΄κΈ°ν μμΌμΌ μ΄μ μνμ€μ hidden_state
λ‘ λΆν° μν₯μ λ°μ§ μμ΅λλ€.
def train_model(model, train_data, train_labels, val_data=None, val_labels=None, num_epochs=100, verbose = 10, patience = 10):
    """Train *model* with L1 loss and Adam, one sequence at a time.

    Args:
        model: module exposing ``reset_hidden_state()``; called per sequence.
        train_data: iterable of input sequences (indexed in step with labels).
        train_labels: target value for each training sequence.
        val_data: optional validation sequences; enables loss tracking and
            early stopping.
        val_labels: targets for ``val_data``.
        num_epochs: maximum number of epochs.
        verbose: print losses every ``verbose`` epochs.
        patience: every ``patience`` epochs, stop early if the validation
            loss is worse than it was ``patience`` epochs before.

    Returns:
        ``(model, train_hist, val_hist)`` — the trained model and per-epoch
        mean losses as plain floats.
    """
    loss_fn = torch.nn.L1Loss()  # mean absolute error
    optimiser = torch.optim.Adam(model.parameters(), lr=0.001)
    train_hist = []
    val_hist = []
    for t in range(num_epochs):
        epoch_loss = 0
        for idx, seq in enumerate(train_data):
            # Reset hidden state per sequence so sequences stay independent.
            model.reset_hidden_state()
            seq = torch.unsqueeze(seq, 0)  # add a leading batch dimension
            y_pred = model(seq)
            loss = loss_fn(y_pred[0].float(), train_labels[idx])  # loss for one step
            # Update weights.
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            epoch_loss += loss.item()
        train_hist.append(epoch_loss / len(train_data))
        if val_data is not None:
            with torch.no_grad():
                val_loss = 0
                for val_idx, val_seq in enumerate(val_data):
                    model.reset_hidden_state()  # reset per validation sequence
                    val_seq = torch.unsqueeze(val_seq, 0)
                    y_val_pred = model(val_seq)
                    val_step_loss = loss_fn(y_val_pred[0].float(), val_labels[val_idx])
                    # .item() keeps val_hist as plain floats (the original
                    # accumulated tensors, making val_hist a list of tensors).
                    val_loss += val_step_loss.item()
            val_hist.append(val_loss / len(val_data))
            # Print losses every `verbose` epochs.
            if t % verbose == 0:
                print(f'Epoch {t} train loss: {epoch_loss / len(train_data)} val loss: {val_loss / len(val_data)}')
            # Every `patience` epochs, check for early stopping
            # (boolean `and`, not bitwise `&`, as in the original).
            if t % patience == 0 and t != 0:
                # Stop if validation loss got worse over the last window.
                if val_hist[t - patience] < val_hist[t]:
                    print('\n Early Stopping')
                    break
        elif t % verbose == 0:
            print(f'Epoch {t} train loss: {epoch_loss / len(train_data)}')
    return model, train_hist, val_hist
# One-layer LSTM with 4 hidden units; the input is univariate (n_features=1).
model = CovidPredictor(
    n_features=1,
    n_hidden=4,
    seq_len=seq_length,
    n_layers=1
)
# Train for up to 100 epochs, logging every 10 epochs; early stopping is
# checked every 50 epochs against the validation loss 50 epochs earlier.
model, train_hist, val_hist = train_model(
    model,
    X_train,
    y_train,
    X_val,
    y_val,
    num_epochs=100,
    verbose=10,
    patience=50
)
Epoch 0 train loss: 0.0846735675929835 val loss: 0.047220394015312195
Epoch 10 train loss: 0.03268902644807637 val loss: 0.03414301574230194
Epoch 20 train loss: 0.03255926527910762 val loss: 0.03243739902973175
Epoch 30 train loss: 0.032682761279652 val loss: 0.033064160495996475
Epoch 40 train loss: 0.0325928641549201 val loss: 0.032514143735170364
Epoch 50 train loss: 0.032316437919741904 val loss: 0.033000096678733826
Epoch 60 train loss: 0.03259847856704788 val loss: 0.03266565129160881
Epoch 70 train loss: 0.03220883647418827 val loss: 0.032897673547267914
Epoch 80 train loss: 0.03264666339685834 val loss: 0.032588861882686615
Epoch 90 train loss: 0.032349443449406844 val loss: 0.03221791982650757
train_hist
μ val_hist
μ μ μ₯λ μμ€κ°λ€μ μκ°ν ν΄λ³΄κ² μ΅λλ€.
# Visualize the training and validation loss curves over epochs.
plt.plot(train_hist, label="Training loss")
plt.plot(val_hist, label="Val loss")
plt.legend()
<matplotlib.legend.Legend at 0x7f76de333fd0>
4.5 예측¶
μ΄λ² μ μμλ ꡬμΆν λͺ¨λΈμ νμ©ν΄ μλ‘ λ€μ΄μ€λ λ°μ΄ν°μ λν μμΈ‘μ μ§νν΄λ³΄κ² μ΅λλ€. νμ¬ κ΅¬μΆλ λͺ¨λΈμ \(t-5\)μμ λΆν° \(t-1\)μμ κΉμ§μ λ°μ΄ν°λ₯Ό νμ©ν΄ \(t\)μμ μ νμ§μλ₯Ό μμΈ‘ν©λλ€. λ§μ°¬κ°μ§λ‘ μ€μ κ΄μΈ‘λ \(t-5\)λΆν° \(t-1\)κΉμ§μ μλ‘μ΄ λ°μ΄ν°λ₯Ό μ
λ ₯νλ€λ©΄ \(t\)μμ μ νμ§μμ λν μμΈ‘μ΄ κ°λ₯ν κ²μ΄λ©°, μ΄λ₯Ό One-Step
μμΈ‘μ΄λΌκ³ ν©λλ€. κ³Όκ±° λ°μ΄ν°λ₯Ό νμ©ν΄ ν λ¨μ μλ§ μμΈ‘νλ λ°©λ²μ
λλ€.
λ°λ©΄ κ³Όκ±° λ°μ΄ν°λ₯Ό νμ©ν΄ λ λ¨μ, μΈ λ¨μ λ±μ μ¬λ¬ λ¨μ μμ μμΈ‘νλ κ²μ Multi-Step
μμΈ‘μ΄λΌκ³ ν©λλ€. Multi-Step
μμΈ‘μλ ν¬κ² 2κ°μ§ λ°©λ²μΌλ‘ λλλλ°, μμ ꡬμΆν One-Step
μμΈ‘νλ λͺ¨λΈμ νμ©νλ λ°©λ²κ³Ό seq2seq
ꡬ쑰μ λͺ¨λΈμ νμ©νλ λ°©λ²μ΄ μμ΅λλ€.
첫λ²μ§Έ λ°©λ²μ One-Step
μμΈ‘νλ λͺ¨λΈμμ λ°νν \(t\)μμ μ μμΈ‘κ° \(\hat{t}\)μ νμ©ν΄ \(t-4\), \(t-3\), \(t-2\), \(t-1\), \(\hat{t}\) κ°μΌλ‘ \(t+1\)μμ μ κ°μ μμΈ‘ν©λλ€. μ΄μ²λΌ λͺ¨λΈμμ λμ¨ μμΈ‘κ°μ λ€μ λͺ¨λΈ μ
λ ₯κ°μΌλ‘ λ£μ΄ λ°λ³΅μ μΌλ‘ μμΈ‘ν μ μμ§λ§, μμΈ‘κ°μ μ€μ°¨κ° λμ λμ΄ μ μ°¨ μμΈ‘ μ±λ₯μ΄ λ¨μ΄μ§λ νμμ΄ λ°μν©λλ€.
λλ²μ§Έ λ°©λ²μ seq2seq
ꡬ쑰λ₯Ό νμ©ν΄ μμΈ‘νλ κ²μ
λλ€. μμΈ‘νκ³ μ νλ λ―Έλ κΈ°κ° λ§νΌ decoder
κΈΈμ΄λ₯Ό μ€μ ν΄μ μμΈ‘νλ λ°©λ²μ
λλ€. decoder
λ€νΈμν¬λ₯Ό ν΅ν΄ μΆκ° μ 보λ₯Ό μμΈ‘κ° μ°μΆ μ νμ©ν μ μλ€λ μ₯μ μ΄ μμ§λ§ μμΈ‘ μκ° λ¨μκ° κ³ μ λΌμΌ ν©λλ€.
μ΄λ² μ μμλ One-Step
μμΈ‘ λͺ¨λΈμ λ°λ³΅μ μΌλ‘ νμ©ν΄ Multi-Step
μμΈ‘νλ κ²μ μ½λλ‘ νμΈν΄λ³΄κ² μ΅λλ€.
4.5.1 One-Step 예측¶
μ°μ μμ λ§λ λͺ¨λΈμ λν One-Step
μμΈ‘μ μ§νν΄ λͺ¨λΈ μ±λ₯μ νμΈν΄λ³΄κ² μ΅λλ€. ꡬμΆλ μν λ°μ΄ν°μ λν μμΈ‘μ μ§νν΄λ³΄κ² μ΅λλ€. μμΈ‘ν λλ μλ‘μ΄ μνμ€κ° μ
λ ₯λ λ λ§λ€ hidden_state
λ μ΄κΈ°νλ₯Ό ν΄μ€μΌ μ΄μ μνμ€μ hidden_state
κ° λ°μλμ§ μμ΅λλ€. torch.unsqueeze
ν¨μλ₯Ό μ¬μ©νμ¬ μ
λ ₯ λ°μ΄ν°μ μ°¨μμ λλ € λͺ¨λΈμ΄ μμνλ 3μ°¨μ ννλ‘ λ§λ€μ΄μ€λλ€. κ·Έλ¦¬κ³ μμΈ‘λ λ°μ΄ν° λ΄μ μ‘΄μ¬νλ μ€μΉΌλΌκ°λ§ μΆμΆνμ¬ preds
리μ€νΈμ μΆκ°ν©λλ€.
# One-step prediction over the test set: each window is fed to the model
# independently, with the hidden state reset before every sequence.
pred_dataset = X_test

with torch.no_grad():
    preds = []
    # Iterate the sequences directly instead of indexing via `_` (the
    # original used `_`, conventionally a throwaway name, as a live index).
    for seq in pred_dataset:
        model.reset_hidden_state()
        y_test_pred = model(torch.unsqueeze(seq, 0))  # add batch dimension
        # The output is a 1x1 tensor; extract the scalar prediction.
        preds.append(torch.flatten(y_test_pred).item())
λͺ¨λΈμ΄ μμΈ‘ν κ°κ³Ό μ€μ κ°κ³Ό λΉκ΅λ₯Ό μ§νν΄λ³΄κ² μ΅λλ€. y_test
μ μ€μ κ°μ΄ μ μ₯λΌμμΌλ©° νμ¬ μ€μΌμΌλ§λ μνμ
λλ€. μλ μ€μΌμΌλ‘ λ³νμμΌμ£ΌκΈ° μν΄μ μλ μ°μμ νμ©νκ² μ΅λλ€. MinMax μ€μΌμΌλ§μ μ μ©ν λ μ¬μ©ν μ°μμ μμ©νμ¬ μλ κ°μΌλ‘ λ³ννλ μ°μμ
λλ€.
\(x = x_{scaled} * (x_{max} - x_{min}) + x_{min}\)
μ΄λ² λ°μ΄ν°μμ \(x_{min}\)μ 0μ΄μμ΅λλ€. κ·Έλ¬λ―λ‘ μλ μ€μΌμΌλ‘ 볡ꡬνκΈ° μν΄μ \(x_{max}\)λ§ κ³±ν΄μ£Όλ©΄ λ©λλ€.
# Plot true vs. predicted daily cases, rescaled back to case counts.
# Since x_min was 0, multiplying by MAX inverts the min-max scaling.
plt.plot(daily_cases.index[-len(y_test):], np.array(y_test) * MAX, label='True')
plt.plot(daily_cases.index[-len(preds):], np.array(preds) * MAX, label='Pred')
plt.xticks(rotation=45)
plt.legend()
<matplotlib.legend.Legend at 0x7f76dc9ac748>
νλμ κ·Έλνλ μν λ°μ΄ν°μ μ€μ κ°μ λνλ΄λ©° μ£Όν©μ κ·Έλνλ μμΈ‘κ°μ λνλ λλ€. νμ§μκ° μμΉνλ μΆμΈλ λͺ¨λΈμ΄ μμΈ‘νκ³ μμ§λ§ νμ§μκ° κΈκ²©ν μ¦κ°νλ ꡬκ°μ λν΄μλ μμΈ‘μ΄ μ λμ§ μμμ μ μ μμ΅λλ€.
μμΈ‘κ°μ νκ· μ€μ°¨λ₯Ό ꡬνκΈ° μν΄ MAE κ°μ μ°μΆν΄λ³΄κ² μ΅λλ€.
def MAE(true, pred):
    """Return the mean absolute error between *true* and *pred*."""
    diff = true - pred
    return np.abs(diff).mean()
# Mean absolute error of the one-step predictions on the original
# (un-scaled) case-count scale.
MAE(np.array(y_test)*MAX, np.array(preds)*MAX)
247.3132225984521
μν λ°μ΄ν°μ λν μμΈ‘κ°μ΄ νκ· μ μΌλ‘ μ€μ κ°κ³Ό μ½ 250λͺ μ μ°¨μ΄λ₯Ό μ§λκ³ μλ€λ κ²μ μ μ μμ΅λλ€. κ³Όκ±° νμ§μ μ λΏλ§ μλλΌ μΈκ΅¬ μ΄λ λ°μ΄ν°, μΈκ΅¬ ν΅κ³ λ°μ΄ν° λ±μ νμ©νλ€λ©΄ λ³΄λ€ μ κ΅ν μμΈ‘μ΄ κ°λ₯ν κ²μΌλ‘ 보μ λλ€.
4.5.2 Multi-Step 예측¶
One-Step
μμΈ‘ λͺ¨λΈμ λ°λ³΅μ μΌλ‘ νμ©ν΄ Multi-Step
μμΈ‘μ μ§νν΄λ³΄κ² μ΅λλ€. μν λ°μ΄ν°μ 첫λ²μ§Έ μνμ νμ©ν΄ λμ¨ μμΈ‘κ°μ μ
λ ₯ μνμ€μ ν¬ν¨μμΌ λ€μ κ°μ μμΈ‘νκ³ , λ ν΄λΉ κ°μ μ
λ ₯ μνΈμ€μ ν¬ν¨μμΌ λ€μ κ°μ μμΈ‘νλ κ³Όμ μ λ°λ³΅νκ² μ΅λλ€.
# Multi-step prediction: start from the first test window and repeatedly
# feed the model's own prediction back in as the newest input value.
with torch.no_grad():
    test_seq = X_test[:1]  # first test sequence, 3-D: (1, seq_length, 1)
    preds = []
    for _ in range(len(X_test)):
        model.reset_hidden_state()
        y_test_pred = model(test_seq)
        pred = torch.flatten(y_test_pred).item()
        preds.append(pred)
        new_seq = test_seq.numpy().flatten()
        new_seq = np.append(new_seq, [pred])  # append the prediction to the sequence
        new_seq = new_seq[1:]  # drop the oldest value so the window stays seq_length long
        test_seq = torch.as_tensor(new_seq).view(1, seq_length, 1).float()
μμ μΈκΈν κ²μ²λΌ ν΄λΉ λ°©λ²μ μμΈ‘ κΈ°κ°μ΄ κΈΈμ΄μ§μλ‘ μ€μ°¨κ° λμ λμ΄ λͺ¨λΈμ μ±λ₯μ 보μ₯νκΈ° μ΄λ ΅μ΅λλ€. μλ κ·Έλνλ‘ μμΈ‘κ°μ μ€μ κ°κ³Ό λΉκ΅ν΄ μκ°ν ν΄λ³΄κ² μ΅λλ€.
# Plot true values against the multi-step predictions, rescaled by MAX
# (x_min was 0, so this inverts the min-max scaling).
plt.plot(daily_cases.index[-len(y_test):], np.array(y_test) * MAX, label='True')
plt.plot(daily_cases.index[-len(preds):], np.array(preds) * MAX, label='Pred')
plt.xticks(rotation=45)
plt.legend()
<matplotlib.legend.Legend at 0x7f76dc271278>
μ§κΈκΉμ§ μ½λ‘λ νμ§μ λ°μ΄ν°λ₯Ό νμ©ν΄ LSTM λͺ¨λΈ κ΅¬μΆ μ€μ΅μ μ§ννμ΅λλ€. λ€μ μ₯μμλ CNN-LSTMμ μκ³μ΄ λ°μ΄ν°μ μ μ©ν΄λ³΄λ μ€μ΅μ μ§ννκ² μ΅λλ€.