|
|
|
|
|
|
|
import copy |
|
from pathlib import Path |
|
import warnings |
|
import holidays |
|
import seaborn as sns |
|
import matplotlib |
|
import matplotlib.dates as mdates |
|
import matplotlib.pyplot as plt |
|
plt.style.use('fivethirtyeight') |
|
import numpy as np |
|
import pandas as pd |
|
import glob |
|
import csv |
|
import lightning.pytorch as pl |
|
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor |
|
from lightning.pytorch.loggers import TensorBoardLogger |
|
import torch |
|
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet |
|
from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder |
|
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss |
|
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters |
|
import random |
|
import gc |
|
import tensorflow as tf |
|
import tensorboard as tb |
|
# TensorBoard ships a TensorFlow stub; pointing tf.io.gfile at it is a known
# workaround for version clashes between tensorflow and tensorboard logging.
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
|
import os |
|
import math |
|
import sys |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import MinMaxScaler |
|
|
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed |
|
from tensorflow.keras.layers import MaxPooling1D, Flatten |
|
from tensorflow.keras.regularizers import L1, L2 |
|
from tensorflow.keras.metrics import Accuracy |
|
from tensorflow.keras.metrics import RootMeanSquaredError |
|
from sklearn.metrics import mean_squared_error as MSE |
|
from sklearn.model_selection import KFold |
|
from sklearn.inspection import permutation_importance |
|
from tensorflow.keras.utils import plot_model |
|
from sklearn.metrics import explained_variance_score, mean_poisson_deviance, mean_gamma_deviance, mean_squared_error, mean_squared_log_error, d2_absolute_error_score, d2_pinball_score, d2_tweedie_score |
|
from sklearn.metrics import r2_score |
|
from sklearn.metrics import max_error |
|
import datetime |
|
from datetime import date |
|
import optuna |
|
from tensorflow.keras.callbacks import Callback |
|
from optuna.integration import TFKerasPruningCallback |
|
import shutil |
|
import gradio as gr |
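
# NOTE: input CSVs are expected to provide the columns referenced throughout
# this script -- 'Date/Time', 'Ticker', 'Open', 'High', 'Low', 'Close', 'OI',
# 'RSI14', 'RSI44', 'HHRSI', 'Rsi Weekly', 'LLCHHV', 'white', 'Vap44', 'Vap14',
# 'Ema5', 'Ema20', 'Ema50', 'Ema200' and 'EMARSI'. This list is inferred from
# the column accesses below, not from a formal schema.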
|
|
|
|
|
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' |
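
# Seed every RNG in play (Python, NumPy, TensorFlow, PyTorch) for repeatability.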
|
random.seed(30) |
|
np.random.seed(30) |
|
tf.random.set_seed(30) |
|
torch.manual_seed(30) |
|
torch.cuda.manual_seed(30) |
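
# Global configuration for the PyTorch Forecasting / TFT training runs.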
|
|
|
|
|
PATIENCE = 30 |
|
MAX_EPOCHS = 3 |
|
LEARNING_RATE = 0.01 |
|
OPTUNA = True |
|
ACCELERATOR = "cpu" |
|
|
|
|
|
|
|
|
|
# `prax` accumulates one result row per model run, matching the header written
# by generate_csv(): [Ticker, Prev_Close_Real, Model, Prev_Close_Model,
# Close_Model, Max_Err, Up_Down].
w = 7
|
prax = [0 for x in range(w)] |
|
|
|
|
|
|
|
def objective(trial, X_train, y_train, X_test, y_test):
    """Optuna objective: train a small CNN-BiLSTM and return its test loss."""
|
model = tf.keras.Sequential() |
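
    # A Conv1D front-end extracts local patterns before the BiLSTM stack.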
|
|
|
|
|
|
|
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1))) |
|
|
|
|
|
|
|
model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_1", 32, 256), return_sequences=True))) |
|
model.add(Dropout(trial.suggest_float("dropout_1", 0.1, 0.5))) |
|
model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_2", 32, 256), return_sequences=False))) |
|
model.add(Dropout(trial.suggest_float("dropout_2", 0.1, 0.5))) |
|
|
|
|
|
model.add(Dense(1, activation='relu')) |
|
model.compile(optimizer='adam', loss='mse', metrics=['mse']) |
|
|
|
|
|
pruning_callback = TFKerasPruningCallback(trial, "val_loss") |
|
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=32, verbose=0, callbacks=[pruning_callback]) |
|
|
|
|
|
loss = model.evaluate(X_test, y_test, verbose=0)[0] |
|
|
|
return loss |
|
|
|
|
|
|
|
def modelCNNLSTM(csv_file, prax):
    """Tune and train a CNN-BiLSTM on the dataframe; write results into prax."""
|
|
|
df = csv_file |
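
    # Hold out the last 100 rows; columns 1:21 are the numeric feature columns.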
|
|
|
temp_data = df.iloc[0:len(df)-100, 1:21] |
|
trek = df.iloc[len(df)-100:,1:21] |
|
|
|
data = temp_data |
|
    sc_x = MinMaxScaler()
    sc_y = MinMaxScaler()
|
|
|
train_size = int(len(data) * 0.8) |
|
train_data, test_data = data[:train_size], data[train_size:] |
|
|
|
X_train, y_train = train_data, train_data['Close'] |
|
X_test, y_test = test_data, test_data['Close'] |
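
    # Shift by one step so features at day t predict the close at day t+1.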
|
|
|
X_train = X_train[0:len(X_train)-1] |
|
y_train = y_train[1:len(y_train)] |
|
X_test = X_test[0:len(X_test)-1] |
|
y_test = y_test[1:len(y_test)] |
|
|
|
Xt = X_train |
|
Xts = X_test |
|
Yt = y_train |
|
Yts = y_test |
|
|
|
y_train = y_train.values.reshape(-1,1) |
|
y_test = y_test.values.reshape(-1,1) |
|
|
|
    # Fit the scalers on the training split only and reuse them on the test
    # split, so no test-set statistics leak into training.
    X_train = sc_x.fit_transform(X_train)
    y_train = sc_y.fit_transform(y_train)
    X_test = sc_x.transform(X_test)
    y_test = sc_y.transform(y_test)
|
|
|
x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns) |
|
y_tr=pd.DataFrame(y_train, index = Yt.index) |
|
x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns) |
|
y_te=pd.DataFrame(y_test, index = Yts.index) |
|
|
|
|
|
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1)) |
|
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1)) |
|
|
|
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=4, n_startup_trials=4)) |
|
fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test) |
|
study.optimize(fn, n_trials=5) |
|
|
|
best_params = study.best_params |
|
|
|
|
|
model = tf.keras.Sequential() |
|
|
|
|
|
|
|
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1))) |
|
|
|
|
|
|
|
model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True))) |
|
model.add(Dropout(best_params["dropout_1"])) |
|
model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False))) |
|
model.add(Dropout(best_params["dropout_2"])) |
|
|
|
|
|
model.add(Dense(1, activation='relu')) |
|
model.compile(optimizer='adam', loss='mse', metrics=['mse']) |
|
|
|
|
|
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0) |
|
|
|
|
|
loss = model.evaluate(X_test, y_test, verbose=0)[0] |
|
|
|
print(f"Final loss (without KFold): {loss}") |
|
|
|
    # 10-fold CV as a robustness check. Note the same model object keeps
    # training across folds, so the folds are not independent runs.
    kfold = KFold(n_splits=10, shuffle=True)
|
|
|
inputs = np.concatenate((X_train, X_test), axis=0) |
|
targets = np.concatenate((y_train, y_test), axis=0) |
|
acc_per_fold = [] |
|
loss_per_fold = [] |
|
xgb_res = [] |
|
num_epochs = 10 |
|
batch_size = 32 |
|
|
|
fold_no = 1 |
|
print('------------------------------------------------------------------------') |
|
print("Training for 10 folds... Standby") |
|
for train, test in kfold.split(inputs, targets): |
|
|
|
|
|
history = model.fit(inputs[train], targets[train], |
|
batch_size=32, |
|
epochs=15, |
|
verbose=0) |
|
|
|
scores = model.evaluate(inputs[test], targets[test], verbose=0) |
|
|
|
acc_per_fold.append(scores[1] * 100) |
|
loss_per_fold.append(scores[0]) |
|
fold_no = fold_no + 1 |
|
|
|
|
|
    print(f"Average loss across folds: {np.mean(loss_per_fold):.6f}")
    print('------------------------------------------------------------------------')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
    # Re-run the fitted model over the full history to get final predictions.
    trek = df.iloc[0:len(df), 1:21]
|
Y = trek[0:len(trek)] |
|
YP = trek[1:len(trek)] |
|
Y1 = Y['Close'] |
|
Y2 = YP['Close'] |
|
Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns) |
|
|
|
    Y = np.array(Y)
    Y1 = np.array(Y1)
    # Reuse the scalers fitted on the training split (no refit at inference).
    Y = sc_x.transform(Y)
    Y1 = Y1.reshape(-1,1)
    Y1 = sc_y.transform(Y1)
|
|
|
train_X = Y.reshape(Y.shape[0],Y.shape[1],1) |
|
|
|
    pred = model.predict(train_X, verbose=0)
    pred = np.array(pred).reshape(-1,1)
    var2 = max_error(Y1.ravel(), pred.ravel())  # (y_true, y_pred), scaled units
    print('Max Error: %f' % var2)
    prax[5] = float(var2)
    pred = sc_y.inverse_transform(pred)
|
|
|
    print(pred[-2], pred[-1])
    prax[3] = float(pred[-2])
    prax[4] = float(pred[-1])
    # Direction flag from the last two predicted closes: up, flat, or down.
    if pred[-1] - pred[-2] > 0:
        prax[6] = 1
    elif pred[-1] - pred[-2] == 0:
        prax[6] = 0
    else:
        prax[6] = -1
|
|
|
|
|
|
|
def modelCNNLSTM_OpenGap(csv_file, prax):
    """Like modelCNNLSTM, but with an opening-gap ('O-C') feature added."""
|
|
|
df = csv_file |
|
    datLength = len(df)
    # Opening-gap feature: today's open minus the previous close (first row 0).
    df['O-C'] = (df['Open'] - df['Close'].shift(1)).fillna(0)
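
    # Hold out the last 100 rows; columns 1:22 now include the O-C feature.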
|
temp_data = df.iloc[0:datLength-100, 1:22] |
|
trek = df.iloc[datLength-100:,1:22] |
|
|
|
data = temp_data |
|
|
|
    sc_x = MinMaxScaler()
    sc_y = MinMaxScaler()
|
|
|
train_size = int(len(data) * 0.8) |
|
train_data, test_data = data[:train_size], data[train_size:] |
|
|
|
|
|
X_train, y_train = train_data, train_data['Close'] |
|
X_test, y_test = test_data, test_data['Close'] |
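
    # Shift by one step so features at day t predict the close at day t+1.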
|
|
|
X_train = X_train[0:len(X_train)-1] |
|
y_train = y_train[1:len(y_train)] |
|
X_test = X_test[0:len(X_test)-1] |
|
y_test = y_test[1:len(y_test)] |
|
|
|
Xt = X_train |
|
Xts = X_test |
|
Yt = y_train |
|
Yts = y_test |
|
|
|
y_train = y_train.values.reshape(-1,1) |
|
y_test = y_test.values.reshape(-1,1) |
|
|
|
    # Fit the scalers on the training split only and reuse them on the test
    # split, so no test-set statistics leak into training.
    X_train = sc_x.fit_transform(X_train)
    y_train = sc_y.fit_transform(y_train)
    X_test = sc_x.transform(X_test)
    y_test = sc_y.transform(y_test)
|
|
|
x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns) |
|
y_tr=pd.DataFrame(y_train, index = Yt.index) |
|
x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns) |
|
y_te=pd.DataFrame(y_test, index = Yts.index) |
|
|
|
|
|
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1)) |
|
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1)) |
|
|
|
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=2, n_startup_trials=2)) |
|
fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test) |
|
study.optimize(fn, n_trials=5) |
|
|
|
best_params = study.best_params |
|
|
|
|
|
model = tf.keras.Sequential() |
|
|
|
|
|
|
|
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1))) |
|
|
|
|
|
|
|
model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True))) |
|
model.add(Dropout(best_params["dropout_1"])) |
|
model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False))) |
|
model.add(Dropout(best_params["dropout_2"])) |
|
|
|
|
|
model.add(Dense(1, activation='relu')) |
|
model.compile(optimizer='adam', loss='mse', metrics=['mse']) |
|
|
|
|
|
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0) |
|
|
|
|
|
loss = model.evaluate(X_test, y_test, verbose=0)[0] |
|
|
|
print(f"Final loss (without KFold): {loss}") |
|
|
|
    # 10-fold CV as a robustness check. Note the same model object keeps
    # training across folds, so the folds are not independent runs.
    kfold = KFold(n_splits=10, shuffle=True)
|
|
|
inputs = np.concatenate((X_train, X_test), axis=0) |
|
targets = np.concatenate((y_train, y_test), axis=0) |
|
acc_per_fold = [] |
|
loss_per_fold = [] |
|
xgb_res = [] |
|
num_epochs = 10 |
|
batch_size = 32 |
|
|
|
fold_no = 1 |
|
print('------------------------------------------------------------------------') |
|
print("Training for 10 folds... Standby") |
|
for train, test in kfold.split(inputs, targets): |
|
|
|
|
|
history = model.fit(inputs[train], targets[train], |
|
batch_size=32, |
|
epochs=15, |
|
verbose=0) |
|
|
|
scores = model.evaluate(inputs[test], targets[test], verbose=0) |
|
|
|
acc_per_fold.append(scores[1] * 100) |
|
loss_per_fold.append(scores[0]) |
|
fold_no = fold_no + 1 |
|
|
|
|
|
    print(f"Average loss across folds: {np.mean(loss_per_fold):.6f}")
    print('------------------------------------------------------------------------')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
    # Re-run the fitted model over the full history to get final predictions.
    trek = df.iloc[0:len(df), 1:22]
|
Y = trek[0:len(trek)] |
|
YP = trek[1:len(trek)] |
|
Y1 = Y['Close'] |
|
Y2 = YP['Close'] |
|
Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns) |
|
|
|
    Y = np.array(Y)
    Y1 = np.array(Y1)
    # Reuse the scalers fitted on the training split (no refit at inference).
    Y = sc_x.transform(Y)
    Y1 = Y1.reshape(-1,1)
    Y1 = sc_y.transform(Y1)
|
|
|
train_X = Y.reshape(Y.shape[0],Y.shape[1],1) |
|
|
|
    pred = model.predict(train_X, verbose=0)
    pred = np.array(pred).reshape(-1,1)
    var2 = max_error(Y1.ravel(), pred.ravel())  # (y_true, y_pred), scaled units
    print('Max Error: %f' % var2)
    prax[5] = float(var2)
    pred = sc_y.inverse_transform(pred)
|
|
|
    print(pred[-2], pred[-1])
    prax[3] = float(pred[-2])
    prax[4] = float(pred[-1])
    # Direction flag from the last two predicted closes: up, flat, or down.
    if pred[-1] - pred[-2] > 0:
        prax[6] = 1
    elif pred[-1] - pred[-2] == 0:
        prax[6] = 0
    else:
        prax[6] = -1
|
|
|
|
|
|
|
def modelTFT(csv_file, prax):
    """Train a Temporal Fusion Transformer and record its forecast in prax."""
|
train = csv_file |
|
|
|
train['date'] = pd.to_datetime(train['Date/Time']) |
|
|
|
|
|
    # Single-frame concat: effectively a copy of `train` with a fresh index.
    data = pd.concat([train], axis = 0, ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>""" |
|
|
|
|
|
|
|
    # Build a consecutive integer time index: one time_idx per unique Date/Time.
    data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
                        .rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
|
|
|
data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") |
|
data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") |
|
data["month"] = data['date'].dt.month.astype(str).astype("category") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gc.collect() |
|
|
|
|
train = data.iloc[:len(train)] |
|
test = data.iloc[len(train):] |
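
    # Forecast 2 steps ahead; the encoder may cover the entire history.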
|
|
|
max_prediction_length = 2 |
|
max_encoder_length = train.date.nunique() |
|
training_cutoff = train["time_idx"].max() - max_prediction_length |
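
    # Dataset: group by Ticker, calendar fields as known categoricals, price
    # and indicator columns as unknown reals, softplus-normalised target.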
|
|
|
|
|
training = TimeSeriesDataSet( |
|
train[lambda x: x.time_idx <= training_cutoff], |
|
time_idx="time_idx", |
|
target="Close", |
|
group_ids=["Ticker"], |
|
min_encoder_length=max_prediction_length, |
|
max_encoder_length=max_encoder_length, |
|
max_prediction_length=max_prediction_length, |
|
static_categoricals=["Ticker"], |
|
time_varying_known_categoricals=["month", "week_of_year", "day_of_week"], |
|
|
|
time_varying_known_reals=["time_idx"], |
|
time_varying_unknown_categoricals=[], |
|
time_varying_unknown_reals=[ |
|
'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200' |
|
], |
|
target_normalizer=GroupNormalizer( |
|
groups=['Ticker'], transformation="softplus" |
|
), |
|
categorical_encoders={ |
|
'week_of_year':NaNLabelEncoder(add_nan=True) |
|
}, |
|
|
|
add_relative_time_idx=True, |
|
add_target_scales=True, |
|
add_encoder_length=True, |
|
) |
|
|
|
|
|
|
|
validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True) |
|
|
|
|
|
batch_size = 128 |
|
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0) |
|
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0) |
|
|
|
|
|
|
|
    # Naive baseline (repeat last value) as a reference point for the TFT.
    actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])
    baseline_predictions = Baseline().predict(val_dataloader)
    print(f"Baseline MAE on validation: {(actuals - baseline_predictions).abs().mean().item()}")
|
|
|
sm = SMAPE() |
|
|
|
print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}") |
|
|
|
early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min") |
|
lr_logger = LearningRateMonitor() |
|
logger = TensorBoardLogger("lightning_logs") |
|
|
|
trainer = pl.Trainer( |
|
max_epochs=1, |
|
accelerator=ACCELERATOR, |
|
enable_model_summary=False, |
|
gradient_clip_val=0.25, |
|
limit_train_batches=10, |
|
|
|
callbacks=[lr_logger, early_stop_callback], |
|
logger=logger, |
|
) |
|
|
|
tft = TemporalFusionTransformer.from_dataset( |
|
training, |
|
learning_rate=LEARNING_RATE, |
|
lstm_layers=2, |
|
hidden_size=16, |
|
attention_head_size=2, |
|
dropout=0.2, |
|
hidden_continuous_size=8, |
|
output_size=1, |
|
loss=SMAPE(), |
|
log_interval=10, |
|
reduce_on_plateau_patience=4 |
|
) |
|
|
|
tft.to(DEVICE) |
|
trainer.fit( |
|
tft, |
|
train_dataloaders=train_dataloader, |
|
val_dataloaders=val_dataloader, |
|
) |
|
|
|
|
|
|
|
if OPTUNA: |
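        # Search TFT hyper-parameters with Optuna, then retrain using the best.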
|
|
|
|
|
|
study = optimize_hyperparameters( |
|
train_dataloader, |
|
val_dataloader, |
|
model_path="optuna_test", |
|
n_trials=5, |
|
max_epochs=MAX_EPOCHS, |
|
gradient_clip_val_range=(0.01, 0.3), |
|
hidden_size_range=(8, 24), |
|
hidden_continuous_size_range=(8, 12), |
|
attention_head_size_range=(2, 4), |
|
learning_rate_range=(0.01, 0.05), |
|
dropout_range=(0.1, 0.25), |
|
trainer_kwargs=dict(limit_train_batches=20), |
|
reduce_on_plateau_patience=4, |
|
pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_startup_trials=3), |
|
use_learning_rate_finder=False, |
|
) |
|
|
|
|
|
trainer = pl.Trainer( |
|
max_epochs=MAX_EPOCHS, |
|
accelerator=ACCELERATOR, |
|
enable_model_summary=False, |
|
gradient_clip_val=study.best_params['gradient_clip_val'], |
|
limit_train_batches=20, |
|
|
|
callbacks=[lr_logger, early_stop_callback], |
|
logger=logger, |
|
) |
|
|
|
tft = TemporalFusionTransformer.from_dataset( |
|
training, |
|
learning_rate=study.best_params['learning_rate'], |
|
lstm_layers=2, |
|
hidden_size=study.best_params['hidden_size'], |
|
attention_head_size=study.best_params['attention_head_size'], |
|
dropout=study.best_params['dropout'], |
|
hidden_continuous_size=study.best_params['hidden_continuous_size'], |
|
output_size=1, |
|
loss=SMAPE(), |
|
log_interval=10, |
|
reduce_on_plateau_patience=4 |
|
) |
|
|
|
tft.to(DEVICE) |
|
trainer.fit( |
|
tft, |
|
train_dataloaders=train_dataloader, |
|
val_dataloaders=val_dataloader, |
|
) |
|
|
|
|
|
best_model_path = trainer.checkpoint_callback.best_model_path |
|
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path) |
|
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)]) |
|
predictions = best_tft.predict(val_dataloader, mode="prediction") |
|
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True) |
|
|
|
sm = SMAPE() |
|
print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions).mean(axis = 1).median().item()}") |
|
prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item() |
|
|
|
|
|
print(raw_predictions[0][0]) |
|
prax[3] = '-' |
|
prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0] |
|
t = prax[4] |
|
    tm = data['Close'].iloc[-1]
    # Direction flag vs. the last real close: up, flat, or down.
    if t - tm > 0:
        prax[6] = 1
    elif t - tm == 0:
        prax[6] = 0
    else:
        prax[6] = -1
|
|
|
print("-----------") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def modelTFT_OpenGap(csv_file, prax):
    """Like modelTFT, but with an opening-gap ('O-C') feature added."""
|
train = csv_file |
|
|
|
train['date'] = pd.to_datetime(train['Date/Time']) |
|
|
|
    # Opening-gap feature: today's open minus the previous close (first row 0).
    train['O-C'] = (train['Open'] - train['Close'].shift(1)).fillna(0)
|
    # Single-frame concat: effectively a copy of `train` with a fresh index.
    data = pd.concat([train], axis = 0, ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>""" |
|
|
|
|
|
|
|
    # Build a consecutive integer time index: one time_idx per unique Date/Time.
    data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
                        .rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
|
|
|
data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") |
|
data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") |
|
data["month"] = data['date'].dt.month.astype(str).astype("category") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gc.collect() |
|
|
|
|
train = data.iloc[:len(train)] |
|
test = data.iloc[len(train):] |
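
    # Forecast 2 steps ahead; the encoder may cover the entire history.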
|
|
|
max_prediction_length = 2 |
|
max_encoder_length = train.date.nunique() |
|
training_cutoff = train["time_idx"].max() - max_prediction_length |
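
    # Dataset: group by Ticker, calendar fields as known categoricals, price,
    # indicator, and O-C columns as unknown reals, softplus-normalised target.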
|
|
|
|
|
training = TimeSeriesDataSet( |
|
train[lambda x: x.time_idx <= training_cutoff], |
|
time_idx="time_idx", |
|
target="Close", |
|
group_ids=["Ticker"], |
|
min_encoder_length=max_prediction_length, |
|
max_encoder_length=max_encoder_length, |
|
max_prediction_length=max_prediction_length, |
|
static_categoricals=["Ticker"], |
|
time_varying_known_categoricals=["month", "week_of_year", "day_of_week"], |
|
|
|
time_varying_known_reals=["time_idx"], |
|
time_varying_unknown_categoricals=[], |
|
time_varying_unknown_reals=[ |
|
'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200', 'O-C' |
|
], |
|
target_normalizer=GroupNormalizer( |
|
groups=['Ticker'], transformation="softplus" |
|
), |
|
categorical_encoders={ |
|
'week_of_year':NaNLabelEncoder(add_nan=True) |
|
}, |
|
|
|
add_relative_time_idx=True, |
|
add_target_scales=True, |
|
add_encoder_length=True, |
|
) |
|
|
|
|
|
|
|
validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True) |
|
|
|
|
|
batch_size = 128 |
|
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0) |
|
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0) |
|
|
|
|
|
|
|
    # Naive baseline (repeat last value) as a reference point for the TFT.
    actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])
    baseline_predictions = Baseline().predict(val_dataloader)
    print(f"Baseline MAE on validation: {(actuals - baseline_predictions).abs().mean().item()}")
|
|
|
sm = SMAPE() |
|
|
|
print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}") |
|
|
|
early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min") |
|
lr_logger = LearningRateMonitor() |
|
logger = TensorBoardLogger("lightning_logs") |
|
|
|
trainer = pl.Trainer( |
|
max_epochs=1, |
|
accelerator=ACCELERATOR, |
|
enable_model_summary=False, |
|
gradient_clip_val=0.25, |
|
limit_train_batches=10, |
|
|
|
callbacks=[lr_logger, early_stop_callback], |
|
logger=logger, |
|
) |
|
|
|
tft = TemporalFusionTransformer.from_dataset( |
|
training, |
|
learning_rate=LEARNING_RATE, |
|
lstm_layers=2, |
|
hidden_size=16, |
|
attention_head_size=2, |
|
dropout=0.2, |
|
hidden_continuous_size=8, |
|
output_size=1, |
|
loss=SMAPE(), |
|
log_interval=10, |
|
reduce_on_plateau_patience=4 |
|
) |
|
|
|
tft.to(DEVICE) |
|
trainer.fit( |
|
tft, |
|
train_dataloaders=train_dataloader, |
|
val_dataloaders=val_dataloader, |
|
) |
|
|
|
|
|
|
|
if OPTUNA: |
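        # Search TFT hyper-parameters with Optuna, then retrain using the best.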
|
|
|
|
|
|
study = optimize_hyperparameters( |
|
train_dataloader, |
|
val_dataloader, |
|
model_path="optuna_test", |
|
n_trials=5, |
|
max_epochs=MAX_EPOCHS, |
|
gradient_clip_val_range=(0.01, 0.3), |
|
hidden_size_range=(8, 24), |
|
hidden_continuous_size_range=(8, 12), |
|
attention_head_size_range=(2, 4), |
|
learning_rate_range=(0.01, 0.05), |
|
dropout_range=(0.1, 0.25), |
|
trainer_kwargs=dict(limit_train_batches=20), |
|
reduce_on_plateau_patience=4, |
|
pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_warmup_steps=3), |
|
use_learning_rate_finder=False, |
|
) |
|
|
|
|
|
trainer = pl.Trainer( |
|
max_epochs=MAX_EPOCHS, |
|
accelerator=ACCELERATOR, |
|
enable_model_summary=False, |
|
gradient_clip_val=study.best_params['gradient_clip_val'], |
|
limit_train_batches=20, |
|
|
|
callbacks=[lr_logger, early_stop_callback], |
|
logger=logger, |
|
) |
|
|
|
tft = TemporalFusionTransformer.from_dataset( |
|
training, |
|
learning_rate=study.best_params['learning_rate'], |
|
lstm_layers=2, |
|
hidden_size=study.best_params['hidden_size'], |
|
attention_head_size=study.best_params['attention_head_size'], |
|
dropout=study.best_params['dropout'], |
|
hidden_continuous_size=study.best_params['hidden_continuous_size'], |
|
output_size=1, |
|
loss=SMAPE(), |
|
log_interval=10, |
|
reduce_on_plateau_patience=4 |
|
) |
|
|
|
tft.to(DEVICE) |
|
trainer.fit( |
|
tft, |
|
train_dataloaders=train_dataloader, |
|
val_dataloaders=val_dataloader, |
|
) |
|
|
|
|
|
best_model_path = trainer.checkpoint_callback.best_model_path |
|
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path) |
|
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)]) |
|
predictions = best_tft.predict(val_dataloader, mode="prediction") |
|
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True) |
|
|
|
sm = SMAPE() |
|
print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions).mean(axis = 1).median().item()}") |
|
prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item() |
|
|
|
|
|
print(raw_predictions[0][0]) |
|
prax[3] = '-' |
|
prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0] |
|
t = prax[4] |
|
    tm = data['Close'].iloc[-1]
    # Direction flag vs. the last real close: up, flat, or down.
    if t - tm > 0:
        prax[6] = 1
    elif t - tm == 0:
        prax[6] = 0
    else:
        prax[6] = -1
|
|
|
print("-----------") |
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_csv(data_list):
    """Append one result row to today's CSV, writing the header on first use."""
    today = date.today().strftime("%Y_%m_%d")
    filename = f"result_{today}.csv"
    file_exists = os.path.isfile(filename)
    with open(filename, mode='a', newline='') as csv_file:
        fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down']
        writer = csv.writer(csv_file, delimiter=',')
        if not file_exists:
            writer.writerow(fieldnames)
        writer.writerow(data_list)
|
|
|
def fileOutput():
    """Return the name of today's result file (already written by generate_csv)."""
    today = date.today().strftime("%Y_%m_%d")
    return f"result_{today}.csv"
|
|
|
def guess_date(string):
    """Parse a date string by trying common formats; the order of formats
    decides ambiguous day/month values."""
    for fmt in ["%Y/%m/%d", "%d-%m-%Y", "%Y%m%d", "%m/%d/%Y", "%d/%m/%Y", "%Y-%m-%d", "%d/%m/%y", "%m/%d/%y"]:
|
try: |
|
return datetime.datetime.strptime(string, fmt).date() |
|
except ValueError: |
|
continue |
|
raise ValueError(string) |
|
|
|
|
|
|
|
def main(files):
    """Run both TFT variants on every uploaded CSV and return the result file."""
|
|
|
prax = [0,0,0,0,0,0,0] |
|
for idx, file in enumerate(files): |
|
print(f"File #{idx+1}: {file}") |
|
print(file.name) |
|
df = pd.read_csv(file.name) |
|
print(df['Ticker'][0]) |
|
prax[0] = df['Ticker'][0] |
|
        prax[1] = df['Close'].iloc[-1]
|
print('------------------') |
|
df = df.drop(['EMARSI'], axis=1) |
|
|
|
        # Normalise the date strings in place; input formats vary by export.
        for i in range(len(df)):
            df.loc[i, 'Date/Time'] = guess_date(df['Date/Time'][i]).strftime("%Y-%m-%d")
|
df['Date/Time'] = pd.to_datetime(df['Date/Time']) |
|
df.fillna(0, inplace=True) |
|
|
|
modelTFT(df, prax) |
|
prax[2] = "TFT" |
|
generate_csv(prax) |
|
modelTFT_OpenGap(df, prax) |
|
prax[2] = "TFT_OpenGap" |
|
generate_csv(prax) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prax=["","","","","","",""] |
|
generate_csv(prax) |
|
|
|
prax = [0,0,0,0,0,0,0] |
|
f1 = fileOutput() |
|
return f1 |
|
|
|
# Gradio front-end: upload one or more ticker CSVs, download the result CSV.
gradioApp = gr.Interface(fn=main, inputs=gr.File(file_count="multiple", file_type=".csv"), outputs="file")
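
# A minimal, UI-free smoke test (a sketch: `sample.csv` is a hypothetical path;
# the tiny wrapper below only mimics the `.name` attribute that Gradio's file
# objects expose to `main`):
#
#   class _Upload:
#       def __init__(self, name):
#           self.name = name
#
#   print(main([_Upload("sample.csv")]))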
|
|
|
if __name__ == "__main__": |
|
|
|
gradioApp.launch() |
|
|