import os


class Config:
    """
    Configuration class for the entire project.
    """

    def __init__(self):
        # =================================================================
        # Data & Feature Parameters
        # =================================================================
        # TODO: Update this path to your Qlib data directory.
        self.qlib_data_path = "~/.qlib/qlib_data/cn_data"
        self.instrument = 'csi300'

        # Overall time range for data loading from Qlib.
        self.dataset_begin_time = "2011-01-01"
        self.dataset_end_time = "2025-06-05"

        # Sliding window parameters for creating samples.
        self.lookback_window = 90  # Number of past time steps for input.
        self.predict_window = 10   # Number of future time steps for prediction.
        self.max_context = 512     # Maximum context length for the model.
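        # Example: with lookback_window=90 and predict_window=10, one sample
        # spans 90 + 10 = 100 time steps, well within max_context = 512.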
        # Features to be used from the raw data.
        self.feature_list = ['open', 'high', 'low', 'close', 'vol', 'amt']
        # Time-based features to be generated.
        self.time_feature_list = ['minute', 'hour', 'weekday', 'day', 'month']
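        # For example, a bar stamped 2024-06-03 09:30 yields minute=30, hour=9,
        # weekday=0 (Monday), day=3, month=6, assuming pandas-style datetime accessors.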

        # =================================================================
        # Dataset Splitting & Paths
        # =================================================================
        # Note: the validation range starts before the training range ends (and
        # the test range before the validation range ends) so that the earliest
        # samples of each split still have a full `lookback_window` of history.
        self.train_time_range = ["2011-01-01", "2022-12-31"]
        self.val_time_range = ["2022-09-01", "2024-06-30"]
        self.test_time_range = ["2024-04-01", "2025-06-05"]
        self.backtest_time_range = ["2024-07-01", "2025-06-05"]
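        # Example: the first validation target after 2022-12-31 needs roughly 90
        # prior trading days of input, hence the overlap back to 2022-09-01.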
        # TODO: Directory to save the processed, pickled datasets.
        self.dataset_path = "./data/processed_datasets"

        # =================================================================
        # Training Hyperparameters
        # =================================================================
        self.clip = 5.0  # Clipping value for normalized data to prevent outliers.
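        # Presumably applied as x.clip(-self.clip, self.clip) on the normalized
        # features, e.g. a z-score of 7.3 would be capped at 5.0.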
        self.epochs = 30
        self.log_interval = 100  # Log training status every N batches.
        self.batch_size = 50     # Batch size per GPU.
        # Number of samples to draw for one "epoch" of training/validation.
        # This is useful for large datasets where a true epoch is too long.
        self.n_train_iter = 2000 * self.batch_size
        self.n_val_iter = 400 * self.batch_size
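        # With the defaults above, that is 2000 * 50 = 100,000 training samples
        # and 400 * 50 = 20,000 validation samples per "epoch".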
        # Learning rates for different model components.
        self.tokenizer_learning_rate = 2e-4
        self.predictor_learning_rate = 4e-5
        # Gradient accumulation to simulate a larger batch size.
        self.accumulation_steps = 1
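        # Effective batch size per GPU = batch_size * accumulation_steps,
        # e.g. 50 * 1 = 50 here; raise accumulation_steps to emulate larger batches.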
        # AdamW optimizer parameters.
        self.adam_beta1 = 0.9
        self.adam_beta2 = 0.95
        self.adam_weight_decay = 0.1

        # Miscellaneous
        self.seed = 100  # Global random seed for reproducibility.

        # =================================================================
        # Experiment Logging & Saving
        # =================================================================
        self.use_comet = True  # Set to False if you don't want to use Comet ML.
        self.comet_config = {
            # Secrets are read from the environment for security; the literal
            # placeholder is only a fallback and should never hold a real key.
            "api_key": os.getenv("COMET_API_KEY", "YOUR_COMET_API_KEY"),
            "project_name": "Kronos-Finetune-Demo",
            "workspace": "your_comet_workspace",  # TODO: Change to your Comet ML workspace name.
        }
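        # e.g. run `export COMET_API_KEY=...` in your shell before launching
        # training so the key never lands in version control.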
        self.comet_tag = 'finetune_demo'
        self.comet_name = 'finetune_demo'

        # Base directory for saving model checkpoints and results.
        # Using a general 'outputs' directory is a common practice.
        self.save_path = "./outputs/models"
        self.tokenizer_save_folder_name = 'finetune_tokenizer_demo'
        self.predictor_save_folder_name = 'finetune_predictor_demo'
        self.backtest_save_folder_name = 'finetune_backtest_demo'
        # Path for backtesting results.
        self.backtest_result_path = "./outputs/backtest_results"

        # =================================================================
        # Model & Checkpoint Paths
        # =================================================================
        # TODO: Update these paths to your pretrained model locations.
        # These can be local paths or Hugging Face Hub model identifiers.
        self.pretrained_tokenizer_path = "path/to/your/Kronos-Tokenizer-base"
        self.pretrained_predictor_path = "path/to/your/Kronos-small"

        # Paths to the fine-tuned models, derived from the save_path.
        # These will be generated automatically during training.
        self.finetuned_tokenizer_path = f"{self.save_path}/{self.tokenizer_save_folder_name}/checkpoints/best_model"
        self.finetuned_predictor_path = f"{self.save_path}/{self.predictor_save_folder_name}/checkpoints/best_model"
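        # e.g. finetuned_tokenizer_path resolves to
        # "./outputs/models/finetune_tokenizer_demo/checkpoints/best_model".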

        # =================================================================
        # Backtesting Parameters
        # =================================================================
        self.backtest_n_symbol_hold = 50  # Number of symbols held in the portfolio.
        self.backtest_n_symbol_drop = 5   # Number of symbols to drop from the pool at each rebalance.
        self.backtest_hold_thresh = 5     # Minimum holding period (in trading days) for a stock.
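        # Under a Topk-Dropout style strategy (an assumption about the backtest
        # logic), each rebalance replaces at most 5 of the 50 held names.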
        # Sampling parameters for autoregressive inference.
        self.inference_T = 0.6           # Softmax temperature; values < 1.0 sharpen the distribution.
        self.inference_top_p = 0.9       # Nucleus (top-p) sampling threshold.
        self.inference_top_k = 0         # Top-k sampling cutoff; 0 disables it.
        self.inference_sample_count = 5  # Number of forecast paths sampled per input.
        self.backtest_batch_size = 1000  # Batch size for model inference during backtesting.
        self.backtest_benchmark = self._set_benchmark(self.instrument)

    def _set_benchmark(self, instrument):
        """Map an instrument universe to its benchmark index code."""
        dt_benchmark = {
            'csi800': "SH000906",
            'csi1000': "SH000852",
            'csi300': "SH000300",
        }
        if instrument in dt_benchmark:
            return dt_benchmark[instrument]
        raise ValueError(f"Benchmark not defined for instrument: {instrument}")
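

# A minimal usage sketch (illustrative, not part of the original file):
# instantiate Config and read a couple of derived values.
if __name__ == "__main__":
    cfg = Config()
    print(cfg.finetuned_predictor_path)  # ./outputs/models/finetune_predictor_demo/checkpoints/best_model
    print(cfg.backtest_benchmark)        # SH000300 for the default 'csi300' universe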