|
seed: 999 |
|
cudnn_deterministic: false |
|
train_data_jsons: |
|
- data/train/1splits/data.0.tiny.json |
|
valid_data_jsons: |
|
- data/val/1splits/data.0.tiny.json |
|
batch_scale: 2000 |
|
max_length: 1500 |
|
min_length: 1 |
|
n_worker: 4 |
|
local_rank: -1 |
|
minibatch_debug: -1 |
|
n_epoch: 1 |
|
grad_accum: 64 |
|
global_learning_rate: 2.0e-06 |
|
local_learning_rate: 4.0e-06 |
|
grad_clip: 2.0 |
|
warmup_steps: 10000 |
|
total_steps: 55 |
|
dim: 4096 |
|
text_card: 1 |
|
existing_text_padding_id: 3 |
|
n_q: 16 |
|
dep_q: 8 |
|
card: 2048 |
|
num_heads: 32 |
|
num_layers: 32 |
|
hidden_scale: 4.5 |
|
causal: true |
|
context: 3000 |
|
exp_dir: exp_data/Moshi/v3_full_emo_v0 |
|
print_freq: 5 |
|
save_interval: 10000 |
|
resume: null |
|
merged_model_path: exp_data/Moshi/v3_full_emo_v0/model_merged.safetensors |
|
|