---
# Training / model configuration.
# One key per line (block style) so the file parses as a single flat mapping.
# Group headings below are a reading aid inferred from key names only; the
# consuming code is not visible here — confirm semantics against it.

# --- Reproducibility ---
seed: 999
cudnn_deterministic: false

# --- Data ---
# Both entries are one-item lists pointing at the "tiny" split files.
train_data_jsons:
  - data/train/1splits/data.0.tiny.json
valid_data_jsons:
  - data/val/1splits/data.0.tiny.json
batch_scale: 2000
max_length: 1500
min_length: 1
n_worker: 4

# --- Runtime ---
# NOTE(review): -1 looks like a "disabled / not set" sentinel for both keys —
# confirm against the loader.
local_rank: -1
minibatch_debug: -1

# --- Optimisation schedule ---
n_epoch: 1
grad_accum: 64
global_learning_rate: 2.0e-06
local_learning_rate: 4.0e-06
grad_clip: 2.0
# NOTE(review): warmup_steps (10000) is larger than total_steps (55). If the
# scheduler warms up over warmup_steps, the run would end during warmup —
# confirm this is intended for the tiny split.
warmup_steps: 10000
total_steps: 55

# --- Model ---
dim: 4096
text_card: 1
existing_text_padding_id: 3
n_q: 16
dep_q: 8
card: 2048
num_heads: 32
num_layers: 32
hidden_scale: 4.5
causal: true
context: 3000

# --- Output / checkpointing ---
exp_dir: exp_data/Moshi/v3_full_emo_v0
print_freq: 5
save_interval: 10000
# Explicit null (no value set).
resume: null
# Path lies under the exp_dir value above.
merged_model_path: exp_data/Moshi/v3_full_emo_v0/model_merged.safetensors