selfssm_400m_20b_swe2k / hyperparameters.json
{
"train_config": "config/is_selfssm.swe_2048.json",
"model_name": "Selfssm",
"devices": 8,
"train_name": "is_selfssm.swe_2048",
"name": "is_selfssm.swe_2048_Selfssm",
"model_type": "selfssm",
"nodes": 1,
"block_size": 4096,
"max_tokens": 20000000000,
"global_batch_size": 512,
"micro_batch_size": 2,
"batch_size": 64,
"gradient_accumulation_steps": 32,
"learning_rate": 0.0004,
"total_evals": 400,
"warmup_tokens": 200000000,
"eval_iters": 200,
"resume_ckpt": "token-3775135744.iter-014400-ckpt.pth",
"log_step_interval": 10,
"save_step_interval": 300,
"eval_step_interval": 300,
"num_extrapol": 4,
"weight_decay": 0.1,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"decay_lr": true,
"min_lr": 4e-05,
"attn_type": "flash_attention_2",
"attn_window_size": 2048,
"output_attentions": false,
"output_hidden_states": false,
"load_linear_data": "next",
"load_linear_overlap": 0,
"load_linear_also_copy_rate": 0,
"data_do_shuffle": false,
"load_input_data": "",
"log_iter_interval": 320,
"tok": "llama2_tok",
"transformer_hidden_size": 1024,
"transformer_num_hidden_layers": 12,
"transformer_intermediate_size": 4096,
"linear_hidden_states": 2048,
"linear_cal_loss": false,
"linear_num_hidden_layers": 12,
"linear_hidden_name": "output_embedding",
"linear_multiply_self_attn_B": false,
"linear_input_w_selfattn": false,
"integrate_self_attn_weight": "",
"integrate_self_attn_weight_rate": 1,
"tie_word_embedding_layer": true,
"linear_up_project": false,
"linear_up_project_size": 0,
"interact_mode": "cross_attn"
}
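
A minimal sketch (not part of the repository's training code) of how this file can be read and its batch-size fields cross-checked, assuming the common convention that the per-device batch_size equals global_batch_size / devices and gradient_accumulation_steps equals batch_size / micro_batch_size; the filename and field names below come directly from the JSON above.

# Load hyperparameters.json and verify the batch-size relationships
# that appear to hold in this configuration.
import json

with open("hyperparameters.json") as f:
    hp = json.load(f)

per_device_batch = hp["global_batch_size"] // (hp["devices"] * hp["nodes"])  # 512 / (8 * 1) = 64
grad_accum_steps = per_device_batch // hp["micro_batch_size"]                # 64 / 2 = 32

assert per_device_batch == hp["batch_size"]                    # matches "batch_size": 64
assert grad_accum_steps == hp["gradient_accumulation_steps"]   # matches "gradient_accumulation_steps": 32
print(f"per-device batch: {per_device_batch}, grad accumulation steps: {grad_accum_steps}")

Under this reading, each of the 8 devices processes micro-batches of 2 sequences (block_size 4096) and accumulates gradients for 32 steps to reach the global batch of 512 sequences per optimizer update.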