config_name: "JackFram/llama-68m"
tokenizer_name: "JackFram/llama-68m"
validation_split_percentage: 2
train_file: "/home/dshteyma/shareGPT_data/ShareGPT_V3_unfiltered_cleaned_split.json"
dataset_name_hub: "anon8231489123/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json"
dataset_name_local: "ShareGPT"
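# Optional caps on dataset size, useful for quick smoke tests; uncomment to enable.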
# max_train_samples: 1000
# max_eval_samples: 10
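# Training/evaluation toggles and output location.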
do_train: True
do_eval: True
output_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
overwrite_output_dir: True
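# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num_devices.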
per_device_train_batch_size: 4
gradient_accumulation_steps: 1
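# Logging, checkpointing, and evaluation cadence; logging_dir doubles as the TensorBoard directory.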
report_to: "tensorboard"
logging_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
logging_steps: 500
save_steps: 1000
eval_strategy: "steps"
eval_steps: 1000
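# Optimizer hyperparameters; with the default HF Trainer linear schedule,
# warmup_ratio 0.05 warms the learning rate up over the first 5% of total steps.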
learning_rate: 0.0001
weight_decay: 0.01
warmup_ratio: 0.05
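# Push checkpoints to the Hugging Face Hub under this model ID
# (requires an authenticated Hugging Face token).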
push_to_hub: True
hub_model_id: "DorinSht/recreate_llama_68M_vanilla"
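# Usage sketch (assumption: the training script reads this file via
# HfArgumentParser.parse_yaml_file, as in HuggingFace's run_clm.py pattern;
# the script name and config filename below are illustrative):
#   python run_clm.py train_config.yaml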