{
  "compute": {
    "accelerator": "A10G",
    "instance": "g5.2xlarge"
  },
  "model": {
    "name": "microsoft/phi-4",
    "tokenizer": "microsoft/phi-4"
  },
  "dataset": {
    "name": "adel67460/straburo-dataset",
    "train_split": "train",
    "eval_split": "train",
    "validation_size": 0.2
  },
  "training": {
    "epochs": 3,
    "batch_size": 1,
    "learning_rate": 2e-5,
    "warmup_ratio": 0.03,
    "evaluation_strategy": "steps",
    "eval_steps": 100,
    "save_strategy": "steps",
    "save_steps": 100,
    "save_total_limit": 1,
    "load_best_model_at_end": true,
    "metric_for_best_model": "loss",
    "greater_is_better": false,
    "gradient_accumulation_steps": 16,
    "logging_steps": 1,
    "fp16": false,
    "bf16": true
  },
  "framework": "gradio",
  "base_model": "microsoft/phi-4",
  "task": "text-generation"
}