{
  "compute": {
    "accelerator": "A10G",
    "instance": "g5.2xlarge"
  },
  "model": {
    "name": "microsoft/phi-4",
    "tokenizer": "microsoft/phi-4"
  },
  "dataset": {
    "name": "adel67460/straburo-dataset",
    "train_split": "train",
    "eval_split": "train",
    "validation_size": 0.2
  },
  "training": {
    "epochs": 3,
    "batch_size": 1,
    "learning_rate": 2e-5,
    "warmup_ratio": 0.03,
    "evaluation_strategy": "steps",
    "eval_steps": 100,
    "save_strategy": "steps",
    "save_steps": 100,
    "save_total_limit": 1,
    "load_best_model_at_end": true,
    "metric_for_best_model": "loss",
    "greater_is_better": false,
    "gradient_accumulation_steps": 16,
    "logging_steps": 1,
    "fp16": false,
    "bf16": true
  },
  "framework": "gradio",
  "base_model": "microsoft/phi-4",
  "task": "text-generation"
}