Model save

Browse files

Files changed (5) hide show

README.md +58 -0
all_results.json +8 -0
generation_config.json +14 -0
train_results.json +8 -0
trainer_state.json +279 -0

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: Qwen/Qwen2.5-3B-Instruct
+library_name: transformers
+model_name: Agentic-Qwen-3B-e2-lr5-b8
+tags:
+- generated_from_trainer
+- trl
+- sft
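+license: other
+license_name: qwen-research
+license_link: https://huggingface.co/Qwen/Qwen2.5-3B-Instruct/blob/main/LICENSE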
+---
+# Model Card for Agentic-Qwen-3B-e2-lr5-b8
+This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="akseljoonas/Agentic-Qwen-3B-e2-lr5-b8", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/akseljoonas-university-of-groningen/huggingface/runs/w3hmoma8)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.16.0
+- Transformers: 4.52.4
+- PyTorch: 2.6.0
+- Datasets: 3.6.0
+- Tokenizers: 0.21.1
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 30554840104960.0,
+    "train_loss": 0.4982214890309234,
+    "train_runtime": 327.6119,
+    "train_samples": 1845,
+    "train_samples_per_second": 3.242,
+    "train_steps_per_second": 0.409
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.52.4"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 30554840104960.0,
+    "train_loss": 0.4982214890309234,
+    "train_runtime": 327.6119,
+    "train_samples": 1845,
+    "train_samples_per_second": 3.242,
+    "train_steps_per_second": 0.409
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,279 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 134,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.07462686567164178,
+      "grad_norm": 1.097984505829403,
+      "learning_rate": 1.4285714285714285e-05,
+      "loss": 1.0711,
+      "mean_token_accuracy": 0.7844904899597168,
+      "num_tokens": 808107.0,
+      "step": 5
+    },
+    {
+      "epoch": 0.14925373134328357,
+      "grad_norm": 0.5592347657844529,
+      "learning_rate": 3.2142857142857144e-05,
+      "loss": 0.7776,
+      "mean_token_accuracy": 0.8252509593963623,
+      "num_tokens": 1609080.0,
+      "step": 10
+    },
+    {
+      "epoch": 0.22388059701492538,
+      "grad_norm": 2.3311733604872757,
+      "learning_rate": 5e-05,
+      "loss": 0.6944,
+      "mean_token_accuracy": 0.8515042781829834,
+      "num_tokens": 2364819.0,
+      "step": 15
+    },
+    {
+      "epoch": 0.29850746268656714,
+      "grad_norm": 0.539840904336575,
+      "learning_rate": 4.791666666666667e-05,
+      "loss": 0.5766,
+      "mean_token_accuracy": 0.8671263098716736,
+      "num_tokens": 3137298.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.373134328358209,
+      "grad_norm": 0.41546864000168066,
+      "learning_rate": 4.5833333333333334e-05,
+      "loss": 0.5163,
+      "mean_token_accuracy": 0.8827684998512269,
+      "num_tokens": 3905642.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.44776119402985076,
+      "grad_norm": 0.353169354176409,
+      "learning_rate": 4.375e-05,
+      "loss": 0.5541,
+      "mean_token_accuracy": 0.8705575823783874,
+      "num_tokens": 4698822.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.5223880597014925,
+      "grad_norm": 0.3367736704335482,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 0.5299,
+      "mean_token_accuracy": 0.8778650641441346,
+      "num_tokens": 5502368.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.5970149253731343,
+      "grad_norm": 0.32014959900195933,
+      "learning_rate": 3.958333333333333e-05,
+      "loss": 0.4523,
+      "mean_token_accuracy": 0.9003850221633911,
+      "num_tokens": 6264696.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.6716417910447762,
+      "grad_norm": 0.3766002391001425,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.5118,
+      "mean_token_accuracy": 0.8829670548439026,
+      "num_tokens": 7073616.0,
+      "step": 45
+    },
+    {
+      "epoch": 0.746268656716418,
+      "grad_norm": 0.34901853331777233,
+      "learning_rate": 3.541666666666667e-05,
+      "loss": 0.6568,
+      "mean_token_accuracy": 0.8654794096946716,
+      "num_tokens": 7870867.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.8208955223880597,
+      "grad_norm": 0.41496433526889975,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.5018,
+      "mean_token_accuracy": 0.879916000366211,
+      "num_tokens": 8654369.0,
+      "step": 55
+    },
+    {
+      "epoch": 0.8955223880597015,
+      "grad_norm": 0.35710466457229734,
+      "learning_rate": 3.125e-05,
+      "loss": 0.4922,
+      "mean_token_accuracy": 0.8849031329154968,
+      "num_tokens": 9406298.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.9701492537313433,
+      "grad_norm": 0.2759798405972834,
+      "learning_rate": 2.916666666666667e-05,
+      "loss": 0.5385,
+      "mean_token_accuracy": 0.8858557939529419,
+      "num_tokens": 10223729.0,
+      "step": 65
+    },
+    {
+      "epoch": 1.044776119402985,
+      "grad_norm": 0.37990993169333426,
+      "learning_rate": 2.7083333333333332e-05,
+      "loss": 0.4201,
+      "mean_token_accuracy": 0.9078471302986145,
+      "num_tokens": 10899620.0,
+      "step": 70
+    },
+    {
+      "epoch": 1.1194029850746268,
+      "grad_norm": 0.2768802930431144,
+      "learning_rate": 2.5e-05,
+      "loss": 0.4874,
+      "mean_token_accuracy": 0.8900921702384949,
+      "num_tokens": 11673700.0,
+      "step": 75
+    },
+    {
+      "epoch": 1.1940298507462686,
+      "grad_norm": 0.3118601164754786,
+      "learning_rate": 2.2916666666666667e-05,
+      "loss": 0.4586,
+      "mean_token_accuracy": 0.8888717889785767,
+      "num_tokens": 12471762.0,
+      "step": 80
+    },
+    {
+      "epoch": 1.2686567164179103,
+      "grad_norm": 3.127142887640936,
+      "learning_rate": 2.0833333333333336e-05,
+      "loss": 0.3011,
+      "mean_token_accuracy": 0.9234537720680237,
+      "num_tokens": 13235430.0,
+      "step": 85
+    },
+    {
+      "epoch": 1.3432835820895521,
+      "grad_norm": 0.28349474176463113,
+      "learning_rate": 1.8750000000000002e-05,
+      "loss": 0.355,
+      "mean_token_accuracy": 0.9111138224601746,
+      "num_tokens": 14035158.0,
+      "step": 90
+    },
+    {
+      "epoch": 1.417910447761194,
+      "grad_norm": 0.4656290158665034,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 0.3988,
+      "mean_token_accuracy": 0.9019946575164794,
+      "num_tokens": 14818394.0,
+      "step": 95
+    },
+    {
+      "epoch": 1.4925373134328357,
+      "grad_norm": 0.35912081575893856,
+      "learning_rate": 1.4583333333333335e-05,
+      "loss": 0.3618,
+      "mean_token_accuracy": 0.9122716307640075,
+      "num_tokens": 15598792.0,
+      "step": 100
+    },
+    {
+      "epoch": 1.5671641791044775,
+      "grad_norm": 0.30779204946938893,
+      "learning_rate": 1.25e-05,
+      "loss": 0.3203,
+      "mean_token_accuracy": 0.9191259384155274,
+      "num_tokens": 16383948.0,
+      "step": 105
+    },
+    {
+      "epoch": 1.6417910447761193,
+      "grad_norm": 1.0451064339674097,
+      "learning_rate": 1.0416666666666668e-05,
+      "loss": 0.457,
+      "mean_token_accuracy": 0.8926056742668151,
+      "num_tokens": 17148084.0,
+      "step": 110
+    },
+    {
+      "epoch": 1.716417910447761,
+      "grad_norm": 0.3071303378120666,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.3756,
+      "mean_token_accuracy": 0.908941102027893,
+      "num_tokens": 17951763.0,
+      "step": 115
+    },
+    {
+      "epoch": 1.7910447761194028,
+      "grad_norm": 0.30659106999758134,
+      "learning_rate": 6.25e-06,
+      "loss": 0.4142,
+      "mean_token_accuracy": 0.9020455718040467,
+      "num_tokens": 18763975.0,
+      "step": 120
+    },
+    {
+      "epoch": 1.8656716417910446,
+      "grad_norm": 0.5796181274690525,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 0.4989,
+      "mean_token_accuracy": 0.8861546277999878,
+      "num_tokens": 19532240.0,
+      "step": 125
+    },
+    {
+      "epoch": 1.9402985074626866,
+      "grad_norm": 0.3415301126393925,
+      "learning_rate": 2.0833333333333334e-06,
+      "loss": 0.3847,
+      "mean_token_accuracy": 0.9057931303977966,
+      "num_tokens": 20314776.0,
+      "step": 130
+    },
+    {
+      "epoch": 2.0,
+      "mean_token_accuracy": 0.9028400182723999,
+      "num_tokens": 20867736.0,
+      "step": 134,
+      "total_flos": 30554840104960.0,
+      "train_loss": 0.4982214890309234,
+      "train_runtime": 327.6119,
+      "train_samples_per_second": 3.242,
+      "train_steps_per_second": 0.409
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 134,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 30554840104960.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}