AmberYifan
/

Gemma-2-9B-sft-ultrachat-safeRLHF

+---
+base_model: AmberYifan/Gemma-2-9B-sft-ultrachat
+library_name: transformers
+model_name: Gemma-2-9B-sft-ultrachat-safeRLHF
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for Gemma-2-9B-sft-ultrachat-safeRLHF
+This model is a fine-tuned version of [AmberYifan/Gemma-2-9B-sft-ultrachat](https://huggingface.co/AmberYifan/Gemma-2-9B-sft-ultrachat).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="AmberYifan/Gemma-2-9B-sft-ultrachat-safeRLHF", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.12.2
+- Transformers: 4.46.3
+- PyTorch: 2.5.1+cu118
+- Datasets: 3.2.0
+- Tokenizers: 0.20.3
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 1.0,
+    "total_flos": 26794052812800.0,
+    "train_loss": 1.5483093844519722,
+    "train_runtime": 2786.619,
+    "train_samples": 50301,
+    "train_samples_per_second": 2.581,
+    "train_steps_per_second": 0.081
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.46.3"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 1.0,
+    "total_flos": 26794052812800.0,
+    "train_loss": 1.5483093844519722,
+    "train_runtime": 2786.619,
+    "train_samples": 50301,
+    "train_samples_per_second": 2.581,
+    "train_steps_per_second": 0.081
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,372 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 225,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0044444444444444444,
+      "grad_norm": 8.214417146752451,
+      "learning_rate": 8.695652173913044e-07,
+      "loss": 2.0263,
+      "step": 1
+    },
+    {
+      "epoch": 0.022222222222222223,
+      "grad_norm": 3.6940982023267743,
+      "learning_rate": 4.347826086956522e-06,
+      "loss": 1.9582,
+      "step": 5
+    },
+    {
+      "epoch": 0.044444444444444446,
+      "grad_norm": 1.8498095767085803,
+      "learning_rate": 8.695652173913044e-06,
+      "loss": 1.7626,
+      "step": 10
+    },
+    {
+      "epoch": 0.06666666666666667,
+      "grad_norm": 1.6070070655248392,
+      "learning_rate": 1.3043478260869566e-05,
+      "loss": 1.6818,
+      "step": 15
+    },
+    {
+      "epoch": 0.08888888888888889,
+      "grad_norm": 1.5319931708713932,
+      "learning_rate": 1.739130434782609e-05,
+      "loss": 1.6306,
+      "step": 20
+    },
+    {
+      "epoch": 0.1111111111111111,
+      "grad_norm": 1.467269040192921,
+      "learning_rate": 1.999516282291988e-05,
+      "loss": 1.6301,
+      "step": 25
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 1.4554783607714894,
+      "learning_rate": 1.9940798309400527e-05,
+      "loss": 1.6183,
+      "step": 30
+    },
+    {
+      "epoch": 0.15555555555555556,
+      "grad_norm": 1.400684418666206,
+      "learning_rate": 1.982635248222264e-05,
+      "loss": 1.6312,
+      "step": 35
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 1.373530741526711,
+      "learning_rate": 1.9652517041934357e-05,
+      "loss": 1.6135,
+      "step": 40
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 1.3446098817534498,
+      "learning_rate": 1.9420342634699893e-05,
+      "loss": 1.6207,
+      "step": 45
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 1.325817153627182,
+      "learning_rate": 1.913123250228619e-05,
+      "loss": 1.5728,
+      "step": 50
+    },
+    {
+      "epoch": 0.24444444444444444,
+      "grad_norm": 1.3469969360424408,
+      "learning_rate": 1.878693400099269e-05,
+      "loss": 1.6118,
+      "step": 55
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 1.3917425359171944,
+      "learning_rate": 1.8389528040783014e-05,
+      "loss": 1.6024,
+      "step": 60
+    },
+    {
+      "epoch": 0.28888888888888886,
+      "grad_norm": 1.4303898969445588,
+      "learning_rate": 1.7941416508447537e-05,
+      "loss": 1.5685,
+      "step": 65
+    },
+    {
+      "epoch": 0.3111111111111111,
+      "grad_norm": 1.3429999677825752,
+      "learning_rate": 1.7445307750810153e-05,
+      "loss": 1.5726,
+      "step": 70
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 1.2742933093395246,
+      "learning_rate": 1.690420020571747e-05,
+      "loss": 1.5603,
+      "step": 75
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 1.3045085959219214,
+      "learning_rate": 1.6321364279743267e-05,
+      "loss": 1.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.37777777777777777,
+      "grad_norm": 1.2646034578224143,
+      "learning_rate": 1.570032258213783e-05,
+      "loss": 1.537,
+      "step": 85
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 1.2590924521485223,
+      "learning_rate": 1.50448286344864e-05,
+      "loss": 1.542,
+      "step": 90
+    },
+    {
+      "epoch": 0.4222222222222222,
+      "grad_norm": 1.259340176409621,
+      "learning_rate": 1.4358844184753713e-05,
+      "loss": 1.5492,
+      "step": 95
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 1.2705396470791088,
+      "learning_rate": 1.3646515262826551e-05,
+      "loss": 1.5322,
+      "step": 100
+    },
+    {
+      "epoch": 0.4666666666666667,
+      "grad_norm": 1.2638794294944307,
+      "learning_rate": 1.2912147122272523e-05,
+      "loss": 1.5427,
+      "step": 105
+    },
+    {
+      "epoch": 0.4888888888888889,
+      "grad_norm": 1.2424077091728765,
+      "learning_rate": 1.2160178219764838e-05,
+      "loss": 1.5283,
+      "step": 110
+    },
+    {
+      "epoch": 0.5111111111111111,
+      "grad_norm": 1.2329795354035313,
+      "learning_rate": 1.1395153389439232e-05,
+      "loss": 1.5173,
+      "step": 115
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 1.2371556053676906,
+      "learning_rate": 1.0621696374314807e-05,
+      "loss": 1.5307,
+      "step": 120
+    },
+    {
+      "epoch": 0.5555555555555556,
+      "grad_norm": 1.232712434692815,
+      "learning_rate": 9.844481880796492e-06,
+      "loss": 1.5281,
+      "step": 125
+    },
+    {
+      "epoch": 0.5777777777777777,
+      "grad_norm": 1.2529176937558433,
+      "learning_rate": 9.068207325159285e-06,
+      "loss": 1.5087,
+      "step": 130
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 1.2996588051365014,
+      "learning_rate": 8.297564442776014e-06,
+      "loss": 1.5159,
+      "step": 135
+    },
+    {
+      "epoch": 0.6222222222222222,
+      "grad_norm": 1.2184035510596667,
+      "learning_rate": 7.537210931679988e-06,
+      "loss": 1.5176,
+      "step": 140
+    },
+    {
+      "epoch": 0.6444444444444445,
+      "grad_norm": 1.2490727423346757,
+      "learning_rate": 6.791742301846325e-06,
+      "loss": 1.4941,
+      "step": 145
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 1.2241017843446953,
+      "learning_rate": 6.065664100332478e-06,
+      "loss": 1.5191,
+      "step": 150
+    },
+    {
+      "epoch": 0.6888888888888889,
+      "grad_norm": 1.239277498997839,
+      "learning_rate": 5.3633646801467255e-06,
+      "loss": 1.5149,
+      "step": 155
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "grad_norm": 1.2454092681206306,
+      "learning_rate": 4.6890886774272485e-06,
+      "loss": 1.4911,
+      "step": 160
+    },
+    {
+      "epoch": 0.7333333333333333,
+      "grad_norm": 1.1922483231102439,
+      "learning_rate": 4.046911357233343e-06,
+      "loss": 1.4784,
+      "step": 165
+    },
+    {
+      "epoch": 0.7555555555555555,
+      "grad_norm": 1.2247982014982308,
+      "learning_rate": 3.440713983000601e-06,
+      "loss": 1.4801,
+      "step": 170
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 1.239899805156239,
+      "learning_rate": 2.8741603585249312e-06,
+      "loss": 1.4583,
+      "step": 175
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.2422952358608288,
+      "learning_rate": 2.3506746842535244e-06,
+      "loss": 1.4918,
+      "step": 180
+    },
+    {
+      "epoch": 0.8222222222222222,
+      "grad_norm": 1.2349149954091598,
+      "learning_rate": 1.8734208617174986e-06,
+      "loss": 1.4642,
+      "step": 185
+    },
+    {
+      "epoch": 0.8444444444444444,
+      "grad_norm": 1.2203735788336134,
+      "learning_rate": 1.4452833711883629e-06,
+      "loss": 1.4605,
+      "step": 190
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 1.2111942025579017,
+      "learning_rate": 1.0688498381320855e-06,
+      "loss": 1.4518,
+      "step": 195
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 1.2227205188182604,
+      "learning_rate": 7.463953938275859e-07,
+      "loss": 1.471,
+      "step": 200
+    },
+    {
+      "epoch": 0.9111111111111111,
+      "grad_norm": 1.2352427530088093,
+      "learning_rate": 4.798689246727006e-07,
+      "loss": 1.4786,
+      "step": 205
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 1.2107626288198272,
+      "learning_rate": 2.708812932856253e-07,
+      "loss": 1.4627,
+      "step": 210
+    },
+    {
+      "epoch": 0.9555555555555556,
+      "grad_norm": 1.2338739350312762,
+      "learning_rate": 1.206956025924333e-07,
+      "loss": 1.4702,
+      "step": 215
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 1.2316651082897998,
+      "learning_rate": 3.0219561743707326e-08,
+      "loss": 1.4524,
+      "step": 220
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.379154124562403,
+      "learning_rate": 0.0,
+      "loss": 1.4667,
+      "step": 225
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.4416327476501465,
+      "eval_runtime": 82.5389,
+      "eval_samples_per_second": 14.539,
+      "eval_steps_per_second": 0.46,
+      "step": 225
+    },
+    {
+      "epoch": 1.0,
+      "step": 225,
+      "total_flos": 26794052812800.0,
+      "train_loss": 1.5483093844519722,
+      "train_runtime": 2786.619,
+      "train_samples_per_second": 2.581,
+      "train_steps_per_second": 0.081
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 225,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 26794052812800.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}