Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -3,12 +3,9 @@ library_name: transformers
 license: apache-2.0
 base_model: mistralai/Mistral-7B-Instruct-v0.1
 tags:
-- alignment-handbook
-- trl
-- sft
-- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 datasets:
 - generator
@@ -23,8 +20,6 @@ should probably proofread and complete it, then remove this comment. -->
 # mistral_sky_o1_1_full
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the generator dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.6347
 ## Model description

 license: apache-2.0
 base_model: mistralai/Mistral-7B-Instruct-v0.1
 tags:
 - trl
 - sft
+- alignment-handbook
 - generated_from_trainer
 datasets:
 - generator
 # mistral_sky_o1_1_full
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the generator dataset.
 ## Model description

all_results.json CHANGED Viewed

@@ -6,9 +6,9 @@
     "eval_samples_per_second": 21.796,
     "eval_steps_per_second": 1.362,
     "total_flos": 7590012518400.0,
-    "train_loss": 0.6941181943841177,
-    "train_runtime": 407.2099,
     "train_samples": 980,
-    "train_samples_per_second": 5.727,
-    "train_steps_per_second": 0.179
 }

     "eval_samples_per_second": 21.796,
     "eval_steps_per_second": 1.362,
     "total_flos": 7590012518400.0,
+    "train_loss": 0.6951394881287666,
+    "train_runtime": 410.617,
     "train_samples": 980,
+    "train_samples_per_second": 5.679,
+    "train_steps_per_second": 0.178
 }

config.json CHANGED Viewed

@@ -22,6 +22,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
-  "use_cache": true,
   "vocab_size": 32000
 }

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
+  "use_cache": false,
   "vocab_size": 32000
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4b5a870e40868945d2bcc718786488ba8a5cafa5c491558a2b924b220619013
 size 4943162336

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0fa9750b3d938443ec67f64f45d0d1457c396d37c01e81ef5438ece4129fbc3
 size 4943162336

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:930a086b487aab307130f3f5447b8708998139bb35cfb28154dc3cf8c5dcfec0
 size 4999819336

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e3ec747b1e21090135c51c98680cc7e80603e518f05edd5b9a03ff157e158e6
 size 4999819336

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:959da2926670373403c542b1c2eb04ab30479f5a2d468800bf02bce578053bac
 size 4540516344

 version https://git-lfs.github.com/spec/v1
+oid sha256:863f5f3097f5b3be4cd75a33d238edaed4d00ad80f55c4943e49563c4993614d
 size 4540516344

runs/Jan15_00-02-47_dgx-a100-12/events.out.tfevents.1736895781.dgx-a100-12.2673814.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4275b562043a634e255790cac51e6909bcfc7ac050fc94d2251cf5963fccde03
+size 6162

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 7590012518400.0,
-    "train_loss": 0.6941181943841177,
-    "train_runtime": 407.2099,
     "train_samples": 980,
-    "train_samples_per_second": 5.727,
-    "train_steps_per_second": 0.179
 }

 {
     "epoch": 1.0,
     "total_flos": 7590012518400.0,
+    "train_loss": 0.6951394881287666,
+    "train_runtime": 410.617,
     "train_samples": 980,
+    "train_samples_per_second": 5.679,
+    "train_steps_per_second": 0.178
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:477e90d473919765bded7aae41ef8fc54da0296fa932bcd189e8644810ac9f76
 size 6968

 version https://git-lfs.github.com/spec/v1
+oid sha256:f62d900096d0a36cf11b1e5ec611965b5f73980cc7fbee886d4c18a705c719bd
 size 6968