Training in progress, step 100

Files changed (7) hide show

README.md CHANGED Viewed

@@ -34,10 +34,10 @@ This model was trained with SFT.
 ### Framework versions
-- TRL: 0.19.0
-- Transformers: 4.53.0
 - Pytorch: 2.6.0+cu124
-- Datasets: 3.6.0
 - Tokenizers: 0.21.2
 ## Citations

 ### Framework versions
+- TRL: 0.19.1
+- Transformers: 4.53.2
 - Pytorch: 2.6.0+cu124
+- Datasets: 4.0.0
 - Tokenizers: 0.21.2
 ## Citations

adapter_config.json CHANGED Viewed

@@ -20,42 +20,44 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "linear_end",
-    "linear_left",
-    "post",
     "up_proj",
     "q_proj",
     "embedding_projection",
-    "gate_proj",
-    "ffw_layer_2",
-    "o_proj",
-    "v_proj",
-    "altup_projections.1",
-    "down_proj",
-    "prediction_coefs",
-    "altup_projections.0",
     "k_proj",
     "per_layer_input_gate",
-    "altup_unembed_projections.1",
-    "input_proj_linear",
-    "per_layer_model_projection",
     "linear_start",
-    "correction_coefs",
-    "altup_unembed_projections.2",
-    "linear_right",
-    "altup_unembed_projections.0",
     "pos_proj",
-    "ffw_layer_1",
-    "per_layer_projection",
     "modality_router",
-    "altup_projections.2"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "per_layer_model_projection",
+    "altup_projections.0",
+    "correction_coefs",
+    "altup_projections.2",
+    "down_proj",
+    "ffw_layer_2",
+    "per_layer_projection",
+    "input_proj_linear",
+    "v_proj",
+    "altup_unembed_projections.2",
     "up_proj",
+    "o_proj",
     "q_proj",
     "embedding_projection",
     "k_proj",
+    "post",
+    "ffw_layer_1",
+    "altup_projections.1",
+    "altup_unembed_projections.0",
     "per_layer_input_gate",
+    "gate_proj",
+    "linear_end",
+    "linear_left",
     "linear_start",
     "pos_proj",
+    "linear_right",
     "modality_router",
+    "altup_unembed_projections.1",
+    "prediction_coefs"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7edd90144d3df68ff24938748ba2b0fd3c3e5bdcab789ffcfa95240c9c68c910
 size 149802432

 version https://git-lfs.github.com/spec/v1
+oid sha256:00f42bc7081540955803b0dcbff13a15f9388b327f68f63c04449587d8b1c841
 size 149802432

runs/Jul16_10-48-07_2d2563e51c9f/events.out.tfevents.1752662905.2d2563e51c9f.677.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:adc2a54c237d4d1bb685bee7adce21c2872b2cbe07d65f0ebcb62129ea623e32
+size 9142

runs/Jul16_10-50-13_2d2563e51c9f/events.out.tfevents.1752663019.2d2563e51c9f.677.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b7b1edcf100c01328aa8ef70ad6a81631be84d0bf61a89914752637f038d3cb
+size 9142

runs/Jul16_10-51-20_2d2563e51c9f/events.out.tfevents.1752663086.2d2563e51c9f.677.2 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2c46fa654487ef6b2f09c19a93b11c5bb5050d1e996a74b3a95cb0ec5fd6cd0
+size 12382

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39417d8c7e1909db45b4937df5d36aaf5f0875b5d264733ff9bca417831a1923
-size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:66231d69cab4f7f2ddd62fbaa5f73322a540ab12266e1b62f63d0d76fe83326a
+size 5752