Training in progress, step 100
Browse files- README.md +3 -3
- adapter_config.json +23 -21
- adapter_model.safetensors +1 -1
- runs/Jul16_10-48-07_2d2563e51c9f/events.out.tfevents.1752662905.2d2563e51c9f.677.0 +3 -0
- runs/Jul16_10-50-13_2d2563e51c9f/events.out.tfevents.1752663019.2d2563e51c9f.677.1 +3 -0
- runs/Jul16_10-51-20_2d2563e51c9f/events.out.tfevents.1752663086.2d2563e51c9f.677.2 +3 -0
- training_args.bin +2 -2
README.md
CHANGED
@@ -34,10 +34,10 @@ This model was trained with SFT.
|
|
34 |
|
35 |
### Framework versions
|
36 |
|
37 |
-
- TRL: 0.19.
|
38 |
-
- Transformers: 4.53.
|
39 |
- Pytorch: 2.6.0+cu124
|
40 |
-
- Datasets:
|
41 |
- Tokenizers: 0.21.2
|
42 |
|
43 |
## Citations
|
|
|
34 |
|
35 |
### Framework versions
|
36 |
|
37 |
+
- TRL: 0.19.1
|
38 |
+
- Transformers: 4.53.2
|
39 |
- Pytorch: 2.6.0+cu124
|
40 |
+
- Datasets: 4.0.0
|
41 |
- Tokenizers: 0.21.2
|
42 |
|
43 |
## Citations
|
adapter_config.json
CHANGED
@@ -20,42 +20,44 @@
|
|
20 |
"megatron_core": "megatron.core",
|
21 |
"modules_to_save": null,
|
22 |
"peft_type": "LORA",
|
|
|
23 |
"r": 16,
|
24 |
"rank_pattern": {},
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
"up_proj",
|
|
|
31 |
"q_proj",
|
32 |
"embedding_projection",
|
33 |
-
"gate_proj",
|
34 |
-
"ffw_layer_2",
|
35 |
-
"o_proj",
|
36 |
-
"v_proj",
|
37 |
-
"altup_projections.1",
|
38 |
-
"down_proj",
|
39 |
-
"prediction_coefs",
|
40 |
-
"altup_projections.0",
|
41 |
"k_proj",
|
|
|
|
|
|
|
|
|
42 |
"per_layer_input_gate",
|
43 |
-
"
|
44 |
-
"
|
45 |
-
"
|
46 |
"linear_start",
|
47 |
-
"correction_coefs",
|
48 |
-
"altup_unembed_projections.2",
|
49 |
-
"linear_right",
|
50 |
-
"altup_unembed_projections.0",
|
51 |
"pos_proj",
|
52 |
-
"
|
53 |
-
"per_layer_projection",
|
54 |
"modality_router",
|
55 |
-
"
|
|
|
56 |
],
|
57 |
"task_type": "CAUSAL_LM",
|
58 |
"trainable_token_indices": null,
|
59 |
"use_dora": false,
|
|
|
60 |
"use_rslora": false
|
61 |
}
|
|
|
20 |
"megatron_core": "megatron.core",
|
21 |
"modules_to_save": null,
|
22 |
"peft_type": "LORA",
|
23 |
+
"qalora_group_size": 16,
|
24 |
"r": 16,
|
25 |
"rank_pattern": {},
|
26 |
"revision": null,
|
27 |
"target_modules": [
|
28 |
+
"per_layer_model_projection",
|
29 |
+
"altup_projections.0",
|
30 |
+
"correction_coefs",
|
31 |
+
"altup_projections.2",
|
32 |
+
"down_proj",
|
33 |
+
"ffw_layer_2",
|
34 |
+
"per_layer_projection",
|
35 |
+
"input_proj_linear",
|
36 |
+
"v_proj",
|
37 |
+
"altup_unembed_projections.2",
|
38 |
"up_proj",
|
39 |
+
"o_proj",
|
40 |
"q_proj",
|
41 |
"embedding_projection",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
"k_proj",
|
43 |
+
"post",
|
44 |
+
"ffw_layer_1",
|
45 |
+
"altup_projections.1",
|
46 |
+
"altup_unembed_projections.0",
|
47 |
"per_layer_input_gate",
|
48 |
+
"gate_proj",
|
49 |
+
"linear_end",
|
50 |
+
"linear_left",
|
51 |
"linear_start",
|
|
|
|
|
|
|
|
|
52 |
"pos_proj",
|
53 |
+
"linear_right",
|
|
|
54 |
"modality_router",
|
55 |
+
"altup_unembed_projections.1",
|
56 |
+
"prediction_coefs"
|
57 |
],
|
58 |
"task_type": "CAUSAL_LM",
|
59 |
"trainable_token_indices": null,
|
60 |
"use_dora": false,
|
61 |
+
"use_qalora": false,
|
62 |
"use_rslora": false
|
63 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 149802432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00f42bc7081540955803b0dcbff13a15f9388b327f68f63c04449587d8b1c841
|
3 |
size 149802432
|
runs/Jul16_10-48-07_2d2563e51c9f/events.out.tfevents.1752662905.2d2563e51c9f.677.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adc2a54c237d4d1bb685bee7adce21c2872b2cbe07d65f0ebcb62129ea623e32
|
3 |
+
size 9142
|
runs/Jul16_10-50-13_2d2563e51c9f/events.out.tfevents.1752663019.2d2563e51c9f.677.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b7b1edcf100c01328aa8ef70ad6a81631be84d0bf61a89914752637f038d3cb
|
3 |
+
size 9142
|
runs/Jul16_10-51-20_2d2563e51c9f/events.out.tfevents.1752663086.2d2563e51c9f.677.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2c46fa654487ef6b2f09c19a93b11c5bb5050d1e996a74b3a95cb0ec5fd6cd0
|
3 |
+
size 12382
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66231d69cab4f7f2ddd62fbaa5f73322a540ab12266e1b62f63d0d76fe83326a
|
3 |
+
size 5752
|