Training in progress, step 100
Browse files- README.md +2 -2
- adapter_config.json +20 -20
- runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298118.66bfc5776606.1352.0 +3 -0
- runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298272.66bfc5776606.1352.1 +3 -0
- runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298547.66bfc5776606.1352.2 +3 -0
- runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298624.66bfc5776606.1352.3 +3 -0
- runs/Jun30_15-51-14_66bfc5776606/events.out.tfevents.1751298679.66bfc5776606.1352.4 +3 -0
- runs/Jun30_15-51-36_66bfc5776606/events.out.tfevents.1751298699.66bfc5776606.1352.5 +3 -0
- runs/Jun30_16-20-23_66bfc5776606/events.out.tfevents.1751300433.66bfc5776606.20495.0 +3 -0
- training_args.bin +2 -2
README.md
CHANGED
@@ -4,8 +4,8 @@ library_name: transformers
|
|
4 |
model_name: gemma-3n-finevideo
|
5 |
tags:
|
6 |
- generated_from_trainer
|
7 |
-
- sft
|
8 |
- trl
|
|
|
9 |
licence: license
|
10 |
---
|
11 |
|
@@ -38,7 +38,7 @@ This model was trained with SFT.
|
|
38 |
- Transformers: 4.53.0
|
39 |
- Pytorch: 2.6.0+cu124
|
40 |
- Datasets: 3.6.0
|
41 |
-
- Tokenizers: 0.21.
|
42 |
|
43 |
## Citations
|
44 |
|
|
|
4 |
model_name: gemma-3n-finevideo
|
5 |
tags:
|
6 |
- generated_from_trainer
|
|
|
7 |
- trl
|
8 |
+
- sft
|
9 |
licence: license
|
10 |
---
|
11 |
|
|
|
38 |
- Transformers: 4.53.0
|
39 |
- Pytorch: 2.6.0+cu124
|
40 |
- Datasets: 3.6.0
|
41 |
+
- Tokenizers: 0.21.2
|
42 |
|
43 |
## Citations
|
44 |
|
adapter_config.json
CHANGED
@@ -25,33 +25,33 @@
|
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
"linear_start",
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"post",
|
31 |
-
"v_proj",
|
32 |
-
"o_proj",
|
33 |
-
"modality_router",
|
34 |
"per_layer_projection",
|
35 |
-
"
|
36 |
"linear_left",
|
37 |
-
"q_proj",
|
38 |
-
"linear_end",
|
39 |
-
"down_proj",
|
40 |
-
"prediction_coefs",
|
41 |
-
"altup_unembed_projections.0",
|
42 |
-
"input_proj_linear",
|
43 |
"k_proj",
|
44 |
-
"linear_right",
|
45 |
-
"altup_projections.2",
|
46 |
-
"up_proj",
|
47 |
-
"gate_proj",
|
48 |
-
"embedding_projection",
|
49 |
"correction_coefs",
|
50 |
-
"
|
51 |
-
"pos_proj",
|
52 |
"per_layer_model_projection",
|
53 |
"ffw_layer_1",
|
|
|
|
|
|
|
|
|
|
|
54 |
"altup_unembed_projections.2",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
"altup_unembed_projections.1"
|
56 |
],
|
57 |
"task_type": "CAUSAL_LM",
|
|
|
25 |
"revision": null,
|
26 |
"target_modules": [
|
27 |
"linear_start",
|
28 |
+
"embedding_projection",
|
29 |
+
"down_proj",
|
|
|
|
|
|
|
|
|
30 |
"per_layer_projection",
|
31 |
+
"o_proj",
|
32 |
"linear_left",
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
"k_proj",
|
|
|
|
|
|
|
|
|
|
|
34 |
"correction_coefs",
|
35 |
+
"post",
|
|
|
36 |
"per_layer_model_projection",
|
37 |
"ffw_layer_1",
|
38 |
+
"altup_unembed_projections.0",
|
39 |
+
"altup_projections.2",
|
40 |
+
"prediction_coefs",
|
41 |
+
"ffw_layer_2",
|
42 |
+
"up_proj",
|
43 |
"altup_unembed_projections.2",
|
44 |
+
"altup_projections.0",
|
45 |
+
"gate_proj",
|
46 |
+
"input_proj_linear",
|
47 |
+
"pos_proj",
|
48 |
+
"modality_router",
|
49 |
+
"per_layer_input_gate",
|
50 |
+
"linear_end",
|
51 |
+
"linear_right",
|
52 |
+
"altup_projections.1",
|
53 |
+
"v_proj",
|
54 |
+
"q_proj",
|
55 |
"altup_unembed_projections.1"
|
56 |
],
|
57 |
"task_type": "CAUSAL_LM",
|
runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298118.66bfc5776606.1352.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad4acb284864b4e9b88980881d6beb3895a08e42bbe1ee7d7293d1bc79895ae4
|
3 |
+
size 9273
|
runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298272.66bfc5776606.1352.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87c1dcb8aa8da47e6e425a8aea93203307a31c9ac53be6a5b4295df1bc6e459a
|
3 |
+
size 10683
|
runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298547.66bfc5776606.1352.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5aac8a121067bede967a6cfabccee971d2f3837438284639289e73072ca30ead
|
3 |
+
size 9273
|
runs/Jun30_15-41-50_66bfc5776606/events.out.tfevents.1751298624.66bfc5776606.1352.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f511e5f0ef559d2cc1a0018d7406dadefd275e4412b52dd302970a6ed0f6fc9
|
3 |
+
size 9273
|
runs/Jun30_15-51-14_66bfc5776606/events.out.tfevents.1751298679.66bfc5776606.1352.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46aeba50ff979a56e5a7e62710414a371a9eb6f2bba21c508abfccff2dfd157c
|
3 |
+
size 9275
|
runs/Jun30_15-51-36_66bfc5776606/events.out.tfevents.1751298699.66bfc5776606.1352.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff62f7f6816e725b1a2d2016ced98f5fd21e68d5f6e69c663e529bedb72cb154
|
3 |
+
size 9599
|
runs/Jun30_16-20-23_66bfc5776606/events.out.tfevents.1751300433.66bfc5776606.20495.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a6d13fee2c776205c33e1cd8d20c02086f47d2279a8f4cdbd2fe7b99bf53e78
|
3 |
+
size 21702
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5520e030c7191bac4d3099929fe303545ea8469ce88179ab152aa2d7c7dede3
|
3 |
+
size 5816
|