merve HF Staff committed on
Commit
20fbdae
·
verified ·
1 Parent(s): 36dbaa3

Training in progress, step 100

Browse files
README.md CHANGED
@@ -34,10 +34,10 @@ This model was trained with SFT.
34
 
35
  ### Framework versions
36
 
37
- - TRL: 0.19.0
38
- - Transformers: 4.53.0
39
  - Pytorch: 2.6.0+cu124
40
- - Datasets: 3.6.0
41
  - Tokenizers: 0.21.2
42
 
43
  ## Citations
 
34
 
35
  ### Framework versions
36
 
37
+ - TRL: 0.19.1
38
+ - Transformers: 4.53.2
39
  - Pytorch: 2.6.0+cu124
40
+ - Datasets: 4.0.0
41
  - Tokenizers: 0.21.2
42
 
43
  ## Citations
adapter_config.json CHANGED
@@ -20,42 +20,44 @@
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
 
23
  "r": 16,
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
- "linear_end",
28
- "linear_left",
29
- "post",
 
 
 
 
 
 
 
30
  "up_proj",
 
31
  "q_proj",
32
  "embedding_projection",
33
- "gate_proj",
34
- "ffw_layer_2",
35
- "o_proj",
36
- "v_proj",
37
- "altup_projections.1",
38
- "down_proj",
39
- "prediction_coefs",
40
- "altup_projections.0",
41
  "k_proj",
 
 
 
 
42
  "per_layer_input_gate",
43
- "altup_unembed_projections.1",
44
- "input_proj_linear",
45
- "per_layer_model_projection",
46
  "linear_start",
47
- "correction_coefs",
48
- "altup_unembed_projections.2",
49
- "linear_right",
50
- "altup_unembed_projections.0",
51
  "pos_proj",
52
- "ffw_layer_1",
53
- "per_layer_projection",
54
  "modality_router",
55
- "altup_projections.2"
 
56
  ],
57
  "task_type": "CAUSAL_LM",
58
  "trainable_token_indices": null,
59
  "use_dora": false,
 
60
  "use_rslora": false
61
  }
 
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
  "r": 16,
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
+ "per_layer_model_projection",
29
+ "altup_projections.0",
30
+ "correction_coefs",
31
+ "altup_projections.2",
32
+ "down_proj",
33
+ "ffw_layer_2",
34
+ "per_layer_projection",
35
+ "input_proj_linear",
36
+ "v_proj",
37
+ "altup_unembed_projections.2",
38
  "up_proj",
39
+ "o_proj",
40
  "q_proj",
41
  "embedding_projection",
 
 
 
 
 
 
 
 
42
  "k_proj",
43
+ "post",
44
+ "ffw_layer_1",
45
+ "altup_projections.1",
46
+ "altup_unembed_projections.0",
47
  "per_layer_input_gate",
48
+ "gate_proj",
49
+ "linear_end",
50
+ "linear_left",
51
  "linear_start",
 
 
 
 
52
  "pos_proj",
53
+ "linear_right",
 
54
  "modality_router",
55
+ "altup_unembed_projections.1",
56
+ "prediction_coefs"
57
  ],
58
  "task_type": "CAUSAL_LM",
59
  "trainable_token_indices": null,
60
  "use_dora": false,
61
+ "use_qalora": false,
62
  "use_rslora": false
63
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7edd90144d3df68ff24938748ba2b0fd3c3e5bdcab789ffcfa95240c9c68c910
3
  size 149802432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00f42bc7081540955803b0dcbff13a15f9388b327f68f63c04449587d8b1c841
3
  size 149802432
runs/Jul16_10-48-07_2d2563e51c9f/events.out.tfevents.1752662905.2d2563e51c9f.677.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adc2a54c237d4d1bb685bee7adce21c2872b2cbe07d65f0ebcb62129ea623e32
3
+ size 9142
runs/Jul16_10-50-13_2d2563e51c9f/events.out.tfevents.1752663019.2d2563e51c9f.677.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7b1edcf100c01328aa8ef70ad6a81631be84d0bf61a89914752637f038d3cb
3
+ size 9142
runs/Jul16_10-51-20_2d2563e51c9f/events.out.tfevents.1752663086.2d2563e51c9f.677.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c46fa654487ef6b2f09c19a93b11c5bb5050d1e996a74b3a95cb0ec5fd6cd0
3
+ size 12382
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39417d8c7e1909db45b4937df5d36aaf5f0875b5d264733ff9bca417831a1923
3
- size 5816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66231d69cab4f7f2ddd62fbaa5f73322a540ab12266e1b62f63d0d76fe83326a
3
+ size 5752