codelion committed · Commit fff148b · verified · 1 Parent(s): 5c66f35

Upload ICM-DPO enhanced Gemma PEFT adapter with comprehensive LoRA and model card

README.md CHANGED
```diff
@@ -41,7 +41,7 @@ This model demonstrates comprehensive capability enhancement using ICM-generated
 - **Target Modules**: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
 - **Modules to Save**: embed_tokens, lm_head (full layers)
 - **Training Method**: Direct Preference Optimization (DPO)
-- **Beta (KL Penalty)**: 0.02
+- **Beta (KL Penalty)**: 0.1
 - **Adapter Size**: ~669MB (includes full embedding/head layers)
 - **Trainable Parameters**: ~56.13838755173775% of base model
 
@@ -50,15 +50,15 @@ This model demonstrates comprehensive capability enhancement using ICM-generated
 ### Dataset
 - **Source**: [codelion/gemma-3-270m-icm-dpo](https://huggingface.co/datasets/codelion/gemma-3-270m-icm-dpo)
 - **Method**: ICM (Internal Coherence Maximization) for label-free preference generation
-- **Training Samples**: 1812
+- **Training Samples**: 1060
 - **Evaluation Samples**: 50
 
 ### Training Configuration
-- **Epochs**: 1
-- **Batch Size**: 1 (per device)
+- **Epochs**: 3
+- **Batch Size**: 4 (per device)
 - **Gradient Accumulation**: 8 steps
-- **Effective Batch Size**: 8
-- **Learning Rate**: 2e-07
+- **Effective Batch Size**: 32
+- **Learning Rate**: 5e-06
 - **Optimizer**: paged_adamw_8bit
 - **Memory Optimization**: BF16, Gradient Checkpointing
 
```
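For reference, the updated README values map onto a DPO training run roughly as follows. This is a minimal sketch assuming recent versions of TRL (`DPOConfig`/`DPOTrainer`) and PEFT (`LoraConfig`); the output directory and variable names are illustrative placeholders, and the LoRA rank/alpha are omitted because they are not part of this diff.

```python
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model_name = "google/gemma-3-270m-it"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
train_ds = load_dataset("codelion/gemma-3-270m-icm-dpo", split="train")

# LoRA setup from the model card: all attention/MLP projections as adapters,
# with the embedding and LM head trained in full (hence the ~669MB adapter).
peft_config = LoraConfig(
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    modules_to_save=["embed_tokens", "lm_head"],
    task_type="CAUSAL_LM",
)

# Hyperparameters as updated in this commit.
args = DPOConfig(
    output_dir="gemma-3-270m-icm-dpo-adapter",  # placeholder path
    beta=0.1,                                   # KL penalty, raised from 0.02
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,              # effective batch size 32
    learning_rate=5e-6,
    num_train_epochs=3,
    max_length=1024,
    max_prompt_length=512,
    warmup_ratio=0.1,
    weight_decay=0.01,
    max_grad_norm=1.0,
    optim="paged_adamw_8bit",
    bf16=True,
    gradient_checkpointing=True,
    eval_steps=50,
    save_steps=100,
    logging_steps=10,
    remove_unused_columns=False,
)

trainer = DPOTrainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    processing_class=tokenizer,
    peft_config=peft_config,
)
trainer.train()
```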
adapter_config.json CHANGED
```diff
@@ -28,13 +28,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "down_proj",
-    "v_proj",
+    "o_proj",
     "q_proj",
+    "v_proj",
+    "up_proj",
     "k_proj",
-    "o_proj",
-    "up_proj"
+    "down_proj",
+    "gate_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",
```
adapter_model.safetensors CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67361f39938126afc8829ec3a771adc6af04dc92100c369d4bbf63931548b824
+oid sha256:6ea8b9b9fe8720ed96c0cc938abb9da0ec35f9fbf0f18d92681aee01e2ec47e1
 size 701497992
```
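The safetensors change only swaps the Git LFS pointer's SHA-256 (the blob size, 701497992 bytes, is unchanged). To confirm a downloaded copy matches the new pointer, a small sketch with a hypothetical local path:

```python
import hashlib

expected = "6ea8b9b9fe8720ed96c0cc938abb9da0ec35f9fbf0f18d92681aee01e2ec47e1"

h = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:  # hypothetical local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == expected, "weights do not match the LFS pointer"
```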
results.json CHANGED
```diff
@@ -1,11 +1,11 @@
 {
   "training_metrics": {
-    "train_runtime": 567.3455,
-    "train_samples_per_second": 3.194,
-    "train_steps_per_second": 0.4,
+    "train_runtime": 230.3572,
+    "train_samples_per_second": 13.805,
+    "train_steps_per_second": 0.443,
     "total_flos": 0.0,
-    "train_loss": 0.6932530382131165,
-    "epoch": 1.0
+    "train_loss": 0.6297222118751675,
+    "epoch": 3.0
   },
   "config": {
     "model_name": "google/gemma-3-270m-it",
@@ -28,23 +28,23 @@
       "lm_head"
     ],
     "max_train_samples": null,
-    "beta": 0.02,
+    "beta": 0.1,
     "max_length": 1024,
     "max_prompt_length": 512,
-    "batch_size": 1,
+    "batch_size": 4,
     "gradient_accumulation_steps": 8,
-    "learning_rate": 2e-07,
-    "num_train_epochs": 1,
+    "learning_rate": 5e-06,
+    "num_train_epochs": 3,
     "warmup_ratio": 0.1,
-    "weight_decay": 0.05,
-    "max_grad_norm": 0.5,
+    "weight_decay": 0.01,
+    "max_grad_norm": 1.0,
     "gradient_checkpointing": true,
     "fp16": false,
     "bf16": true,
     "optim": "paged_adamw_8bit",
     "remove_unused_columns": false,
-    "eval_steps": 250,
-    "save_steps": 500,
+    "eval_steps": 50,
+    "save_steps": 100,
     "logging_steps": 10,
     "eval_strategy": "steps",
     "save_strategy": "steps",
```
training_config.json CHANGED
```diff
@@ -19,23 +19,23 @@
     "lm_head"
   ],
   "max_train_samples": null,
-  "beta": 0.02,
+  "beta": 0.1,
   "max_length": 1024,
   "max_prompt_length": 512,
-  "batch_size": 1,
+  "batch_size": 4,
   "gradient_accumulation_steps": 8,
-  "learning_rate": 2e-07,
-  "num_train_epochs": 1,
+  "learning_rate": 5e-06,
+  "num_train_epochs": 3,
   "warmup_ratio": 0.1,
-  "weight_decay": 0.05,
-  "max_grad_norm": 0.5,
+  "weight_decay": 0.01,
+  "max_grad_norm": 1.0,
   "gradient_checkpointing": true,
   "fp16": false,
   "bf16": true,
   "optim": "paged_adamw_8bit",
   "remove_unused_columns": false,
-  "eval_steps": 250,
-  "save_steps": 500,
+  "eval_steps": 50,
+  "save_steps": 100,
   "logging_steps": 10,
   "eval_strategy": "steps",
   "save_strategy": "steps",
```