Tippawan committed
Commit ec77ab7 · verified · 1 Parent(s): b07ae82

End of training

Files changed (2):
  1. README.md +13 -12
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
  - axolotl
  - generated_from_trainer
  model-index:
- - name: proof-reading-SeaLLM3-7B-Chat-3090-v9
+ - name: proof-reading-SeaLLM3-7B-Chat-3090-v10
    results: []
  ---

@@ -26,8 +26,9 @@ load_in_4bit: true
  strict: false

  datasets:
- - path: Tippawan/p9-seallm
+ - path: Tippawan/pr-10-wiki-seallm
    type: sharegpt
+   split: 'train[:100000]'
    conversation: chatml
    field_messages: messages
  chat_template: chatml
@@ -41,7 +42,7 @@ eval_sample_packing: false
  pad_to_sequence_len: false

  push_to_hub: true
- hub_model_id: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v9 # Replace with your Hugging Face repo ID
+ hub_model_id: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v10 # Replace with your Hugging Face repo ID
  use_auth_token: true # Ensure you have set your Hugging Face API token in the environment
  hub_private_repo: true # Set to true if you want the repository to be private
  hub_strategy: all_checkpoints
@@ -49,22 +50,22 @@ save_total_limit: 3
  load_best_model_at_end: true

  adapter: lora
- lora_model_dir: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v8
+ lora_model_dir: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v9
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  lora_target_linear: true
  lora_fan_in_fan_out:

- wandb_project: proof-reading-SeaLLM3-7B-Chat-3090-v9
+ wandb_project: proof-reading-SeaLLM3-7B-Chat-3090-v10
  wandb_entity:
  wandb_watch:
  wandb_name:
  wandb_log_model:

  gradient_accumulation_steps: 4
- micro_batch_size: 8
- num_epochs: 3 #editted 3
+ micro_batch_size: 2
+ num_epochs: 1 #editted 3
  optimizer: adamw_torch
  lr_scheduler: cosine
  learning_rate: 0.0002
@@ -96,7 +97,7 @@ special_tokens:

  </details><br>

- # proof-reading-SeaLLM3-7B-Chat-3090-v9
+ # proof-reading-SeaLLM3-7B-Chat-3090-v10

  This model is a fine-tuned version of [SeaLLMs/SeaLLM3-7B-Chat](https://huggingface.co/SeaLLMs/SeaLLM3-7B-Chat) on the None dataset.

@@ -118,15 +119,15 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 0.0002
- - train_batch_size: 8
- - eval_batch_size: 8
+ - train_batch_size: 2
+ - eval_batch_size: 2
  - seed: 42
  - gradient_accumulation_steps: 4
- - total_train_batch_size: 32
+ - total_train_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
- - num_epochs: 3
+ - num_epochs: 1

  ### Training results

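The hyperparameter changes in this diff are consistent with each other: `micro_batch_size` drops from 8 to 2 while `gradient_accumulation_steps` stays at 4, so the reported `total_train_batch_size` falls from 32 to 8, and training now runs for a single epoch over a `train[:100000]` slice of the new dataset. The sketch below is not part of the commit; it only illustrates that arithmetic and how such a slice is typically requested with the `datasets` library. The device count is an assumption (a single RTX 3090 is implied by the repo name), and the dataset repo may be private, so authentication could be required.

```python
from datasets import load_dataset

# Values taken from the updated config in this commit.
micro_batch_size = 2             # was 8 in v9
gradient_accumulation_steps = 4  # unchanged
num_devices = 1                  # assumption: one RTX 3090

# Effective (total) train batch size reported in the README: 2 * 4 * 1 = 8.
total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 8

# The new `split: 'train[:100000]'` line restricts training to the first
# 100k examples; the same slice syntax works when loading the dataset directly.
subset = load_dataset("Tippawan/pr-10-wiki-seallm", split="train[:100000]")
print(len(subset))
```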
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5e8cba0fdd3c3de5054de7a5ea485dc2cb50911a2fef6c7be0f2d46d95e025c6
+ oid sha256:510bb9fcb5e688917e13ab4eb4ad4b47014c4f16f157be766a5439ece5fe30b1
  size 161621802
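Only the Git LFS pointer for `adapter_model.bin` changes here: the `oid` now references the newly trained LoRA weights, while the payload size is unchanged. Below is a minimal sketch, not taken from the commit, of loading these adapter weights on top of the base model with PEFT; the repo IDs come from the config above, while the dtype, device placement, and access assumptions (the hub repo is configured as private, so a valid token may be needed) are illustrative.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model named in the README; dtype/device settings are assumptions.
base = AutoModelForCausalLM.from_pretrained(
    "SeaLLMs/SeaLLM3-7B-Chat",
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM3-7B-Chat")

# PeftModel resolves the adapter weights (the adapter_model.bin updated in
# this commit) from the hub repo and attaches them to the base model.
model = PeftModel.from_pretrained(base, "Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v10")
model.eval()
```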