Delta-Vector committed on
Commit
209b08d
·
verified ·
1 Parent(s): 5dece29

Create mag.yml

Browse files
Files changed (1) hide show
  1. mag.yml +83 -0
mag.yml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: NewEden/32B-inst
2
+ model_type: AutoModelForCausalLM
3
+ tokenizer_type: AutoTokenizer
4
+
5
+ hub_model_id: NewEden/32b-mag
6
+ hub_strategy: "all_checkpoints"
7
+ push_dataset_to_hub:
8
+ hf_use_auth_token: true
9
+
10
+ plugins:
11
+ - axolotl.integrations.liger.LigerPlugin
12
+ - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
13
+ liger_rope: true
14
+ liger_rms_norm: true
15
+ liger_layer_norm: true
16
+ liger_glu_activation: true
17
+ liger_fused_linear_cross_entropy: false
18
+ cut_cross_entropy: true
19
+
20
+ load_in_8bit: false
21
+ load_in_4bit: false
22
+ strict: false
23
+
24
+ datasets:
25
+ - path: PocketDoc/Dans-Personamaxx-Logs
26
+ type: dan-chat-advanced
27
+ - path: anthracite-org/kalo-opus-instruct-22k-no-refusal
28
+ type: dan-chat-advanced
29
+ - path: lodrick-the-lafted/kalo-opus-instruct-3k-filtered
30
+ type: dan-chat-advanced
31
+ - path: anthracite-org/nopm_claude_writing_fixed
32
+ type: dan-chat-advanced
33
+ - path: anthracite-org/kalo_opus_misc_240827
34
+ type: dan-chat-advanced
35
+ - path: anthracite-org/kalo_misc_part2
36
+ type: dan-chat-advanced
37
+ - path: NewEden/Claude-Instruct-5K
38
+ type: dan-chat-advanced-
39
+ - path: NewEden/Claude-Instruct-2.7K
40
+ type: dan-chat-advanced
41
+ dataset_prepared_path: prepared_data
42
+ val_set_size: 0.0
43
+ output_dir: ./qwq-mag
44
+ sequence_len: 32768
45
+ sample_packing: true
46
+ pad_to_sequence_len: true
47
+
48
+ wandb_project: qwq
49
+ wandb_entity:
50
+ wandb_watch:
51
+ wandb_name: mag-attempt-01
52
+ wandb_log_model:
53
+
54
+ gradient_accumulation_steps: 2
55
+ micro_batch_size: 2
56
+ num_epochs: 2
57
+ optimizer: adamw_bnb_8bit
58
+ lr_scheduler: cosine
59
+ learning_rate: 3e-5
60
+ max_grad_norm: 1.0
61
+
62
+ train_on_inputs: false
63
+ group_by_length: false
64
+ bf16: auto
65
+ fp16:
66
+ tf32: false
67
+
68
+ gradient_checkpointing: unsloth
69
+ early_stopping_patience:
70
+ resume_from_checkpoint:
71
+ local_rank:
72
+ logging_steps: 1
73
+ xformers_attention:
74
+ flash_attention: true
75
+
76
+ warmup_steps: 40
77
+ saves_per_epoch: 2
78
+ debug:
79
+ deepspeed: deepspeed_configs/zero3_bf16.json
80
+ weight_decay: 0.02
81
+ fsdp:
82
+ fsdp_config:
83
+ special_tokens: