Create axolotl_config.yaml
axolotl_config.yaml (ADDED, +128 -0)
@@ -0,0 +1,128 @@
# Weights and Biases logging config
wandb_project: nemo-instruct-tune
wandb_entity:
wandb_watch:
wandb_name: v1
wandb_log_model:

# Model architecture config
base_model: mistralai/Mistral-Nemo-Base-2407
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
chat_template: alpaca

# Hugging Face saving config
hub_model_id: Fizzarolli/nemo-instruct-tune-v1
hub_strategy: all_checkpoints
push_dataset_to_hub:
hf_use_auth_token:

# Model checkpointing config
output_dir: ./lora-out
resume_from_checkpoint:
save_steps:
saves_per_epoch: 10
save_safetensors: true
save_total_limit: 2

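# Note: with saves_per_epoch: 10 and save_total_limit: 2, only the two most
# recent checkpoints are kept locally, while hub_strategy: all_checkpoints
# pushes every checkpoint folder to the Hub repo above (standard Hugging Face
# Trainer semantics; verify against your Axolotl version).
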
# Mixed precision training config
bf16: true
fp16: false
tf32: false

# Model loading config
load_in_8bit: false
load_in_4bit: true
strict: false

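# Note: load_in_4bit: true combined with adapter: qlora (below) is the QLoRA
# recipe: the frozen base model is quantized to 4-bit via bitsandbytes
# (typically NF4) and only the LoRA adapter weights are trained.
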
# Sequence config
sequence_len: 16384
s2_attention: false
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
train_on_inputs: false
group_by_length: false

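# Rough token budget, assuming packing fills each sequence: sequence_len
# (16384) x micro_batch_size (1) x gradient_accumulation_steps (2) = 32768
# tokens per optimizer step per GPU.
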
# QLoRA adapter config
adapter: qlora
lora_model_dir:
lora_r: 64
lora_alpha: 64
lora_dropout: 0.125
lora_fan_in_fan_out:
lora_target_linear:
save_embedding_layers:
peft_layers_to_transform:
peft_use_dora: true
peft_use_rslora:
peft_layer_replication:
lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj
lora_modules_to_save:

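# Note: lora_alpha == lora_r gives a LoRA scaling factor of alpha/r = 1.0
# (peft_use_rslora is unset, so the alpha/sqrt(r) variant does not apply).
# peft_use_dora: true enables DoRA (weight-decomposed LoRA), which adds a
# learned per-weight magnitude term; it trains somewhat slower but often
# improves quality at a given rank. The target modules cover every linear
# projection in the attention and MLP blocks.
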
# Unfrozen parameters for FFT
unfrozen_parameters:

# Dataset config
datasets:
  - path: BeaverAI/Nemo-Inst-Tune-ds
    type: chat_template
val_set_size: 0.05
evaluation_strategy:
eval_steps:
evals_per_epoch: 20
test_datasets:
dataset_prepared_path: ./prepared-datasets
shuffle_merged_datasets: true

# Training hyperparameters
num_epochs: 1
gradient_accumulation_steps: 2
micro_batch_size: 1
eval_batch_size: 1
warmup_steps: 25
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 0.000007
loraplus_lr_ratio: 8
loraplus_lr_embedding:
cosine_min_lr_ratio: 0.1
weight_decay: 0.1
max_grad_norm: 1
logging_steps: 1

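# Note: loraplus_lr_ratio: 8 applies the LoRA+ scheme, training the adapter's
# B matrices at 8x the base rate: 7e-6 x 8 = 5.6e-5. With
# cosine_min_lr_ratio: 0.1, the cosine schedule decays to 10% of peak
# (7e-7 for the A matrices) rather than to zero.
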
# Model optimization
gradient_checkpointing: unsloth
xformers_attention: false
flash_attention: true
sdp_attention: false
unsloth_cross_entropy_loss: false
unsloth_lora_mlp: false
unsloth_lora_qkv: false
unsloth_lora_o: false

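# Note: gradient_checkpointing: unsloth selects Unsloth's checkpointing
# variant to cut activation memory, which matters at sequence_len: 16384.
# flash_attention: true is also what lets sample_packing keep packed
# sequences from attending to one another (Axolotl routes packing through
# FlashAttention's variable-length kernels; behavior may vary by version).
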
# Loss monitoring config
early_stopping_patience: false
loss_watchdog_threshold: 100.0
loss_watchdog_patience: 3

# Debug config
debug: true
seed: 42

# DeepSpeed and FSDP config
deepspeed: deepspeed_configs/zero2.json
fsdp:
fsdp_config:

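# Note: ZeRO stage 2 shards optimizer states and gradients across GPUs (the
# parameters themselves stay replicated), so the per-GPU token budget above
# scales with the number of GPUs in the data-parallel group.
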
# Token config
special_tokens:
  pad_token: "<pad>"
tokens:
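
# Note: special_tokens.pad_token assumes "<pad>" exists in (or will be added
# to) the Mistral-Nemo tokenizer; check the tokenizer vocab before training.
#
# To run (a minimal sketch, assuming a standard Axolotl install and that this
# file is saved as axolotl_config.yaml):
#   python -m axolotl.cli.preprocess axolotl_config.yaml
#   accelerate launch -m axolotl.cli.train axolotl_config.yaml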