Nessii013 committed (verified)
Commit 494f1e5 · 1 parent: f3d9037

Create oumi_train.yaml

Files changed (1):
  1. oumi/oumi_train.yaml (+71, -0)
oumi/oumi_train.yaml ADDED
@@ -0,0 +1,71 @@
+# LoRA config for CALM 405B.
+
+model:
+  model_name: "meta-llama/Llama-3.1-8B-Instruct"
+  model_max_length: 4096
+  torch_dtype_str: "bfloat16"
+  attn_implementation: "sdpa"
+  load_pretrained_weights: True
+  trust_remote_code: True
+  tokenizer_pad_token: "<|finetune_right_pad_id|>"
+  enable_liger_kernel: True
+
+data:
+  train:
+    datasets:
+      - dataset_name: "text_sft_jsonl"
+        dataset_path: "/path/to/training/dataset.jsonl"
+        shuffle: True
+        seed: 42
+    collator_name: "text_completions_only_with_padding"
+    target_col: "messages"
+    use_async_dataset: True
+    seed: 42
+  validation:
+    datasets:
+      - dataset_name: "text_sft_jsonl"
+        dataset_path: "/path/to/validation/dataset.jsonl"
+    collator_name: "text_completions_only_with_padding"
+    target_col: "messages"
+    use_async_dataset: True
+    seed: 42
+
+training:
+  trainer_type: "TRL_SFT"
+  use_peft: True
+  save_steps: 300
+  num_train_epochs: 1
+  per_device_train_batch_size: 16
+  gradient_accumulation_steps: 1
+  eval_strategy: "steps"
+  eval_steps: 300
+  per_device_eval_batch_size: 4
+
+  enable_gradient_checkpointing: True
+  gradient_checkpointing_kwargs:
+    use_reentrant: False
+  ddp_find_unused_parameters: False
+  optimizer: "adamw_torch_fused"
+  learning_rate: 4.0e-05
+  warmup_steps: 24
+  weight_decay: 0.01
+  max_grad_norm: 10
+  compile: False
+
+  dataloader_num_workers: "auto"
+  dataloader_prefetch_factor: 32
+
+  logging_steps: 10
+  log_model_summary: False
+  empty_device_cache_steps: 1
+  include_performance_metrics: True
+  output_dir: "output/llama405b.qlora"
+  enable_wandb: True
+
+fsdp:
+  enable_fsdp: True
+  forward_prefetch: True
+  use_orig_params: True
+  cpu_offload: True
+  auto_wrap_policy: "TRANSFORMER_BASED_WRAP"
+  transformer_layer_cls: "LlamaDecoderLayer"
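
Note: the file sets use_peft: True, but this commit does not add a peft: section, so LoRA hyperparameters fall back to Oumi's defaults. For reference only, a minimal sketch of what such a section could look like, assuming Oumi's usual PEFT parameter names (lora_r, lora_alpha, lora_dropout, lora_target_modules); the values below are illustrative and are not part of the committed file:

peft:
  # Illustrative values only; not part of this commit.
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.0
  lora_target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"

Once the dataset_path placeholders point at real JSONL files, a config like this is typically launched with the Oumi CLI, e.g. "oumi train -c oumi/oumi_train.yaml"; the fsdp: section takes effect on multi-GPU launches.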