Wendy-Fly committed
Commit d1a5569 · verified · 1 Parent(s): 5f4033b

Upload qwen2vl_lora_sft.yaml with huggingface_hub

Files changed (1)
  1. qwen2vl_lora_sft.yaml +9 -39
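The commit message is the default one produced by the huggingface_hub Python client. A minimal sketch of such an upload, assuming a hypothetical repo id (the destination repo is not shown in this view) and a token configured via huggingface-cli login:

from huggingface_hub import HfApi

api = HfApi()  # reads the token saved by `huggingface-cli login` (or HF_TOKEN)
api.upload_file(
    path_or_fileobj="qwen2vl_lora_sft.yaml",  # local file to push
    path_in_repo="qwen2vl_lora_sft.yaml",     # destination path inside the repo
    repo_id="Wendy-Fly/example-repo",         # hypothetical: actual repo id not shown here
    commit_message="Upload qwen2vl_lora_sft.yaml with huggingface_hub",
)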
qwen2vl_lora_sft.yaml CHANGED
@@ -1,43 +1,13 @@
+### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
+
 ### model
 model_name_or_path: /home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/ckpt
-image_max_pixels: 262144
-video_max_pixels: 16384
-trust_remote_code: true
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_rank: 8
-lora_target: all
-
-### dataset
-dataset: Percption
+adapter_name_or_path: /home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/LLaMA-Factory/results/Qwen2.5-VL-3B_all
 template: qwen2_vl
-cutoff_len: 4096
-max_samples: 1000
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: results/Qwen2.5-VL-3B_all
-logging_steps: 10
-save_steps: 500
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 3.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-# val_size: 0.1
-# per_device_eval_batch_size: 1
-# eval_strategy: steps
-# eval_steps: 500
+trust_remote_code: true
+
+### export
+export_dir: output/Qwen2.5-VL-3B_all
+export_size: 5
+export_device: cpu
+export_legacy_format: false
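With this change the file is no longer an SFT training recipe but a LoRA merge/export recipe for LLaMA-Factory: the method, dataset, output, train, and eval sections are gone, and an adapter_name_or_path plus an ### export section take their place. Where the old file would have been run with the train subcommand, the new one is meant for export. A minimal sketch of the invocation, assuming llamafactory-cli is installed and the checkpoint paths in the YAML exist:

import subprocess

# Merge the LoRA adapter (adapter_name_or_path) into the base model
# (model_name_or_path) and write the merged weights to export_dir,
# sharded into ~5 GB files (export_size), with the merge done on CPU
# (export_device: cpu).
subprocess.run(["llamafactory-cli", "export", "qwen2vl_lora_sft.yaml"], check=True)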