|
data_cfgs: |
|
eval_data_files: null |
|
eval_datasets: null |
|
eval_optional_args: [] |
|
eval_size: null |
|
eval_split: null |
|
eval_subset: null |
|
eval_template: null |
|
train_data_files: 8713_tokenized_v2.pt |
|
train_datasets: /aifs4su/yaodong/datasets/llava_annotated |
|
train_optional_args: [] |
|
train_size: null |
|
train_split: train |
|
train_subset: null |
|
train_template: ANYTHING_TI2TI |
|
logger_cfgs: |
|
cache_dir: null |
|
log_project: align-anything |
|
log_run_name: sft |
|
log_type: wandb |
|
output_dir: ../outputs/sft_chameleon_0727_0802_v2.1_1e-4 |
|
save_interval: 500
|
model_cfgs: |
|
model_max_length: 4096 |
|
model_name_or_path: /aifs4su/yaodong/projects/hantao/anole/facilitating_image_generation/model/chameleon_hf_0727 |
|
trust_remote_code: true |
|
special_tokens: null |
|
train_cfgs: |
|
adam_betas: |
|
- 0.9 |
|
- 0.95 |
|
adam_epsilon: 1.0e-08 |
|
bf16: true |
|
ds_cfgs: ds_z3_config.json |
|
epochs: 3 |
|
eval_interval: 1000 |
|
eval_strategy: steps |
|
fp16: false |
|
freeze_language_model: false |
|
gradient_accumulation_steps: 2
|
gradient_checkpointing: true |
|
learning_rate: 0.0001 |
|
lr_scheduler_type: cosine |
|
lr_warmup_ratio: 0.03 |
|
max_grad_norm: 1.0 |
|
per_device_eval_batch_size: 2
|
per_device_train_batch_size: 2
|
seed: 42 |
|
weight_decay: 0.0 |
|
|