# dev_align_anything_0 / arguments.yaml
# htlou
# init
# 6e45475
# Dataset configuration. All eval_* fields are null/empty, so evaluation
# data is disabled for this run; training reads a pre-tokenized tensor file.
data_cfgs:
  eval_data_files: null
  eval_datasets: null
  eval_optional_args: []
  eval_size: null
  eval_split: null
  eval_subset: null
  eval_template: null
  # Pre-tokenized training data (PyTorch .pt file) located under train_datasets.
  train_data_files: 8713_tokenized_v2.pt
  train_datasets: /aifs4su/yaodong/datasets/llava_annotated
  train_optional_args: []
  train_size: null
  train_split: train
  train_subset: null
  # Formatting/chat template key; presumably text+image -> text+image
  # ("TI2TI") — NOTE(review): confirm against the template registry.
  train_template: ANYTHING_TI2TI
# Logging and checkpointing configuration (Weights & Biases backend).
logger_cfgs:
  cache_dir: null
  log_project: align-anything
  log_run_name: sft
  log_type: wandb
  # Run name encodes model/date/version/learning-rate for traceability.
  output_dir: ../outputs/sft_chameleon_0727_0802_v2.1_1e-4
  # Save a checkpoint every N optimizer steps. Step counts are integers;
  # was the float 500.0, which strict int validation rejects.
  save_interval: 500
# Model configuration.
model_cfgs:
  # Maximum sequence length in tokens.
  model_max_length: 4096
  # Local checkpoint path (Chameleon HF-format weights dated 0727).
  model_name_or_path: /aifs4su/yaodong/projects/hantao/anole/facilitating_image_generation/model/chameleon_hf_0727
  # Allows execution of custom modeling code shipped with the checkpoint.
  trust_remote_code: true
  special_tokens: null
# Training hyperparameters.
train_cfgs:
  # AdamW betas (beta1, beta2).
  adam_betas:
    - 0.9
    - 0.95
  adam_epsilon: 1.0e-08
  # bf16 mixed precision enabled; fp16 must stay false (mutually exclusive).
  bf16: true
  # DeepSpeed ZeRO stage-3 configuration file.
  ds_cfgs: ds_z3_config.json
  epochs: 3
  # Evaluate every N steps (eval data is currently disabled in data_cfgs).
  eval_interval: 1000
  eval_strategy: steps
  fp16: false
  freeze_language_model: false
  # Integer step/batch counts below were floats (2.0); strict integer
  # validation in DeepSpeed/Transformers-style trainers rejects floats.
  gradient_accumulation_steps: 2
  gradient_checkpointing: true
  learning_rate: 0.0001
  lr_scheduler_type: cosine
  lr_warmup_ratio: 0.03
  max_grad_norm: 1.0
  per_device_eval_batch_size: 2
  per_device_train_batch_size: 2
  seed: 42
  weight_decay: 0.0