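# PPO training configuration for the text-image-to-text-image task (align-anything).

# bitsandbytes quantization settings (inactive here: use_bnb is false).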
bnb_cfgs:
  bnb_4bit_compute_dtype: float16
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: true
  load_in_4bit: true
  load_in_8bit: false
  use_bnb: false
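
# Datasets for PPO prompts (train_*), evaluation (eval_*), and the auxiliary
# pretraining (ptx_*) loss.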
data_cfgs:
  eval_data_files: null
  eval_datasets: null
  eval_optional_args: []
  eval_size: null
  eval_split: null
  eval_subset: null
  eval_template: null
  ptx_data_files: flux_ptx_8k_t2i.pt
  ptx_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs
  ptx_optional_args: []
  ptx_size: null
  ptx_split: null
  ptx_subset: null
  ptx_template: Chameleon_preference
  train_data_files: t2i_llf_prompt_only_tokenize.pt
  train_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs
  train_optional_args: []
  train_size: 5000
  train_split: null
  train_subset: null
  train_template: spavl_ti2ti
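
# Logging (wandb) and checkpoint output settings.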
logger_cfgs:
  cache_dir: null
  log_project: align-anything
  log_run_name: ppo
  log_type: wandb
  output_dir: ../outputs/ppo_t2i_llf_1018
  save_interval: 30.0
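
# LoRA / PEFT settings (inactive here: use_lora is false).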
lora_cfgs:
  inference_mode: false
  lora_alpha: 16
  lora_dropout: 0.1
  r: 16
  save_full_model: true
  target_modules:
    - q_proj
    - v_proj
  task_type: TaskType.CAUSAL_LM
  use_lora: false
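
# Actor, reward, and reward-critic model paths plus generation settings.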
model_cfgs:
  actor_model_name_or_path: /data/align-anything/hantao/models/0830_4k_sft_flux
  model_max_length: 2048
  repetition_penalty: 1.0
  reward_critic_model_name_or_path: /data/align-anything/hantao/align-anything/outputs/rm_t2i_llf_1017
  reward_model_name_or_path: /data/align-anything/hantao/align-anything/outputs/rm_t2i_llf_1017
  temperature: 1.0
  top_p: 1.0
  trust_remote_code: true
special_tokens: null
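
# PPO optimization hyperparameters: learning rates, PPO/value clipping, KL penalty,
# GAE, batch sizes, and the ptx loss coefficient.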
train_cfgs:
  actor_gradient_checkpointing: true
  actor_lr: 1.0e-05
  actor_lr_scheduler_type: cosine
  actor_lr_warmup_ratio: 0.03
  actor_weight_decay: 0.01
  adam_betas:
    - 0.9
    - 0.95
  bf16: true
  clip_range_ratio: 0.2
  clip_range_score: 50.0
  clip_range_value: 5.0
  critic_gradient_checkpointing: true
  critic_lr: 5.0e-06
  critic_lr_scheduler_type: constant
  critic_lr_warmup_ratio: 0.03
  critic_weight_decay: 0.0
  ds_cfgs: ds_z3_config.json
  epochs: 3
  eval_interval: 10
  eval_strategy: epoch
  fp16: false
  freeze_language_model: true
  freeze_mm_proj: true
  freeze_vision_tower: false
  gae_lambda: 0.95
  gamma: 1.0
  gradient_accumulation_steps: 2
  kl_coeff: 0.02
  normalize_reward: false
  per_device_eval_batch_size: 8
  per_device_prompt_batch_size: 8
  per_device_train_batch_size: 8
  ptx_coeff: 16.0
  seed: 42
  update_iters: 1