---
# PPO training configuration (text+image -> text+image task).
# Reconstructed into valid block YAML from a whitespace-flattened dump;
# all keys/values preserved verbatim, only structure and comments added.

# BitsAndBytes quantization settings (inactive: use_bnb is false).
bnb_cfgs:
  bnb_4bit_compute_dtype: float16
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: true
  load_in_4bit: true
  load_in_8bit: false
  use_bnb: false

# Dataset paths and templates for train / eval / PTX (pretraining-mix) splits.
data_cfgs:
  eval_data_files: null
  eval_datasets: null
  eval_optional_args: []
  eval_size: null
  eval_split: null
  eval_subset: null
  eval_template: null
  ptx_data_files: flux_ptx_8k_t2i.pt
  ptx_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs
  ptx_optional_args: []
  ptx_size: null
  ptx_split: null
  ptx_subset: null
  ptx_template: Chameleon_preference
  train_data_files: t2i_llf_prompt_only_tokenize.pt
  train_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs
  train_optional_args: []
  train_size: 5000
  train_split: null
  train_subset: null
  train_template: spavl_ti2ti

# Experiment logging (wandb) and checkpoint output location.
logger_cfgs:
  cache_dir: null
  log_project: align-anything
  log_run_name: ppo
  log_type: wandb
  output_dir: ../outputs/ppo_t2i_llf_1018
  save_interval: 30.0

# LoRA adapter settings (inactive: use_lora is false).
lora_cfgs:
  inference_mode: false
  lora_alpha: 16
  lora_dropout: 0.1
  r: 16
  save_full_model: true
  target_modules:
    - q_proj
    - v_proj
  task_type: TaskType.CAUSAL_LM
  use_lora: false

# Model checkpoints and generation/sampling parameters.
model_cfgs:
  actor_model_name_or_path: /data/align-anything/hantao/models/0830_4k_sft_flux
  model_max_length: 2048
  repetition_penalty: 1.0
  reward_critic_model_name_or_path: /data/align-anything/hantao/align-anything/outputs/rm_t2i_llf_1017
  reward_model_name_or_path: /data/align-anything/hantao/align-anything/outputs/rm_t2i_llf_1017
  temperature: 1.0
  top_p: 1.0
  trust_remote_code: true

# NOTE(review): placed at top level rather than inside model_cfgs, following
# the *_cfgs stanza convention of this file — confirm against the consumer's
# config schema.
special_tokens: null

# PPO optimization hyperparameters for actor and critic.
train_cfgs:
  actor_gradient_checkpointing: true
  actor_lr: 1.0e-05
  actor_lr_scheduler_type: cosine
  actor_lr_warmup_ratio: 0.03
  actor_weight_decay: 0.01
  adam_betas:
    - 0.9
    - 0.95
  bf16: true
  clip_range_ratio: 0.2
  clip_range_score: 50.0
  clip_range_value: 5.0
  critic_gradient_checkpointing: true
  critic_lr: 5.0e-06
  critic_lr_scheduler_type: constant
  critic_lr_warmup_ratio: 0.03
  critic_weight_decay: 0.0
  ds_cfgs: ds_z3_config.json
  epochs: 3
  eval_interval: 10
  eval_strategy: epoch
  fp16: false
  freeze_language_model: true
  freeze_mm_proj: true
  freeze_vision_tower: false
  gae_lambda: 0.95
  gamma: 1.0
  gradient_accumulation_steps: 2
  kl_coeff: 0.02
  normalize_reward: false
  per_device_eval_batch_size: 8
  per_device_prompt_batch_size: 8
  per_device_train_batch_size: 8
  ptx_coeff: 16.0
  seed: 42
  update_iters: 1