ZiyuG committed on
Commit
20dd1d5
·
verified ·
1 Parent(s): 1976553

Upload 3 files

Browse files
Files changed (3) hide show
  1. dpo/config.json +12 -0
  2. dpo/config.yaml +80 -0
  3. dpo/pytorch_model.bin +3 -0
dpo/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "Showo",
3
+ "_diffusers_version": "0.30.1",
4
+ "codebook_size": 8192,
5
+ "llm_model_path": "microsoft/phi-1_5",
6
+ "llm_vocab_size": 50295,
7
+ "load_from_showo": false,
8
+ "mask_token_id": 58497,
9
+ "num_vq_tokens": 256,
10
+ "vocab_size": 58498,
11
+ "w_clip_vit": false
12
+ }
dpo/config.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb:
2
+ entity: null
3
+ resume: auto
4
+ run_id: vkmnwd3v
5
+ experiment:
6
+ project: training
7
+ name: show-o-dpo(only sft)
8
+ output_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5
9
+ save_every: 10000
10
+ eval_every: 2500
11
+ generate_every: 1000
12
+ log_every: 1
13
+ log_grad_norm_every: 20
14
+ logging_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5/logs
15
+ model:
16
+ vq_model:
17
+ type: magvitv2
18
+ vq_model_name: showlab/magvitv2
19
+ pretrained_model_path: /ssd7.7tb2/zrr/tcz/Show-o/ckts/magvitv2/pytorch_model.safetensors
20
+ showo:
21
+ load_from_showo: false
22
+ pretrained_model_path: /ssd7.7tb2/zrr/tcz/Show-o/ckts/pytorch_model.safetensors
23
+ w_clip_vit: false
24
+ vocab_size: 58498
25
+ llm_vocab_size: 50295
26
+ llm_model_path: microsoft/phi-1_5
27
+ codebook_size: 8192
28
+ num_vq_tokens: 256
29
+ num_new_special_tokens: 10
30
+ gradient_checkpointing: true
31
+ dataset:
32
+ gen_type: t2i
33
+ params:
34
+ data_path: /ssd7.7tb2/zrr/tcz/Show-o/ov/t2i_dpo_draft.yaml
35
+ validation_prompts_file: validation_prompts/showoprompts.txt
36
+ shuffle_buffer_size: 1000
37
+ num_workers: 32
38
+ resolution: 256
39
+ pin_memory: true
40
+ persistent_workers: true
41
+ preprocessing:
42
+ max_seq_length: 128
43
+ resolution: 256
44
+ center_crop: false
45
+ random_flip: false
46
+ optimizer:
47
+ name: adamw
48
+ params:
49
+ learning_rate: 1.0e-05
50
+ scale_lr: false
51
+ beta1: 0.9
52
+ beta2: 0.999
53
+ weight_decay: 0.01
54
+ epsilon: 1.0e-08
55
+ lr_scheduler:
56
+ scheduler: cosine
57
+ params:
58
+ learning_rate: ${optimizer.params.learning_rate}
59
+ warmup_ratio: 0.1
60
+ training:
61
+ gradient_accumulation_steps: 1
62
+ batch_size_t2i: 10
63
+ mixed_precision: bf16
64
+ enable_tf32: true
65
+ seed: 10086
66
+ overfit_one_batch: false
67
+ cond_dropout_prob: 0.1
68
+ min_masking_rate: 0.0
69
+ label_smoothing: 0.0
70
+ max_grad_norm: null
71
+ guidance_scale: 0.0
72
+ generation_timesteps: 12
73
+ beta: 0.1
74
+ reward_coef: 0
75
+ dpo_coef: 1
76
+ sft_coef: 0
77
+ num_epoch: 1
78
+ inference:
79
+ generation_timesteps: 18
80
+ guidance_scale: 1.75
dpo/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b6ca792b5e4d0569b45a6f9511f422c3654d4276f5c1cb8ca2464eab4851cf
3
+ size 2896413564