Upload 3 files
Browse files- dpo/config.json +12 -0
- dpo/config.yaml +80 -0
- dpo/pytorch_model.bin +3 -0
dpo/config.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "Showo",
|
3 |
+
"_diffusers_version": "0.30.1",
|
4 |
+
"codebook_size": 8192,
|
5 |
+
"llm_model_path": "microsoft/phi-1_5",
|
6 |
+
"llm_vocab_size": 50295,
|
7 |
+
"load_from_showo": false,
|
8 |
+
"mask_token_id": 58497,
|
9 |
+
"num_vq_tokens": 256,
|
10 |
+
"vocab_size": 58498,
|
11 |
+
"w_clip_vit": false
|
12 |
+
}
|
dpo/config.yaml
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb:
|
2 |
+
entity: null
|
3 |
+
resume: auto
|
4 |
+
run_id: vkmnwd3v
|
5 |
+
experiment:
|
6 |
+
project: training
|
7 |
+
name: show-o-dpo(only sft)
|
8 |
+
output_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5
|
9 |
+
save_every: 10000
|
10 |
+
eval_every: 2500
|
11 |
+
generate_every: 1000
|
12 |
+
log_every: 1
|
13 |
+
log_grad_norm_every: 20
|
14 |
+
logging_dir: /ssd7.7tb2/zrr/tcz/Show-o/ckts/train_set_hq_prune_full_t2i_dpo_10k_1e-5/logs
|
15 |
+
model:
|
16 |
+
vq_model:
|
17 |
+
type: magvitv2
|
18 |
+
vq_model_name: showlab/magvitv2
|
19 |
+
pretrained_model_path: /ssd7.7tb2/zrr/tcz/Show-o/ckts/magvitv2/pytorch_model.safetensors
|
20 |
+
showo:
|
21 |
+
load_from_showo: false
|
22 |
+
pretrained_model_path: /ssd7.7tb2/zrr/tcz/Show-o/ckts/pytorch_model.safetensors
|
23 |
+
w_clip_vit: false
|
24 |
+
vocab_size: 58498
|
25 |
+
llm_vocab_size: 50295
|
26 |
+
llm_model_path: microsoft/phi-1_5
|
27 |
+
codebook_size: 8192
|
28 |
+
num_vq_tokens: 256
|
29 |
+
num_new_special_tokens: 10
|
30 |
+
gradient_checkpointing: true
|
31 |
+
dataset:
|
32 |
+
gen_type: t2i
|
33 |
+
params:
|
34 |
+
data_path: /ssd7.7tb2/zrr/tcz/Show-o/ov/t2i_dpo_draft.yaml
|
35 |
+
validation_prompts_file: validation_prompts/showoprompts.txt
|
36 |
+
shuffle_buffer_size: 1000
|
37 |
+
num_workers: 32
|
38 |
+
resolution: 256
|
39 |
+
pin_memory: true
|
40 |
+
persistent_workers: true
|
41 |
+
preprocessing:
|
42 |
+
max_seq_length: 128
|
43 |
+
resolution: 256
|
44 |
+
center_crop: false
|
45 |
+
random_flip: false
|
46 |
+
optimizer:
|
47 |
+
name: adamw
|
48 |
+
params:
|
49 |
+
learning_rate: 1.0e-05
|
50 |
+
scale_lr: false
|
51 |
+
beta1: 0.9
|
52 |
+
beta2: 0.999
|
53 |
+
weight_decay: 0.01
|
54 |
+
epsilon: 1.0e-08
|
55 |
+
lr_scheduler:
|
56 |
+
scheduler: cosine
|
57 |
+
params:
|
58 |
+
learning_rate: ${optimizer.params.learning_rate}
|
59 |
+
warmup_ratio: 0.1
|
60 |
+
training:
|
61 |
+
gradient_accumulation_steps: 1
|
62 |
+
batch_size_t2i: 10
|
63 |
+
mixed_precision: bf16
|
64 |
+
enable_tf32: true
|
65 |
+
seed: 10086
|
66 |
+
overfit_one_batch: false
|
67 |
+
cond_dropout_prob: 0.1
|
68 |
+
min_masking_rate: 0.0
|
69 |
+
label_smoothing: 0.0
|
70 |
+
max_grad_norm: null
|
71 |
+
guidance_scale: 0.0
|
72 |
+
generation_timesteps: 12
|
73 |
+
beta: 0.1
|
74 |
+
reward_coef: 0
|
75 |
+
dpo_coef: 1
|
76 |
+
sft_coef: 0
|
77 |
+
num_epoch: 1
|
78 |
+
inference:
|
79 |
+
generation_timesteps: 18
|
80 |
+
guidance_scale: 1.75
|
dpo/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11b6ca792b5e4d0569b45a6f9511f422c3654d4276f5c1cb8ca2464eab4851cf
|
3 |
+
size 2896413564
|