Image-to-Text
Chinese
English
File size: 436 Bytes
3d7aa36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Data locations. Both roots are empty strings in this file.
# NOTE(review): presumably these are filled in per environment before a run - confirm.
image_root: ''   # root for image paths
ann_root: ''     # root for JSON paths
dataset: 'facecaption'

config: './configs'

pretrained: ''
# Canonical lowercase boolean; loads as the same value as the former `False`.
intermediate_hidden_state: false

# Size of the ViT model: 'base' or 'large'.
vit: 'base'
image_size: 224
batch_size_train: 80
batch_size_test: 80

# NOTE(review): queue_size (61440) is an exact multiple of batch_size_train
# (80 x 768). Presumably the consumer relies on that divisibility - verify
# before changing either value.
queue_size: 61440
alpha: 0.4
k_test: 256

# Optimizer and learning-rate schedule.
# Scientific-notation values carry an explicit decimal point: YAML 1.1
# loaders (e.g. PyYAML) resolve `3e-5` as a string, whereas `3.0e-5` is a
# float under both YAML 1.1 and YAML 1.2 loaders. Numeric values are unchanged.
weight_decay: 0.05
init_lr: 3.0e-5
min_lr: 1.0e-6
warmup_lr: 1.0e-6
lr_decay_rate: 0.9
max_epoch: 15
warmup_steps: 20000