Hengly committed on
Commit
4fbd18f
·
verified ·
1 Parent(s): c7c8e4e

Upload 2 files

Browse files
Files changed (2) hide show
  1. added_tokens.json +205 -0
  2. config.json +173 -0
added_tokens.json ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<mask>": 256203,
3
+ "ace_Arab": 256001,
4
+ "ace_Latn": 256002,
5
+ "acm_Arab": 256003,
6
+ "acq_Arab": 256004,
7
+ "aeb_Arab": 256005,
8
+ "afr_Latn": 256006,
9
+ "ajp_Arab": 256007,
10
+ "aka_Latn": 256008,
11
+ "als_Latn": 256162,
12
+ "amh_Ethi": 256009,
13
+ "apc_Arab": 256010,
14
+ "arb_Arab": 256011,
15
+ "ars_Arab": 256012,
16
+ "ary_Arab": 256013,
17
+ "arz_Arab": 256014,
18
+ "asm_Beng": 256015,
19
+ "ast_Latn": 256016,
20
+ "awa_Deva": 256017,
21
+ "ayr_Latn": 256018,
22
+ "azb_Arab": 256019,
23
+ "azj_Latn": 256020,
24
+ "bak_Cyrl": 256021,
25
+ "bam_Latn": 256022,
26
+ "ban_Latn": 256023,
27
+ "bel_Cyrl": 256024,
28
+ "bem_Latn": 256025,
29
+ "ben_Beng": 256026,
30
+ "bho_Deva": 256027,
31
+ "bjn_Arab": 256028,
32
+ "bjn_Latn": 256029,
33
+ "bod_Tibt": 256030,
34
+ "bos_Latn": 256031,
35
+ "bug_Latn": 256032,
36
+ "bul_Cyrl": 256033,
37
+ "cat_Latn": 256034,
38
+ "ceb_Latn": 256035,
39
+ "ces_Latn": 256036,
40
+ "cjk_Latn": 256037,
41
+ "ckb_Arab": 256038,
42
+ "crh_Latn": 256039,
43
+ "cym_Latn": 256040,
44
+ "dan_Latn": 256041,
45
+ "deu_Latn": 256042,
46
+ "dik_Latn": 256043,
47
+ "dyu_Latn": 256044,
48
+ "dzo_Tibt": 256045,
49
+ "ell_Grek": 256046,
50
+ "eng_Latn": 256047,
51
+ "epo_Latn": 256048,
52
+ "est_Latn": 256049,
53
+ "eus_Latn": 256050,
54
+ "ewe_Latn": 256051,
55
+ "fao_Latn": 256052,
56
+ "fij_Latn": 256054,
57
+ "fin_Latn": 256055,
58
+ "fon_Latn": 256056,
59
+ "fra_Latn": 256057,
60
+ "fur_Latn": 256058,
61
+ "fuv_Latn": 256059,
62
+ "gaz_Latn": 256135,
63
+ "gla_Latn": 256060,
64
+ "gle_Latn": 256061,
65
+ "glg_Latn": 256062,
66
+ "grn_Latn": 256063,
67
+ "guj_Gujr": 256064,
68
+ "hat_Latn": 256065,
69
+ "hau_Latn": 256066,
70
+ "heb_Hebr": 256067,
71
+ "hin_Deva": 256068,
72
+ "hne_Deva": 256069,
73
+ "hrv_Latn": 256070,
74
+ "hun_Latn": 256071,
75
+ "hye_Armn": 256072,
76
+ "ibo_Latn": 256073,
77
+ "ilo_Latn": 256074,
78
+ "ind_Latn": 256075,
79
+ "isl_Latn": 256076,
80
+ "ita_Latn": 256077,
81
+ "jav_Latn": 256078,
82
+ "jpn_Jpan": 256079,
83
+ "kab_Latn": 256080,
84
+ "kac_Latn": 256081,
85
+ "kam_Latn": 256082,
86
+ "kan_Knda": 256083,
87
+ "kas_Arab": 256084,
88
+ "kas_Deva": 256085,
89
+ "kat_Geor": 256086,
90
+ "kaz_Cyrl": 256089,
91
+ "kbp_Latn": 256090,
92
+ "kea_Latn": 256091,
93
+ "khk_Cyrl": 256122,
94
+ "khm_Khmr": 256092,
95
+ "kik_Latn": 256093,
96
+ "kin_Latn": 256094,
97
+ "kir_Cyrl": 256095,
98
+ "kmb_Latn": 256096,
99
+ "kmr_Latn": 256099,
100
+ "knc_Arab": 256087,
101
+ "knc_Latn": 256088,
102
+ "kon_Latn": 256097,
103
+ "kor_Hang": 256098,
104
+ "lao_Laoo": 256100,
105
+ "lij_Latn": 256102,
106
+ "lim_Latn": 256103,
107
+ "lin_Latn": 256104,
108
+ "lit_Latn": 256105,
109
+ "lmo_Latn": 256106,
110
+ "ltg_Latn": 256107,
111
+ "ltz_Latn": 256108,
112
+ "lua_Latn": 256109,
113
+ "lug_Latn": 256110,
114
+ "luo_Latn": 256111,
115
+ "lus_Latn": 256112,
116
+ "lvs_Latn": 256101,
117
+ "mag_Deva": 256113,
118
+ "mai_Deva": 256114,
119
+ "mal_Mlym": 256115,
120
+ "mar_Deva": 256116,
121
+ "min_Latn": 256117,
122
+ "mkd_Cyrl": 256118,
123
+ "mlt_Latn": 256120,
124
+ "mni_Beng": 256121,
125
+ "mos_Latn": 256123,
126
+ "mri_Latn": 256124,
127
+ "mya_Mymr": 256126,
128
+ "nld_Latn": 256127,
129
+ "nno_Latn": 256128,
130
+ "nob_Latn": 256129,
131
+ "npi_Deva": 256130,
132
+ "nso_Latn": 256131,
133
+ "nus_Latn": 256132,
134
+ "nya_Latn": 256133,
135
+ "oci_Latn": 256134,
136
+ "ory_Orya": 256136,
137
+ "pag_Latn": 256137,
138
+ "pan_Guru": 256138,
139
+ "pap_Latn": 256139,
140
+ "pbt_Arab": 256143,
141
+ "pes_Arab": 256053,
142
+ "plt_Latn": 256119,
143
+ "pol_Latn": 256140,
144
+ "por_Latn": 256141,
145
+ "prs_Arab": 256142,
146
+ "quy_Latn": 256144,
147
+ "ron_Latn": 256145,
148
+ "run_Latn": 256146,
149
+ "rus_Cyrl": 256147,
150
+ "sag_Latn": 256148,
151
+ "san_Deva": 256149,
152
+ "sat_Beng": 256150,
153
+ "scn_Latn": 256151,
154
+ "shn_Mymr": 256152,
155
+ "sin_Sinh": 256153,
156
+ "slk_Latn": 256154,
157
+ "slv_Latn": 256155,
158
+ "smo_Latn": 256156,
159
+ "sna_Latn": 256157,
160
+ "snd_Arab": 256158,
161
+ "som_Latn": 256159,
162
+ "sot_Latn": 256160,
163
+ "spa_Latn": 256161,
164
+ "srd_Latn": 256163,
165
+ "srp_Cyrl": 256164,
166
+ "ssw_Latn": 256165,
167
+ "sun_Latn": 256166,
168
+ "swe_Latn": 256167,
169
+ "swh_Latn": 256168,
170
+ "szl_Latn": 256169,
171
+ "tam_Taml": 256170,
172
+ "taq_Latn": 256177,
173
+ "taq_Tfng": 256178,
174
+ "tat_Cyrl": 256171,
175
+ "tel_Telu": 256172,
176
+ "tgk_Cyrl": 256173,
177
+ "tgl_Latn": 256174,
178
+ "tha_Thai": 256175,
179
+ "tir_Ethi": 256176,
180
+ "tpi_Latn": 256179,
181
+ "tsn_Latn": 256180,
182
+ "tso_Latn": 256181,
183
+ "tuk_Latn": 256182,
184
+ "tum_Latn": 256183,
185
+ "tur_Latn": 256184,
186
+ "twi_Latn": 256185,
187
+ "tzm_Tfng": 256186,
188
+ "uig_Arab": 256187,
189
+ "ukr_Cyrl": 256188,
190
+ "umb_Latn": 256189,
191
+ "urd_Arab": 256190,
192
+ "uzn_Latn": 256191,
193
+ "vec_Latn": 256192,
194
+ "vie_Latn": 256193,
195
+ "war_Latn": 256194,
196
+ "wol_Latn": 256195,
197
+ "xho_Latn": 256196,
198
+ "ydd_Hebr": 256197,
199
+ "yor_Latn": 256198,
200
+ "yue_Hant": 256199,
201
+ "zho_Hans": 256200,
202
+ "zho_Hant": 256201,
203
+ "zsm_Latn": 256125,
204
+ "zul_Latn": 256202
205
+ }
config.json ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ocrmodel-small-printed-V2/checkpoint-37640",
3
+ "architectures": [
4
+ "VisionEncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "",
8
+ "activation_dropout": 0.0,
9
+ "activation_function": "relu",
10
+ "add_cross_attention": true,
11
+ "architectures": null,
12
+ "attention_dropout": 0.0,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": 0,
16
+ "chunk_size_feed_forward": 0,
17
+ "classifier_dropout": 0.0,
18
+ "cross_attention_hidden_size": 384,
19
+ "d_model": 256,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 1024,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 2,
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "dropout": 0.1,
28
+ "early_stopping": false,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "eos_token_id": 2,
31
+ "exponential_decay_length_penalty": null,
32
+ "finetuning_task": null,
33
+ "forced_bos_token_id": null,
34
+ "forced_eos_token_id": null,
35
+ "id2label": {
36
+ "0": "LABEL_0",
37
+ "1": "LABEL_1"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_decoder": true,
41
+ "is_encoder_decoder": false,
42
+ "label2id": {
43
+ "LABEL_0": 0,
44
+ "LABEL_1": 1
45
+ },
46
+ "layernorm_embedding": true,
47
+ "length_penalty": 1.0,
48
+ "max_length": 20,
49
+ "max_position_embeddings": 512,
50
+ "min_length": 0,
51
+ "model_type": "trocr",
52
+ "no_repeat_ngram_size": 0,
53
+ "num_beam_groups": 1,
54
+ "num_beams": 1,
55
+ "num_return_sequences": 1,
56
+ "output_attentions": false,
57
+ "output_hidden_states": false,
58
+ "output_scores": false,
59
+ "pad_token_id": 1,
60
+ "prefix": null,
61
+ "problem_type": null,
62
+ "pruned_heads": {},
63
+ "remove_invalid_values": false,
64
+ "repetition_penalty": 1.0,
65
+ "return_dict": true,
66
+ "return_dict_in_generate": false,
67
+ "scale_embedding": true,
68
+ "sep_token_id": null,
69
+ "suppress_tokens": null,
70
+ "task_specific_params": null,
71
+ "temperature": 1.0,
72
+ "tf_legacy_loss": false,
73
+ "tie_encoder_decoder": false,
74
+ "tie_word_embeddings": false,
75
+ "tokenizer_class": null,
76
+ "top_k": 50,
77
+ "top_p": 1.0,
78
+ "torch_dtype": null,
79
+ "torchscript": false,
80
+ "typical_p": 1.0,
81
+ "use_bfloat16": false,
82
+ "use_cache": false,
83
+ "use_learned_position_embeddings": true,
84
+ "vocab_size": 256204
85
+ },
86
+ "decoder_start_token_id": 0,
87
+ "encoder": {
88
+ "_name_or_path": "",
89
+ "add_cross_attention": false,
90
+ "architectures": null,
91
+ "attention_probs_dropout_prob": 0.0,
92
+ "bad_words_ids": null,
93
+ "begin_suppress_tokens": null,
94
+ "bos_token_id": null,
95
+ "chunk_size_feed_forward": 0,
96
+ "cross_attention_hidden_size": null,
97
+ "decoder_start_token_id": null,
98
+ "diversity_penalty": 0.0,
99
+ "do_sample": false,
100
+ "early_stopping": false,
101
+ "encoder_no_repeat_ngram_size": 0,
102
+ "encoder_stride": 16,
103
+ "eos_token_id": null,
104
+ "exponential_decay_length_penalty": null,
105
+ "finetuning_task": null,
106
+ "forced_bos_token_id": null,
107
+ "forced_eos_token_id": null,
108
+ "hidden_act": "gelu",
109
+ "hidden_dropout_prob": 0.0,
110
+ "hidden_size": 384,
111
+ "id2label": {
112
+ "0": "LABEL_0",
113
+ "1": "LABEL_1"
114
+ },
115
+ "image_size": 384,
116
+ "initializer_range": 0.02,
117
+ "intermediate_size": 1536,
118
+ "is_decoder": false,
119
+ "is_encoder_decoder": false,
120
+ "label2id": {
121
+ "LABEL_0": 0,
122
+ "LABEL_1": 1
123
+ },
124
+ "layer_norm_eps": 1e-12,
125
+ "length_penalty": 1.0,
126
+ "max_length": 20,
127
+ "min_length": 0,
128
+ "model_type": "deit",
129
+ "no_repeat_ngram_size": 0,
130
+ "num_attention_heads": 6,
131
+ "num_beam_groups": 1,
132
+ "num_beams": 1,
133
+ "num_channels": 3,
134
+ "num_hidden_layers": 12,
135
+ "num_return_sequences": 1,
136
+ "output_attentions": false,
137
+ "output_hidden_states": false,
138
+ "output_scores": false,
139
+ "pad_token_id": null,
140
+ "patch_size": 16,
141
+ "prefix": null,
142
+ "problem_type": null,
143
+ "pruned_heads": {},
144
+ "qkv_bias": true,
145
+ "remove_invalid_values": false,
146
+ "repetition_penalty": 1.0,
147
+ "return_dict": true,
148
+ "return_dict_in_generate": false,
149
+ "sep_token_id": null,
150
+ "suppress_tokens": null,
151
+ "task_specific_params": null,
152
+ "temperature": 1.0,
153
+ "tf_legacy_loss": false,
154
+ "tie_encoder_decoder": false,
155
+ "tie_word_embeddings": true,
156
+ "tokenizer_class": null,
157
+ "top_k": 50,
158
+ "top_p": 1.0,
159
+ "torch_dtype": null,
160
+ "torchscript": false,
161
+ "typical_p": 1.0,
162
+ "use_bfloat16": false
163
+ },
164
+ "eos_token_id": 2,
165
+ "is_encoder_decoder": true,
166
+ "max_length": 64,
167
+ "model_type": "vision-encoder-decoder",
168
+ "pad_token_id": 1,
169
+ "tie_word_embeddings": false,
170
+ "torch_dtype": "float32",
171
+ "transformers_version": "4.40.1",
172
+ "vocab_size": 256204
173
+ }