krishnateja95 committed on
Commit
c337a43
·
verified ·
1 Parent(s): 16953d6

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepseek-ai/deepseek-vl2-tiny",
3
+ "architectures": [
4
+ "DeepseekVLV2ForCausalLM"
5
+ ],
6
+ "candidate_resolutions": [
7
+ [
8
+ 384,
9
+ 384
10
+ ],
11
+ [
12
+ 384,
13
+ 768
14
+ ],
15
+ [
16
+ 768,
17
+ 384
18
+ ],
19
+ [
20
+ 384,
21
+ 1152
22
+ ],
23
+ [
24
+ 1152,
25
+ 384
26
+ ],
27
+ [
28
+ 384,
29
+ 1536
30
+ ],
31
+ [
32
+ 1536,
33
+ 384
34
+ ],
35
+ [
36
+ 768,
37
+ 768
38
+ ],
39
+ [
40
+ 384,
41
+ 1920
42
+ ],
43
+ [
44
+ 1920,
45
+ 384
46
+ ],
47
+ [
48
+ 384,
49
+ 2304
50
+ ],
51
+ [
52
+ 2304,
53
+ 384
54
+ ],
55
+ [
56
+ 768,
57
+ 1152
58
+ ],
59
+ [
60
+ 1152,
61
+ 768
62
+ ],
63
+ [
64
+ 384,
65
+ 2688
66
+ ],
67
+ [
68
+ 2688,
69
+ 384
70
+ ],
71
+ [
72
+ 384,
73
+ 3072
74
+ ],
75
+ [
76
+ 3072,
77
+ 384
78
+ ],
79
+ [
80
+ 768,
81
+ 1536
82
+ ],
83
+ [
84
+ 1536,
85
+ 768
86
+ ],
87
+ [
88
+ 384,
89
+ 3456
90
+ ],
91
+ [
92
+ 3456,
93
+ 384
94
+ ],
95
+ [
96
+ 1152,
97
+ 1152
98
+ ]
99
+ ],
100
+ "global_view_pos": "head",
101
+ "language_config": {
102
+ "_attn_implementation_autoset": true,
103
+ "architectures": [
104
+ "DeepseekV2ForCausalLM"
105
+ ],
106
+ "auto_map": {
107
+ "AutoConfig": "configuration_deepseek.DeepseekV2Config",
108
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
109
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
110
+ },
111
+ "bos_token_id": 0,
112
+ "eos_token_id": 1,
113
+ "first_k_dense_replace": 1,
114
+ "hidden_size": 1280,
115
+ "intermediate_size": 6848,
116
+ "kv_lora_rank": null,
117
+ "lm_head": true,
118
+ "max_position_embeddings": 4096,
119
+ "model_type": "deepseek_v2",
120
+ "moe_intermediate_size": 896,
121
+ "n_group": 1,
122
+ "n_routed_experts": 64,
123
+ "n_shared_experts": 2,
124
+ "num_attention_heads": 10,
125
+ "num_experts_per_tok": 6,
126
+ "num_hidden_layers": 12,
127
+ "num_key_value_heads": 10,
128
+ "q_lora_rank": null,
129
+ "qk_nope_head_dim": 0,
130
+ "qk_rope_head_dim": 0,
131
+ "rm_head": false,
132
+ "topk_group": 1,
133
+ "topk_method": "greedy",
134
+ "torch_dtype": "bfloat16",
135
+ "use_mla": false,
136
+ "v_head_dim": 0,
137
+ "vocab_size": 129280
138
+ },
139
+ "model_type": "deepseek_vl_v2",
140
+ "projector_config": {
141
+ "model_type": "mlp_projector",
142
+ "n_embed": 1280
143
+ },
144
+ "quantization_config": {
145
+ "amp": true,
146
+ "autoround_version": "0.4.5",
147
+ "batch_size": 8,
148
+ "bits": 4,
149
+ "data_type": "int",
150
+ "dataset": "NeelNanda/pile-10k",
151
+ "enable_minmax_tuning": true,
152
+ "enable_norm_bias_tuning": false,
153
+ "enable_quanted_input": true,
154
+ "gradient_accumulate_steps": 1,
155
+ "group_size": 32,
156
+ "iters": 200,
157
+ "low_gpu_mem_usage": false,
158
+ "lr": 0.005,
159
+ "minmax_lr": 0.005,
160
+ "modules_to_not_convert": [
161
+ "vision.blocks",
162
+ "projector.layers",
163
+ "vision.attn_pool.q",
164
+ "vision.attn_pool.kv",
165
+ "vision.attn_pool.proj",
166
+ "vision.attn_pool.mlp.fc1",
167
+ "vision.attn_pool.mlp.fc2",
168
+ "language.lm_head"
169
+ ],
170
+ "nsamples": 128,
171
+ "quant_method": "awq",
172
+ "scale_dtype": "torch.float16",
173
+ "seqlen": 2048,
174
+ "sym": true,
175
+ "to_quant_block_names": "language.model.layers",
176
+ "version": "gemm",
177
+ "zero_point": false
178
+ },
179
+ "tile_tag": "2D",
180
+ "torch_dtype": "float16",
181
+ "transformers_version": "4.47.1",
182
+ "vision_config": {
183
+ "layers": 27,
184
+ "mlp_ratio": 3.7362,
185
+ "model_name": "siglip_so400m_patch14_384",
186
+ "model_type": "vision",
187
+ "patch_size": 14,
188
+ "width": 1152
189
+ }
190
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.47.1"
4
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8234004987a630b885965cb4572e106e76e3aed6d6d3ff40ccb2e443fc498e81
3
+ size 3040975592
processor_config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_special_token": false,
3
+ "candidate_resolutions": [
4
+ [
5
+ 384,
6
+ 384
7
+ ],
8
+ [
9
+ 384,
10
+ 768
11
+ ],
12
+ [
13
+ 768,
14
+ 384
15
+ ],
16
+ [
17
+ 384,
18
+ 1152
19
+ ],
20
+ [
21
+ 1152,
22
+ 384
23
+ ],
24
+ [
25
+ 384,
26
+ 1536
27
+ ],
28
+ [
29
+ 1536,
30
+ 384
31
+ ],
32
+ [
33
+ 768,
34
+ 768
35
+ ],
36
+ [
37
+ 384,
38
+ 1920
39
+ ],
40
+ [
41
+ 1920,
42
+ 384
43
+ ],
44
+ [
45
+ 384,
46
+ 2304
47
+ ],
48
+ [
49
+ 2304,
50
+ 384
51
+ ],
52
+ [
53
+ 768,
54
+ 1152
55
+ ],
56
+ [
57
+ 1152,
58
+ 768
59
+ ],
60
+ [
61
+ 384,
62
+ 2688
63
+ ],
64
+ [
65
+ 2688,
66
+ 384
67
+ ],
68
+ [
69
+ 384,
70
+ 3072
71
+ ],
72
+ [
73
+ 3072,
74
+ 384
75
+ ],
76
+ [
77
+ 768,
78
+ 1536
79
+ ],
80
+ [
81
+ 1536,
82
+ 768
83
+ ],
84
+ [
85
+ 384,
86
+ 3456
87
+ ],
88
+ [
89
+ 3456,
90
+ 384
91
+ ],
92
+ [
93
+ 1152,
94
+ 1152
95
+ ]
96
+ ],
97
+ "downsample_ratio": 2,
98
+ "ignore_id": -100,
99
+ "image_mean": [
100
+ 0.5,
101
+ 0.5,
102
+ 0.5
103
+ ],
104
+ "image_std": [
105
+ 0.5,
106
+ 0.5,
107
+ 0.5
108
+ ],
109
+ "image_token": "<image>",
110
+ "mask_prompt": false,
111
+ "normalize": true,
112
+ "pad_token": "<\uff5c\u2581pad\u2581\uff5c>",
113
+ "patch_size": 14,
114
+ "processor_class": "DeepseekVLV2Processor",
115
+ "sft_format": "deepseek"
116
+ }
quantization_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 32,
4
+ "sym": true,
5
+ "data_type": "int",
6
+ "enable_quanted_input": true,
7
+ "enable_minmax_tuning": true,
8
+ "seqlen": 2048,
9
+ "batch_size": 8,
10
+ "scale_dtype": "torch.float16",
11
+ "lr": 0.005,
12
+ "minmax_lr": 0.005,
13
+ "gradient_accumulate_steps": 1,
14
+ "iters": 200,
15
+ "amp": true,
16
+ "nsamples": 128,
17
+ "low_gpu_mem_usage": false,
18
+ "to_quant_block_names": "language.model.layers",
19
+ "enable_norm_bias_tuning": false,
20
+ "dataset": "NeelNanda/pile-10k",
21
+ "autoround_version": "0.4.5",
22
+ "quant_method": "awq",
23
+ "zero_point": false,
24
+ "version": "gemm",
25
+ "modules_to_not_convert": [
26
+ "vision.blocks",
27
+ "projector.layers",
28
+ "vision.attn_pool.q",
29
+ "vision.attn_pool.kv",
30
+ "vision.attn_pool.proj",
31
+ "vision.attn_pool.mlp.fc1",
32
+ "vision.attn_pool.mlp.fc2",
33
+ "language.lm_head"
34
+ ]
35
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|User|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|Assistant|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": {
19
+ "content": "<|begin▁of▁sentence|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "eos_token": {
26
+ "content": "<|end▁of▁sentence|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "pad_token": {
33
+ "content": "<|▁pad▁|>",
34
+ "lstrip": false,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ }
39
+ }
tmp_dir/config.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "deepseek-ai/deepseek-vl2-tiny",
4
+ "candidate_resolutions": [
5
+ [
6
+ 384,
7
+ 384
8
+ ],
9
+ [
10
+ 384,
11
+ 768
12
+ ],
13
+ [
14
+ 768,
15
+ 384
16
+ ],
17
+ [
18
+ 384,
19
+ 1152
20
+ ],
21
+ [
22
+ 1152,
23
+ 384
24
+ ],
25
+ [
26
+ 384,
27
+ 1536
28
+ ],
29
+ [
30
+ 1536,
31
+ 384
32
+ ],
33
+ [
34
+ 768,
35
+ 768
36
+ ],
37
+ [
38
+ 384,
39
+ 1920
40
+ ],
41
+ [
42
+ 1920,
43
+ 384
44
+ ],
45
+ [
46
+ 384,
47
+ 2304
48
+ ],
49
+ [
50
+ 2304,
51
+ 384
52
+ ],
53
+ [
54
+ 768,
55
+ 1152
56
+ ],
57
+ [
58
+ 1152,
59
+ 768
60
+ ],
61
+ [
62
+ 384,
63
+ 2688
64
+ ],
65
+ [
66
+ 2688,
67
+ 384
68
+ ],
69
+ [
70
+ 384,
71
+ 3072
72
+ ],
73
+ [
74
+ 3072,
75
+ 384
76
+ ],
77
+ [
78
+ 768,
79
+ 1536
80
+ ],
81
+ [
82
+ 1536,
83
+ 768
84
+ ],
85
+ [
86
+ 384,
87
+ 3456
88
+ ],
89
+ [
90
+ 3456,
91
+ 384
92
+ ],
93
+ [
94
+ 1152,
95
+ 1152
96
+ ]
97
+ ],
98
+ "global_view_pos": "head",
99
+ "language_config": {
100
+ "_attn_implementation_autoset": true,
101
+ "architectures": [
102
+ "DeepseekV2ForCausalLM"
103
+ ],
104
+ "auto_map": {
105
+ "AutoConfig": "configuration_deepseek.DeepseekV2Config",
106
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
107
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
108
+ },
109
+ "bos_token_id": 0,
110
+ "eos_token_id": 1,
111
+ "first_k_dense_replace": 1,
112
+ "hidden_size": 1280,
113
+ "intermediate_size": 6848,
114
+ "kv_lora_rank": null,
115
+ "lm_head": true,
116
+ "max_position_embeddings": 4096,
117
+ "model_type": "deepseek_v2",
118
+ "moe_intermediate_size": 896,
119
+ "n_group": 1,
120
+ "n_routed_experts": 64,
121
+ "n_shared_experts": 2,
122
+ "num_attention_heads": 10,
123
+ "num_experts_per_tok": 6,
124
+ "num_hidden_layers": 12,
125
+ "num_key_value_heads": 10,
126
+ "q_lora_rank": null,
127
+ "qk_nope_head_dim": 0,
128
+ "qk_rope_head_dim": 0,
129
+ "rm_head": false,
130
+ "topk_group": 1,
131
+ "topk_method": "greedy",
132
+ "torch_dtype": "bfloat16",
133
+ "use_mla": false,
134
+ "v_head_dim": 0,
135
+ "vocab_size": 129280
136
+ },
137
+ "model_type": "deepseek_vl_v2",
138
+ "projector_config": {
139
+ "model_type": "mlp_projector",
140
+ "n_embed": 1280
141
+ },
142
+ "tile_tag": "2D",
143
+ "torch_dtype": "bfloat16",
144
+ "transformers_version": "4.47.1",
145
+ "vision_config": {
146
+ "layers": 27,
147
+ "mlp_ratio": 3.7362,
148
+ "model_name": "siglip_so400m_patch14_384",
149
+ "model_type": "vision",
150
+ "patch_size": 14,
151
+ "width": 1152
152
+ }
153
+ }
tmp_dir/special_tokens_map.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|User|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|Assistant|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": {
19
+ "content": "<|begin▁of▁sentence|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "eos_token": {
26
+ "content": "<|end▁of▁sentence|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "pad_token": {
33
+ "content": "<|▁pad▁|>",
34
+ "lstrip": false,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ }
39
+ }
tmp_dir/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tmp_dir/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff