babytreecc committed on
Commit
c66b240
·
verified ·
1 Parent(s): a071403

Upload DebertaV2ForSequenceClassification

Browse files
Files changed (2) hide show
  1. config.json +28 -309
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,324 +1,43 @@
1
  {
2
- "_name_or_path": "tasksource/ModernBERT-base-nli",
3
  "architectures": [
4
- "ModernBertForSequenceClassification"
5
  ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "bos_token_id": 50281,
9
- "classifier_activation": "gelu",
10
- "classifier_bias": false,
11
- "classifier_dropout": 0.0,
12
- "classifier_pooling": "mean",
13
- "classifiers_size": [
14
- 3,
15
- 2,
16
- 2,
17
- 2,
18
- 2,
19
- 2,
20
- 1,
21
- 2,
22
- 3,
23
- 2,
24
- 2,
25
- 2,
26
- 3,
27
- 3,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2,
34
- 3,
35
- 2,
36
- 2,
37
- 2,
38
- 2,
39
- 2,
40
- 6,
41
- 2,
42
- 2,
43
- 2,
44
- 2,
45
- 2,
46
- 3,
47
- 3,
48
- 3,
49
- 3,
50
- 3,
51
- 3,
52
- 3,
53
- 2,
54
- 2,
55
- 2,
56
- 2,
57
- 3,
58
- 3,
59
- 3,
60
- 3,
61
- 3,
62
- 3,
63
- 3,
64
- 3,
65
- 2,
66
- 2,
67
- 2,
68
- 2,
69
- 2,
70
- 2,
71
- 16,
72
- 100,
73
- 13,
74
- 100,
75
- 8,
76
- 3,
77
- 3,
78
- 2,
79
- 3,
80
- 2,
81
- 4,
82
- 3,
83
- 2,
84
- 3,
85
- 2,
86
- 2,
87
- 2,
88
- 2,
89
- 2,
90
- 3,
91
- 2,
92
- 3,
93
- 2,
94
- 4,
95
- 3,
96
- 3,
97
- 3,
98
- 2,
99
- 3,
100
- 1,
101
- 2,
102
- 2,
103
- 3,
104
- 13,
105
- 2,
106
- 2,
107
- 3,
108
- 2,
109
- 2,
110
- 3,
111
- 3,
112
- 3,
113
- 3,
114
- 2,
115
- 3,
116
- 3,
117
- 2,
118
- 3,
119
- 2,
120
- 2,
121
- 2,
122
- 2,
123
- 2,
124
- 3,
125
- 4,
126
- 3,
127
- 3,
128
- 2,
129
- 2,
130
- 3,
131
- 3,
132
- 2,
133
- 2,
134
- 2,
135
- 2,
136
- 2,
137
- 4,
138
- 3,
139
- 2,
140
- 2,
141
- 2,
142
- 3,
143
- 3,
144
- 3,
145
- 2,
146
- 3
147
- ],
148
- "cls_token_id": 50281,
149
- "decoder_bias": true,
150
- "deterministic_flash_attn": false,
151
- "embedding_dropout": 0.0,
152
- "eos_token_id": 50282,
153
- "global_attn_every_n_layers": 3,
154
- "global_rope_theta": 160000.0,
155
- "gradient_checkpointing": false,
156
- "hidden_activation": "gelu",
157
  "hidden_size": 768,
158
  "id2label": {
159
  "0": "LABEL_0"
160
  },
161
- "initializer_cutoff_factor": 2.0,
162
  "initializer_range": 0.02,
163
- "intermediate_size": 1152,
164
  "label2id": {
165
  "LABEL_0": 0
166
  },
167
- "layer_norm_eps": 1e-05,
168
- "local_attention": 128,
169
- "local_rope_theta": 10000.0,
170
- "max_position_embeddings": 2048,
171
- "mlp_bias": false,
172
- "mlp_dropout": 0.0,
173
- "model_type": "modernbert",
174
- "norm_bias": false,
175
- "norm_eps": 1e-05,
176
  "num_attention_heads": 12,
177
- "num_hidden_layers": 22,
178
- "pad_token_id": 50283,
179
- "position_embedding_type": "absolute",
180
- "problem_type": "single_label_classification",
181
- "reference_compile": true,
182
- "repad_logits_with_grad": false,
183
- "sep_token_id": 50282,
184
- "sparse_pred_ignore_index": -100,
185
- "sparse_prediction": false,
186
- "tasks": [
187
- "glue/mnli",
188
- "glue/qnli",
189
- "glue/rte",
190
- "glue/wnli",
191
- "glue/mrpc",
192
- "glue/qqp",
193
- "glue/stsb",
194
- "super_glue/boolq",
195
- "super_glue/cb",
196
- "super_glue/multirc",
197
- "super_glue/wic",
198
- "super_glue/axg",
199
- "anli/a1",
200
- "anli/a2",
201
- "anli/a3",
202
- "sick/label",
203
- "sick/entailment_AB",
204
- "snli",
205
- "scitail/snli_format",
206
- "hans",
207
- "WANLI",
208
- "recast/recast_ner",
209
- "recast/recast_sentiment",
210
- "recast/recast_verbnet",
211
- "recast/recast_megaveridicality",
212
- "recast/recast_verbcorner",
213
- "recast/recast_kg_relations",
214
- "recast/recast_factuality",
215
- "recast/recast_puns",
216
- "probability_words_nli/reasoning_1hop",
217
- "probability_words_nli/usnli",
218
- "probability_words_nli/reasoning_2hop",
219
- "nan-nli",
220
- "nli_fever",
221
- "breaking_nli",
222
- "conj_nli",
223
- "fracas",
224
- "dialogue_nli",
225
- "mpe",
226
- "dnc",
227
- "recast_white/fnplus",
228
- "recast_white/sprl",
229
- "recast_white/dpr",
230
- "robust_nli/IS_CS",
231
- "robust_nli/LI_LI",
232
- "robust_nli/ST_WO",
233
- "robust_nli/PI_SP",
234
- "robust_nli/PI_CD",
235
- "robust_nli/ST_SE",
236
- "robust_nli/ST_NE",
237
- "robust_nli/ST_LM",
238
- "robust_nli_is_sd",
239
- "robust_nli_li_ts",
240
- "add_one_rte",
241
- "paws/labeled_final",
242
- "glue/cola",
243
- "glue/sst2",
244
- "pragmeval/pdtb",
245
- "lex_glue/eurlex",
246
- "lex_glue/scotus",
247
- "lex_glue/ledgar",
248
- "lex_glue/unfair_tos",
249
- "dynasent/dynabench.dynasent.r1.all/r1",
250
- "dynasent/dynabench.dynasent.r2.all/r2",
251
- "cycic_classification",
252
- "lingnli",
253
- "monotonicity-entailment",
254
- "scinli",
255
- "naturallogic",
256
- "dynahate",
257
- "syntactic-augmentation-nli",
258
- "autotnli",
259
- "defeasible-nli/atomic",
260
- "defeasible-nli/snli",
261
- "help-nli",
262
- "nli-veridicality-transitivity",
263
- "lonli",
264
- "dadc-limit-nli",
265
- "folio",
266
- "tomi-nli",
267
- "puzzte",
268
- "temporal-nli",
269
- "counterfactually-augmented-snli",
270
- "cnli",
271
- "boolq-natural-perturbations",
272
- "equate",
273
- "chaos-mnli-ambiguity",
274
- "logiqa-2.0-nli",
275
- "mindgames",
276
- "ConTRoL-nli",
277
- "logical-fallacy",
278
- "cladder",
279
- "conceptrules_v2",
280
- "zero-shot-label-nli",
281
- "scone",
282
- "monli",
283
- "SpaceNLI",
284
- "propsegment/nli",
285
- "FLD.v2/default",
286
- "FLD.v2/star",
287
- "SDOH-NLI",
288
- "scifact_entailment",
289
- "AdjectiveScaleProbe-nli",
290
- "resnli",
291
- "semantic_fragments_nli",
292
- "dataset_train_nli",
293
- "nlgraph",
294
- "ruletaker",
295
- "PARARULE-Plus",
296
- "logical-entailment",
297
- "nope",
298
- "LogicNLI",
299
- "contract-nli/contractnli_a/seg",
300
- "contract-nli/contractnli_b/full",
301
- "nli4ct_semeval2024",
302
- "biosift-nli",
303
- "SIGA-nli",
304
- "FOL-nli",
305
- "doc-nli",
306
- "mctest-nli",
307
- "natural-language-satisfiability",
308
- "idioms-nli",
309
- "lifecycle-entailment",
310
- "MSciNLI",
311
- "hover-3way/nli",
312
- "seahorse_summarization_evaluation",
313
- "missing-item-prediction/contrastive",
314
- "Pol_NLI",
315
- "synthetic-retrieval-NLI/count",
316
- "synthetic-retrieval-NLI/position",
317
- "synthetic-retrieval-NLI/binary",
318
- "babi_nli",
319
- "gen_debiased_nli"
320
  ],
321
- "torch_dtype": "bfloat16",
 
 
 
 
322
  "transformers_version": "4.49.0",
323
- "vocab_size": 50368
 
324
  }
 
1
  {
2
+ "_name_or_path": "mrm8488/deberta-v3-small-finetuned-mnli",
3
  "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "finetuning_task": "mnli",
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "hidden_size": 768,
11
  "id2label": {
12
  "0": "LABEL_0"
13
  },
 
14
  "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
  "label2id": {
17
  "LABEL_0": 0
18
  },
19
+ "layer_norm_eps": 1e-07,
20
+ "legacy": true,
21
+ "max_position_embeddings": 512,
22
+ "max_relative_positions": -1,
23
+ "model_type": "deberta-v2",
24
+ "norm_rel_ebd": "layer_norm",
 
 
 
25
  "num_attention_heads": 12,
26
+ "num_hidden_layers": 6,
27
+ "pad_token_id": 0,
28
+ "pooler_dropout": 0,
29
+ "pooler_hidden_act": "gelu",
30
+ "pooler_hidden_size": 768,
31
+ "pos_att_type": [
32
+ "p2c",
33
+ "c2p"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ],
35
+ "position_biased_input": false,
36
+ "position_buckets": 256,
37
+ "relative_attention": true,
38
+ "share_att_key": true,
39
+ "torch_dtype": "float32",
40
  "transformers_version": "4.49.0",
41
+ "type_vocab_size": 0,
42
+ "vocab_size": 128100
43
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efa8200bf362ab0e1bf7d746c1830ba4c0b25345544e1c7aee94945e53165d1e
3
- size 299225554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df955d96f41d03daeb6d35e50021b816dbdbe07984214d6c848e1d867f2456c9
3
+ size 567595468