babytreecc
/

rr_tldr_reward_8_0.01

Text Classification

Transformers

Safetensors

deberta-v2

Inference Endpoints

Model card · Files and versions · Community

babytreecc committed 7 days ago

Commit

c66b240

verified ·

1 Parent(s): a071403

Upload DebertaV2ForSequenceClassification

Browse files

Files changed (2) hide show

config.json +28 -309
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -1,324 +1,43 @@
 {
-  "_name_or_path": "tasksource/ModernBERT-base-nli",
   "architectures": [
-    "ModernBertForSequenceClassification"
   ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 50281,
-  "classifier_activation": "gelu",
-  "classifier_bias": false,
-  "classifier_dropout": 0.0,
-  "classifier_pooling": "mean",
-  "classifiers_size": [
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    1,
-    2,
-    3,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    6,
-    2,
-    2,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    2,
-    16,
-    100,
-    13,
-    100,
-    8,
-    3,
-    3,
-    2,
-    3,
-    2,
-    4,
-    3,
-    2,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    3,
-    2,
-    3,
-    2,
-    4,
-    3,
-    3,
-    3,
-    2,
-    3,
-    1,
-    2,
-    2,
-    3,
-    13,
-    2,
-    2,
-    3,
-    2,
-    2,
-    3,
-    3,
-    3,
-    3,
-    2,
-    3,
-    3,
-    2,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    3,
-    4,
-    3,
-    3,
-    2,
-    2,
-    3,
-    3,
-    2,
-    2,
-    2,
-    2,
-    2,
-    4,
-    3,
-    2,
-    2,
-    2,
-    3,
-    3,
-    3,
-    2,
-    3
-  ],
-  "cls_token_id": 50281,
-  "decoder_bias": true,
-  "deterministic_flash_attn": false,
-  "embedding_dropout": 0.0,
-  "eos_token_id": 50282,
-  "global_attn_every_n_layers": 3,
-  "global_rope_theta": 160000.0,
-  "gradient_checkpointing": false,
-  "hidden_activation": "gelu",
   "hidden_size": 768,
   "id2label": {
     "0": "LABEL_0"
   },
-  "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
-  "intermediate_size": 1152,
   "label2id": {
     "LABEL_0": 0
   },
-  "layer_norm_eps": 1e-05,
-  "local_attention": 128,
-  "local_rope_theta": 10000.0,
-  "max_position_embeddings": 2048,
-  "mlp_bias": false,
-  "mlp_dropout": 0.0,
-  "model_type": "modernbert",
-  "norm_bias": false,
-  "norm_eps": 1e-05,
   "num_attention_heads": 12,
-  "num_hidden_layers": 22,
-  "pad_token_id": 50283,
-  "position_embedding_type": "absolute",
-  "problem_type": "single_label_classification",
-  "reference_compile": true,
-  "repad_logits_with_grad": false,
-  "sep_token_id": 50282,
-  "sparse_pred_ignore_index": -100,
-  "sparse_prediction": false,
-  "tasks": [
-    "glue/mnli",
-    "glue/qnli",
-    "glue/rte",
-    "glue/wnli",
-    "glue/mrpc",
-    "glue/qqp",
-    "glue/stsb",
-    "super_glue/boolq",
-    "super_glue/cb",
-    "super_glue/multirc",
-    "super_glue/wic",
-    "super_glue/axg",
-    "anli/a1",
-    "anli/a2",
-    "anli/a3",
-    "sick/label",
-    "sick/entailment_AB",
-    "snli",
-    "scitail/snli_format",
-    "hans",
-    "WANLI",
-    "recast/recast_ner",
-    "recast/recast_sentiment",
-    "recast/recast_verbnet",
-    "recast/recast_megaveridicality",
-    "recast/recast_verbcorner",
-    "recast/recast_kg_relations",
-    "recast/recast_factuality",
-    "recast/recast_puns",
-    "probability_words_nli/reasoning_1hop",
-    "probability_words_nli/usnli",
-    "probability_words_nli/reasoning_2hop",
-    "nan-nli",
-    "nli_fever",
-    "breaking_nli",
-    "conj_nli",
-    "fracas",
-    "dialogue_nli",
-    "mpe",
-    "dnc",
-    "recast_white/fnplus",
-    "recast_white/sprl",
-    "recast_white/dpr",
-    "robust_nli/IS_CS",
-    "robust_nli/LI_LI",
-    "robust_nli/ST_WO",
-    "robust_nli/PI_SP",
-    "robust_nli/PI_CD",
-    "robust_nli/ST_SE",
-    "robust_nli/ST_NE",
-    "robust_nli/ST_LM",
-    "robust_nli_is_sd",
-    "robust_nli_li_ts",
-    "add_one_rte",
-    "paws/labeled_final",
-    "glue/cola",
-    "glue/sst2",
-    "pragmeval/pdtb",
-    "lex_glue/eurlex",
-    "lex_glue/scotus",
-    "lex_glue/ledgar",
-    "lex_glue/unfair_tos",
-    "dynasent/dynabench.dynasent.r1.all/r1",
-    "dynasent/dynabench.dynasent.r2.all/r2",
-    "cycic_classification",
-    "lingnli",
-    "monotonicity-entailment",
-    "scinli",
-    "naturallogic",
-    "dynahate",
-    "syntactic-augmentation-nli",
-    "autotnli",
-    "defeasible-nli/atomic",
-    "defeasible-nli/snli",
-    "help-nli",
-    "nli-veridicality-transitivity",
-    "lonli",
-    "dadc-limit-nli",
-    "folio",
-    "tomi-nli",
-    "puzzte",
-    "temporal-nli",
-    "counterfactually-augmented-snli",
-    "cnli",
-    "boolq-natural-perturbations",
-    "equate",
-    "chaos-mnli-ambiguity",
-    "logiqa-2.0-nli",
-    "mindgames",
-    "ConTRoL-nli",
-    "logical-fallacy",
-    "cladder",
-    "conceptrules_v2",
-    "zero-shot-label-nli",
-    "scone",
-    "monli",
-    "SpaceNLI",
-    "propsegment/nli",
-    "FLD.v2/default",
-    "FLD.v2/star",
-    "SDOH-NLI",
-    "scifact_entailment",
-    "AdjectiveScaleProbe-nli",
-    "resnli",
-    "semantic_fragments_nli",
-    "dataset_train_nli",
-    "nlgraph",
-    "ruletaker",
-    "PARARULE-Plus",
-    "logical-entailment",
-    "nope",
-    "LogicNLI",
-    "contract-nli/contractnli_a/seg",
-    "contract-nli/contractnli_b/full",
-    "nli4ct_semeval2024",
-    "biosift-nli",
-    "SIGA-nli",
-    "FOL-nli",
-    "doc-nli",
-    "mctest-nli",
-    "natural-language-satisfiability",
-    "idioms-nli",
-    "lifecycle-entailment",
-    "MSciNLI",
-    "hover-3way/nli",
-    "seahorse_summarization_evaluation",
-    "missing-item-prediction/contrastive",
-    "Pol_NLI",
-    "synthetic-retrieval-NLI/count",
-    "synthetic-retrieval-NLI/position",
-    "synthetic-retrieval-NLI/binary",
-    "babi_nli",
-    "gen_debiased_nli"
   ],
-  "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0",
-  "vocab_size": 50368
 }

 {
+  "_name_or_path": "mrm8488/deberta-v3-small-finetuned-mnli",
   "architectures": [
+    "DebertaV2ForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "finetuning_task": "mnli",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "LABEL_0"
   },
   "initializer_range": 0.02,
+  "intermediate_size": 3072,
   "label2id": {
     "LABEL_0": 0
   },
+  "layer_norm_eps": 1e-07,
+  "legacy": true,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
   "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
   ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
   "transformers_version": "4.49.0",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efa8200bf362ab0e1bf7d746c1830ba4c0b25345544e1c7aee94945e53165d1e
-size 299225554

 version https://git-lfs.github.com/spec/v1
+oid sha256:df955d96f41d03daeb6d35e50021b816dbdbe07984214d6c848e1d867f2456c9
+size 567595468