luis-espinosa committed
Commit 07d8df4 · verified · 1 Parent(s): e55db0a

Upload trained SetFit model

1_Pooling/config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "word_embedding_dimension": 768,
+  "word_embedding_dimension": 896,
   "pooling_mode_cls_token": false,
   "pooling_mode_mean_tokens": true,
   "pooling_mode_max_tokens": false,
README.md CHANGED
@@ -5,21 +5,20 @@ tags:
 - text-classification
 - generated_from_setfit_trainer
 widget:
-- text: Utilita Energy to pay £175,000 after failing to meet carbon emission reduction
-  obligations
-- text: Ofgem appoints preferred bidder for Burbo Bank Extension offshore transmission
-  assets
-- text: Aveni secures £11m to build LLM for financial services
-- text: LG CNS, Aeon to collaborate on EdTech in Japan
-- text: US to Buy Norwegian Joint Strike Missile for its F-35A Stealth Aircraft
+- text: Be.EV partners with Paua to add more than 700 charge points to the Paua network
+- text: UAE’s Artificial Intelligence Office, Mastercard and First Abu Dhabi Bank
+  Launch Joint AI Challenge
+- text: 'Supply Licence Review: Ofgem''s role in enforcing industry codes'
+- text: Air Astana, Neos Enter into Strategic Partnership
+- text: Ofgem protects customers of failed supplier Rutherford Energy Supply Limited
 metrics:
 - accuracy
 pipeline_tag: text-classification
 library_name: setfit
 inference: false
-base_model: infgrad/stella-base-en-v2
+base_model: HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5
 model-index:
-- name: SetFit with infgrad/stella-base-en-v2
+- name: SetFit with HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5
   results:
   - task:
       type: text-classification
@@ -30,13 +29,13 @@ model-index:
       split: test
       metrics:
       - type: accuracy
-        value: 0.6353790613718412
+        value: 0.6441441441441441
        name: Accuracy
 ---
 
-# SetFit with infgrad/stella-base-en-v2
+# SetFit with HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5
 
-This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [infgrad/stella-base-en-v2](https://huggingface.co/infgrad/stella-base-en-v2) as the Sentence Transformer embedding model. A OneVsRestClassifier instance is used for classification.
+This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5](https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5) as the Sentence Transformer embedding model. A OneVsRestClassifier instance is used for classification.
 
 The model has been trained using an efficient few-shot learning technique that involves:
 
@@ -47,9 +46,9 @@ The model has been trained using an efficient few-shot learning technique that i
 
 ### Model Description
 - **Model Type:** SetFit
-- **Sentence Transformer body:** [infgrad/stella-base-en-v2](https://huggingface.co/infgrad/stella-base-en-v2)
+- **Sentence Transformer body:** [HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5](https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5)
 - **Classification head:** a OneVsRestClassifier instance
-- **Maximum Sequence Length:** 512 tokens
+- **Maximum Sequence Length:** 32768 tokens
 <!-- - **Number of Classes:** Unknown -->
 <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
 <!-- - **Language:** Unknown -->
@@ -66,7 +65,7 @@ The model has been trained using an efficient few-shot learning technique that i
 ### Metrics
 | Label | Accuracy |
 |:--------|:---------|
-| **all** | 0.6354 |
+| **all** | 0.6441 |
 
 ## Uses
 
@@ -86,7 +85,7 @@ from setfit import SetFitModel
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("amplyfi/all-labels")
 # Run inference
-preds = model("LG CNS, Aeon to collaborate on EdTech in Japan")
+preds = model("Air Astana, Neos Enter into Strategic Partnership")
 ```
 
 <!--
@@ -118,14 +117,14 @@ preds = model("LG CNS, Aeon to collaborate on EdTech in Japan")
 ### Training Set Metrics
 | Training set | Min | Median | Max |
 |:-------------|:----|:-------|:----|
-| Word count | 4 | 9.9566 | 29 |
+| Word count | 4 | 9.9797 | 30 |
 
 ### Training Hyperparameters
 - batch_size: (16, 16)
 - num_epochs: (2, 2)
 - max_steps: -1
 - sampling_strategy: oversampling
-- num_iterations: 10
+- num_iterations: 5
 - body_learning_rate: (2e-05, 2e-05)
 - head_learning_rate: 2e-05
 - loss: CosineSimilarityLoss
@@ -142,48 +141,29 @@ preds = model("LG CNS, Aeon to collaborate on EdTech in Japan")
 ### Training Results
 | Epoch | Step | Training Loss | Validation Loss |
 |:------:|:----:|:-------------:|:---------------:|
-| 0.0010 | 1 | 0.1474 | - |
-| 0.0482 | 50 | 0.2165 | - |
-| 0.0963 | 100 | 0.1969 | - |
-| 0.1445 | 150 | 0.1609 | - |
-| 0.1927 | 200 | 0.1175 | - |
-| 0.2408 | 250 | 0.0956 | - |
-| 0.2890 | 300 | 0.0783 | - |
-| 0.3372 | 350 | 0.0689 | - |
-| 0.3854 | 400 | 0.0513 | - |
-| 0.4335 | 450 | 0.0486 | - |
-| 0.4817 | 500 | 0.0651 | - |
-| 0.5299 | 550 | 0.0612 | - |
-| 0.5780 | 600 | 0.0537 | - |
-| 0.6262 | 650 | 0.0363 | - |
-| 0.6744 | 700 | 0.0408 | - |
-| 0.7225 | 750 | 0.0413 | - |
-| 0.7707 | 800 | 0.0373 | - |
-| 0.8189 | 850 | 0.0327 | - |
-| 0.8671 | 900 | 0.0278 | - |
-| 0.9152 | 950 | 0.0357 | - |
-| 0.9634 | 1000 | 0.0291 | - |
-| 1.0116 | 1050 | 0.0227 | - |
-| 1.0597 | 1100 | 0.0178 | - |
-| 1.1079 | 1150 | 0.0224 | - |
-| 1.1561 | 1200 | 0.0193 | - |
-| 1.2042 | 1250 | 0.0205 | - |
-| 1.2524 | 1300 | 0.019 | - |
-| 1.3006 | 1350 | 0.0176 | - |
-| 1.3487 | 1400 | 0.0196 | - |
-| 1.3969 | 1450 | 0.0147 | - |
-| 1.4451 | 1500 | 0.0209 | - |
-| 1.4933 | 1550 | 0.0161 | - |
-| 1.5414 | 1600 | 0.0164 | - |
-| 1.5896 | 1650 | 0.0188 | - |
-| 1.6378 | 1700 | 0.0153 | - |
-| 1.6859 | 1750 | 0.0167 | - |
-| 1.7341 | 1800 | 0.0198 | - |
-| 1.7823 | 1850 | 0.0157 | - |
-| 1.8304 | 1900 | 0.0168 | - |
-| 1.8786 | 1950 | 0.0128 | - |
-| 1.9268 | 2000 | 0.0165 | - |
-| 1.9750 | 2050 | 0.0121 | - |
+| 0.0018 | 1 | 0.3185 | - |
+| 0.0903 | 50 | 0.2296 | - |
+| 0.1805 | 100 | 0.1307 | - |
+| 0.2708 | 150 | 0.0955 | - |
+| 0.3610 | 200 | 0.08 | - |
+| 0.4513 | 250 | 0.0687 | - |
+| 0.5415 | 300 | 0.0591 | - |
+| 0.6318 | 350 | 0.0545 | - |
+| 0.7220 | 400 | 0.0538 | - |
+| 0.8123 | 450 | 0.0482 | - |
+| 0.9025 | 500 | 0.0327 | - |
+| 0.9928 | 550 | 0.0332 | - |
+| 1.0830 | 600 | 0.0315 | - |
+| 1.1733 | 650 | 0.0188 | - |
+| 1.2635 | 700 | 0.016 | - |
+| 1.3538 | 750 | 0.016 | - |
+| 1.4440 | 800 | 0.0167 | - |
+| 1.5343 | 850 | 0.0128 | - |
+| 1.6245 | 900 | 0.0182 | - |
+| 1.7148 | 950 | 0.0113 | - |
+| 1.8051 | 1000 | 0.014 | - |
+| 1.8953 | 1050 | 0.0151 | - |
+| 1.9856 | 1100 | 0.0153 | - |
 
 ### Framework Versions
 - Python: 3.10.12
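
Because the head is a OneVsRestClassifier, each headline can carry several labels at once. Beyond the README's `preds = model(...)` call, per-label scores are available through `predict_proba`; a sketch under the same loading assumptions as above:

```python
from setfit import SetFitModel

model = SetFitModel.from_pretrained("amplyfi/all-labels")

headlines = [
    "Air Astana, Neos Enter into Strategic Partnership",
    "Ofgem protects customers of failed supplier Rutherford Energy Supply Limited",
]
# predict() returns one multi-hot vector per input for a OneVsRest head;
# predict_proba() exposes the underlying per-label probabilities.
probs = model.predict_proba(headlines)
print(probs.shape)  # (2, num_labels)
```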
added_tokens.json ADDED
@@ -0,0 +1,5 @@
+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}
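
These are the Qwen2 control tokens that ship with the new backbone. A quick check that the uploaded tokenizer resolves them to the expected ids (a sketch; `trust_remote_code=True` is assumed to be needed because of the tokenizer's `auto_map`):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("amplyfi/all-labels", trust_remote_code=True)
# Ids should mirror added_tokens.json exactly.
assert tok.convert_tokens_to_ids("<|endoftext|>") == 151643
assert tok.convert_tokens_to_ids("<|im_start|>") == 151644
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645
```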
config.json CHANGED
@@ -1,32 +1,28 @@
 {
-  "_name_or_path": "infgrad/stella-base-en-v2",
+  "_name_or_path": "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
   "architectures": [
-    "BertModel"
+    "Qwen2Model"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "LABEL_0"
-  },
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 896,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "LABEL_0": 0
-  },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
+  "intermediate_size": 4864,
+  "max_position_embeddings": 131072,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": true,
   "torch_dtype": "float32",
   "transformers_version": "4.42.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
 }
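
The embedding body changes family outright: a 12-layer BERT encoder gives way to a 24-layer Qwen2 decoder with grouped-query attention (14 query heads, 2 key/value heads). A config-only check that avoids downloading the weights, as a sketch:

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("amplyfi/all-labels")
print(cfg.model_type)   # "qwen2"
print(cfg.hidden_size)  # 896, matching 1_Pooling/config.json
print(cfg.num_attention_heads, cfg.num_key_value_heads)  # 14, 2 (GQA)
```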
config_sentence_transformers.json CHANGED
@@ -4,7 +4,10 @@
     "transformers": "4.42.2",
     "pytorch": "2.5.1+cu124"
   },
-  "prompts": {},
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
   "default_prompt_name": null,
   "similarity_fn_name": "cosine"
 }
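
The new config registers `query` and `document` prompt slots, both empty strings, so prompt-aware encoding is currently a no-op but the names are wired up. A sketch of how sentence-transformers would use them, assuming the Sentence Transformer body loads standalone from this repo:

```python
from sentence_transformers import SentenceTransformer

st = SentenceTransformer("amplyfi/all-labels", trust_remote_code=True)
# Both prompts are empty in this checkpoint, so the two embeddings match;
# the mechanism exists for checkpoints with non-empty instruction prompts.
q = st.encode("Ofgem supplier licence review", prompt_name="query")
d = st.encode("Ofgem supplier licence review", prompt_name="document")
```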
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2773ffa701d578cb282d1aa6b7ad5b80c4af07f73d90af030ec284cc37479f32
-size 437951328
+oid sha256:5292fdb77075fc7c073101ce5d1de5a8519e07ce5428101ed6891b827ea82938
+size 1976161736
model_head.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e5ad649e8942c9b81f6de27817d0415a1cc3bc89445f6d253e44e1ce9262c1e
-size 117652
+oid sha256:f68b0434348a1d9f0132b0f79b1ddd5f3caafbc7d4de9225d0a8bd1bae5fd447
+size 136084
modules.json CHANGED
@@ -10,5 +10,11 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
   }
 ]
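
A Normalize module is appended to the pipeline, so embeddings leave the body unit-length and cosine similarity reduces to a dot product. A sketch verifying that, under the same loading assumptions as above:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

st = SentenceTransformer("amplyfi/all-labels", trust_remote_code=True)
emb = st.encode(["Aveni secures £11m to build LLM for financial services"])
# With the Normalize module in place, every row should have L2 norm ~1.0.
print(np.linalg.norm(emb, axis=1))
```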
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 512,
+  "max_seq_length": 32768,
   "do_lower_case": false
 }
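
The sequence cap jumps from 512 to the backbone's 32768 tokens, while the training headlines peak at 30 words, so nothing here needs that window. If memory matters at inference time, the cap can be lowered after loading; a sketch:

```python
from sentence_transformers import SentenceTransformer

st = SentenceTransformer("amplyfi/all-labels", trust_remote_code=True)
# Short headlines never approach 32768 tokens; a small cap trims the
# attention cost without changing outputs for in-range inputs.
st.max_seq_length = 128
```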
special_tokens_map.json CHANGED
@@ -1,34 +1,17 @@
 {
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,39 +1,24 @@
 {
+  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
+    "151643": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "100": {
-      "content": "[UNK]",
+    "151644": {
+      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
+    "151645": {
+      "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -41,17 +26,31 @@
       "special": true
     }
   },
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "never_split": null,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "auto_map": {
+    "AutoTokenizer": [
+      "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5--tokenization_qwen.Qwen2Tokenizer",
+      "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5--tokenization_qwen.Qwen2TokenizerFast"
+    ]
+  },
+  "bos_token": null,
+  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "max_length": 512,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
+  "split_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": null
 }
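
Two settings worth noting for batched inference: `padding_side` is now `left`, as is typical for decoder-style models, and `model_max_length` is 32768. A sketch showing where the pad tokens land, under the same loading assumptions:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("amplyfi/all-labels", trust_remote_code=True)
batch = tok(["short one", "a somewhat longer headline here"], padding=True)
# With padding_side="left", pad ids (151643) prefix the shorter sequence.
print(batch["input_ids"][0][:3])
```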
vocab.json ADDED
The diff for this file is too large to render. See raw diff