Marco127 committed on
Commit 644a36f · verified · 1 Parent(s): 24734c1

Add new SentenceTransformer model

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 768,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
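As a reading aid (not part of the commit): this pooling config selects CLS pooling, i.e. the sentence embedding is simply the first token's vector. A minimal plain-Python sketch of that reduction, assuming a list-of-vectors input:

```python
# Illustrative sketch only (not the library's implementation): with
# "pooling_mode_cls_token": true, the Pooling module reduces a sequence of
# per-token vectors to the vector of token 0, the [CLS] token.
def cls_pooling(token_embeddings):
    """token_embeddings: list of per-token vectors (len = sequence length)."""
    return token_embeddings[0]

tokens = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]
print(cls_pooling(tokens))  # -> [0.1, 0.2, 0.3]
```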
README.md ADDED
@@ -0,0 +1,596 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:684
+ - loss:MultipleNegativesRankingLoss
+ base_model: sentence-transformers/multi-qa-mpnet-base-dot-v1
+ widget:
+ - source_sentence: '
+
+     We request that guests report any complaints and defects to the hotel reception or hotel
+
+     management in person. Your complaints shall be attended to immediately.'
+   sentences:
+   - '
+
+     Animals may not be allowed onto beds or other furniture, which serves for
+
+     guests. It is not permitted to use baths, showers or washbasins for bathing or
+
+     washing animals.'
+   - '
+
+     We request that guests report any complaints and defects to the hotel reception or hotel
+
+     management in person. Your complaints shall be attended to immediately.'
+   - '
+
+     Guests who take accommodation after midnight, shall still pay the price for
+
+     accommodation for the whole of the preceding night. The hotel’s official Check-in time is
+
+     from 02:00 pm. For a possible early check-in, please consult with the reservation team, or
+
+     the reception in advance.'
+ - source_sentence: '
+
+     Hotel guests may receive visits in their hotel rooms from guests not staying in the hotel.
+
+     Visitors must present a personal document at the hotel reception and register in the visitors''
+
+     book. These visits can last for only a maximum of 2 hours and must finish until 10:00 pm.'
+   sentences:
+   - '
+
+     Hotel guests may receive visits in their hotel rooms from guests not staying in the hotel.
+
+     Visitors must present a personal document at the hotel reception and register in the visitors''
+
+     book. These visits can last for only a maximum of 2 hours and must finish until 10:00 pm.'
+   - ' If you do not want someone to enter
+
+     your room, please hang the "do not disturb” card on your room’s outside door handle. It can
+
+     be found in the entrance area of your room.'
+   - '
+
+     Hotel guests may receive visits in their hotel rooms from guests not staying in the hotel.
+
+     Visitors must present a personal document at the hotel reception and register in the visitors''
+
+     book. These visits can last for only a maximum of 2 hours and must finish until 10:00 pm.'
+ - source_sentence: '
+
+     Guests may not use their own electrical appliances in the hotel building except for those
+
+     serving for personal hygiene (electrical shavers or massaging machines, hairdryers etc.), or
+
+     personal computers and telephone chargers. The rooms own electrical devices shall only be
+
+     used according to their main purpose.'
+   sentences:
+   - '
+
+     Pets are allowed in the hotel restaurant only from 12:00, provided the
+
+     animal''s behavior and cleanliness are adequate and they do not disturb other
+
+     guests. '
+   - '
+
+     Guests may not use their own electrical appliances in the hotel building except for those
+
+     serving for personal hygiene (electrical shavers or massaging machines, hairdryers etc.), or
+
+     personal computers and telephone chargers. The rooms own electrical devices shall only be
+
+     used according to their main purpose.'
+   - ' For a possible late check-out please consult with the reception
+
+     in time, and upon availability we may grant a later check-out for a supplemental fee.'
+ - source_sentence: '
+
+     The hotel may provide accommodation only for guests who register in the regular
+
+     manner. For this purpose, the guest must present a personal document (citizen''s
+
+     identification card), or a valid passport to the receptionist. Accepting these Rules of the
+
+     House is also obligatory for the registration.'
+   sentences:
+   - '
+
+     Hotel guests are obliged to abide by the provisions of these hotel regulations. In the case of
+
+     serious violation, the reception or hotel management may withdraw from the contract on
+
+     accommodation services before the elapse of the agreed period.'
+   - '
+
+     Hotel guests are responsible for given room keys during their whole stay. In case of loss, the
+
+     guests are asked to inform reception staff immediately in order to prevent abusing the key.
+
+     Losing the room key will result in a penalty of 20 Eur, which is to be paid on the spot, at the
+
+     reception.'
+   - '
+
+     The hotel may provide accommodation only for guests who register in the regular
+
+     manner. For this purpose, the guest must present a personal document (citizen''s
+
+     identification card), or a valid passport to the receptionist. Accepting these Rules of the
+
+     House is also obligatory for the registration.'
+ - source_sentence: '
+
+     Guests are responsible for damages caused to hotel property according to the valid legal
+
+     prescriptions of Hungary.'
+   sentences:
+   - '
+
+     We shall be happy to listen to any suggestions for improvement of the accommodation
+
+     and catering services in the hotel. In case of any complaints we shall purposefully arrange
+
+     the rectification of any insufficiencies.'
+   - '
+
+     Guests are responsible for damages caused to hotel property according to the valid legal
+
+     prescriptions of Hungary.'
+   - '
+
+     Guests are responsible for damages caused to hotel property according to the valid legal
+
+     prescriptions of Hungary.'
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ metrics:
+ - dot_accuracy
+ - dot_accuracy_threshold
+ - dot_f1
+ - dot_f1_threshold
+ - dot_precision
+ - dot_recall
+ - dot_ap
+ - dot_mcc
+ model-index:
+ - name: SentenceTransformer based on sentence-transformers/multi-qa-mpnet-base-dot-v1
+   results:
+   - task:
+       type: binary-classification
+       name: Binary Classification
+     dataset:
+       name: Unknown
+       type: unknown
+     metrics:
+     - type: dot_accuracy
+       value: 0.6549707602339181
+       name: Dot Accuracy
+     - type: dot_accuracy_threshold
+       value: 48.36168670654297
+       name: Dot Accuracy Threshold
+     - type: dot_f1
+       value: 0.5142857142857143
+       name: Dot F1
+     - type: dot_f1_threshold
+       value: 40.011634826660156
+       name: Dot F1 Threshold
+     - type: dot_precision
+       value: 0.36
+       name: Dot Precision
+     - type: dot_recall
+       value: 0.9
+       name: Dot Recall
+     - type: dot_ap
+       value: 0.3570718807651215
+       name: Dot Ap
+     - type: dot_mcc
+       value: 0.03879793956580217
+       name: Dot Mcc
+ ---
+
+ # SentenceTransformer based on sentence-transformers/multi-qa-mpnet-base-dot-v1
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [sentence-transformers/multi-qa-mpnet-base-dot-v1](https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1) <!-- at revision 4633e80e17ea975bc090c97b049da26062b054d3 -->
+ - **Maximum Sequence Length:** 512 tokens
+ - **Output Dimensionality:** 768 dimensions
+ - **Similarity Function:** Dot Product
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference:
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("Marco127/Base_T")
+ # Run inference
+ sentences = [
+     '\nGuests are responsible for damages caused to hotel property according to the valid legal\nprescriptions of Hungary.',
+     '\nGuests are responsible for damages caused to hotel property according to the valid legal\nprescriptions of Hungary.',
+     '\nWe shall be happy to listen to any suggestions for improvement of the accommodation\nand catering services in the hotel. In case of any complaints we shall purposefully arrange\nthe rectification of any insufficiencies.',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 768]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
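Because `similarity_fn_name` is `"dot"` for this model (see `config_sentence_transformers.json` in this commit), `model.similarity` returns pairwise dot products. A dependency-free sketch of that computation (the real method operates on tensors and returns one; `dot_similarity` is only an illustrative name):

```python
# Plain-Python equivalent of what model.similarity computes for this model:
# a matrix of pairwise dot products between the two embedding lists.
def dot_similarity(a, b):
    return [[sum(x * y for x, y in zip(u, v)) for v in b] for u in a]

emb = [[1.0, 2.0], [3.0, 4.0]]
print(dot_similarity(emb, emb))  # -> [[5.0, 11.0], [11.0, 25.0]]
```

Note that, unlike cosine similarity, dot scores are unbounded, which is why the evaluation thresholds below sit around 40-48 rather than in [-1, 1].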
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Binary Classification
+
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
+
+ | Metric                 | Value      |
+ |:-----------------------|:-----------|
+ | dot_accuracy           | 0.655      |
+ | dot_accuracy_threshold | 48.3617    |
+ | dot_f1                 | 0.5143     |
+ | dot_f1_threshold       | 40.0116    |
+ | dot_precision          | 0.36       |
+ | dot_recall             | 0.9        |
+ | **dot_ap**             | **0.3571** |
+ | dot_mcc                | 0.0388     |
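The two thresholds in the table are the dot-score cutoffs that maximized accuracy and F1, respectively, on the evaluation pairs. A sketch of how such a cutoff turns a similarity score into a binary prediction (`predict_pair` is an illustrative name, not a library API):

```python
# How the reported thresholds are used (sketch): a pair is labeled positive
# when the dot score of its two embeddings meets the chosen cutoff.
ACCURACY_THRESHOLD = 48.3617  # dot_accuracy_threshold from the table above

def predict_pair(score, threshold=ACCURACY_THRESHOLD):
    return int(score >= threshold)

print(predict_pair(50.2))  # -> 1
print(predict_pair(40.0))  # -> 0
```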
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 684 training samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 684 samples:
+   |         | sentence1 | sentence2 | label |
+   |:--------|:----------|:----------|:------|
+   | type    | string    | string    | int   |
+   | details | <ul><li>min: 17 tokens</li><li>mean: 42.77 tokens</li><li>max: 71 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 42.77 tokens</li><li>max: 71 tokens</li></ul> | <ul><li>0: ~67.11%</li><li>1: ~32.89%</li></ul> |
+ * Samples:
+   | sentence1 | sentence2 | label |
+   |:----------|:----------|:------|
+   | <code> If a guest fails to vacate<br>the room within the designated time, reception shall charge this guest for the following<br>night's accommodation fee.</code> | <code> If a guest fails to vacate<br>the room within the designated time, reception shall charge this guest for the following<br>night's accommodation fee.</code> | <code>0</code> |
+   | <code> If you do not want someone to enter<br>your room, please hang the "do not disturb” card on your room’s outside door handle. It can<br>be found in the entrance area of your room.</code> | <code> If you do not want someone to enter<br>your room, please hang the "do not disturb” card on your room’s outside door handle. It can<br>be found in the entrance area of your room.</code> | <code>0</code> |
+   | <code><br>Owners are responsible for ensuring that animals are kept quiet between the<br>hours of 10:00 pm and 06:00 am. In the case of failure to abide by this<br>regulation the guest may be asked to leave the hotel without a refund of the<br>price of the night's accommodation.</code> | <code><br>Owners are responsible for ensuring that animals are kept quiet between the<br>hours of 10:00 pm and 06:00 am. In the case of failure to abide by this<br>regulation the guest may be asked to leave the hotel without a refund of the<br>price of the night's accommodation.</code> | <code>0</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim"
+   }
+   ```
+
+ ### Evaluation Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 171 evaluation samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 171 samples:
+   |         | sentence1 | sentence2 | label |
+   |:--------|:----------|:----------|:------|
+   | type    | string    | string    | int   |
+   | details | <ul><li>min: 17 tokens</li><li>mean: 42.01 tokens</li><li>max: 71 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 42.01 tokens</li><li>max: 71 tokens</li></ul> | <ul><li>0: ~64.91%</li><li>1: ~35.09%</li></ul> |
+ * Samples:
+   | sentence1 | sentence2 | label |
+   |:----------|:----------|:------|
+   | <code><br>We shall be happy to listen to any suggestions for improvement of the accommodation<br>and catering services in the hotel. In case of any complaints we shall purposefully arrange<br>the rectification of any insufficiencies.</code> | <code><br>We shall be happy to listen to any suggestions for improvement of the accommodation<br>and catering services in the hotel. In case of any complaints we shall purposefully arrange<br>the rectification of any insufficiencies.</code> | <code>0</code> |
+   | <code><br>Between the hours of 10:00 pm and 06:00 am guests are obliged to maintain low noise<br>levels.</code> | <code><br>Between the hours of 10:00 pm and 06:00 am guests are obliged to maintain low noise<br>levels.</code> | <code>0</code> |
+   | <code><br>The hotel’s inner courtyard parking facility may be used only upon availability of parking<br>slots. Slots marked as ’Private’ are to be left free for their owners. For parking fees please<br>consult the reception or see the website of the hotel.</code> | <code><br>The hotel’s inner courtyard parking facility may be used only upon availability of parking<br>slots. Slots marked as ’Private’ are to be left free for their owners. For parking fees please<br>consult the reception or see the website of the hotel.</code> | <code>1</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim"
+   }
+   ```
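For intuition, `MultipleNegativesRankingLoss` treats every other pair in the batch as a negative: row `i` of the in-batch similarity matrix is scaled by `scale` and scored with cross-entropy against column `i`. A simplified plain-Python sketch (the library implementation works on tensors and uses the configured `similarity_fct`):

```python
import math

# Simplified sketch of MultipleNegativesRankingLoss: for row i of the in-batch
# similarity matrix, the "correct" column is i; the loss is the mean
# cross-entropy of softmax(scale * similarities) against that index.
def mnr_loss(sim_matrix, scale=20.0):
    total = 0.0
    for i, row in enumerate(sim_matrix):
        logits = [scale * s for s in row]
        m = max(logits)
        log_z = m + math.log(sum(math.exp(l - m) for l in logits))
        total += log_z - logits[i]
    return total / len(sim_matrix)

# Well-separated batch: positives score 1.0, negatives 0.0 -> loss near zero.
print(round(mnr_loss([[1.0, 0.0], [0.0, 1.0]]), 6))  # -> 0.0
```

This matches the near-zero training losses in the logs below once the (largely duplicated) pairs are separated.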
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: steps
+ - `per_device_train_batch_size`: 16
+ - `per_device_eval_batch_size`: 16
+ - `learning_rate`: 2e-05
+ - `num_train_epochs`: 5
+ - `warmup_ratio`: 0.1
+ - `fp16`: True
+ - `batch_sampler`: no_duplicates
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: steps
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 16
+ - `per_device_eval_batch_size`: 16
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 2e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 5
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: True
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `dispatch_batches`: None
+ - `split_batches`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: no_duplicates
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
+
+ ### Training Logs
+ | Epoch  | Step | Training Loss | Validation Loss | dot_ap |
+ |:------:|:----:|:-------------:|:---------------:|:------:|
+ | -1     | -1   | -             | -               | 0.3571 |
+ | 2.2791 | 100  | 0.0011        | 0.0000          | -      |
+ | 4.5581 | 200  | 0.0           | 0.0000          | -      |
+
+ ### Framework Versions
+ - Python: 3.11.11
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.48.3
+ - PyTorch: 2.5.1+cu124
+ - Accelerate: 1.3.0
+ - Datasets: 3.2.0
+ - Tokenizers: 0.21.0
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### MultipleNegativesRankingLoss
+ ```bibtex
+ @misc{henderson2017efficient,
+     title={Efficient Natural Language Response Suggestion for Smart Reply},
+     author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+     year={2017},
+     eprint={1705.00652},
+     archivePrefix={arXiv},
+     primaryClass={cs.CL}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "_name_or_path": "sentence-transformers/multi-qa-mpnet-base-dot-v1",
+   "architectures": [
+     "MPNetModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "mpnet",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "relative_attention_num_buckets": 32,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.3",
+   "vocab_size": 30527
+ }
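As a sanity check, the sizes in this config roughly account for the ~438 MB `model.safetensors` file added later in this commit. A back-of-the-envelope count (an approximation: biases, LayerNorm, and relative-attention buckets are ignored, so it slightly undercounts):

```python
# Rough parameter count from the config values above (standard transformer
# sizing; this is an estimate, not an exact tally of the checkpoint).
hidden, intermediate, layers = 768, 3072, 12
vocab, positions = 30527, 514

embedding = (vocab + positions) * hidden
attention = 4 * hidden * hidden      # Q, K, V, and output projections
ffn = 2 * hidden * intermediate      # up- and down-projection
total = embedding + layers * (attention + ffn)

print(f"~{total / 1e6:.0f}M parameters")        # ~109M parameters
print(f"~{total * 4 / 1e6:.0f} MB as float32")  # close to the 437,967,672-byte file
```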
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "3.4.1",
+     "transformers": "4.48.3",
+     "pytorch": "2.5.1+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "dot"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dad03cbd630093647a3070ddf920f98114bc76b0b3b454142b8dcac4822490ae
+ size 437967672
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 512,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,73 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "104": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "30526": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "do_lower_case": true,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "max_length": 250,
+   "model_max_length": 512,
+   "pad_to_multiple_of": null,
+   "pad_token": "<pad>",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "MPNetTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "[UNK]"
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff