metadata
language: []
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated
base_model: sentence-transformers/stsb-distilbert-base
metrics:
- cosine_accuracy
- cosine_accuracy_threshold
- cosine_f1
- cosine_f1_threshold
- cosine_precision
- cosine_recall
- cosine_ap
- manhattan_accuracy
- manhattan_accuracy_threshold
- manhattan_f1
- manhattan_f1_threshold
- manhattan_precision
- manhattan_recall
- manhattan_ap
- euclidean_accuracy
- euclidean_accuracy_threshold
- euclidean_f1
- euclidean_f1_threshold
- euclidean_precision
- euclidean_recall
- euclidean_ap
- dot_accuracy
- dot_accuracy_threshold
- dot_f1
- dot_f1_threshold
- dot_precision
- dot_recall
- dot_ap
- max_accuracy
- max_accuracy_threshold
- max_f1
- max_f1_threshold
- max_precision
- max_recall
- max_ap
- average_precision
- f1
- precision
- recall
- threshold
- cosine_accuracy@1
- cosine_accuracy@3
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@3
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@3
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@10
- cosine_mrr@10
- cosine_map@100
- dot_accuracy@1
- dot_accuracy@3
- dot_accuracy@5
- dot_accuracy@10
- dot_precision@1
- dot_precision@3
- dot_precision@5
- dot_precision@10
- dot_recall@1
- dot_recall@3
- dot_recall@5
- dot_recall@10
- dot_ndcg@10
- dot_mrr@10
- dot_map@100
widget:
- source_sentence: How porn is made?
sentences:
- How is porn made?
- How do you study before a test?
- What is the best book for afcat?
- source_sentence: Is WW3 inevitable?
sentences:
- How close to WW3 are we?
- Is it ok not to know everything?
- How can I get good marks on my exam?
- source_sentence: How do stop smoking?
sentences:
- How did you quit/stop smoking?
- How can I gain weight naturally?
- What movie is the best movie of 2016?
- source_sentence: What is astrology?
sentences:
- What really is astrology?
- How do I control blood pressure?
- How should I reduce weight easily?
- source_sentence: What is SMS API?
sentences:
- What is an SMS API?
- How will Sound travel in SPACE?
- Do we live inside a black hole?
pipeline_tag: sentence-similarity
model-index:
- name: SentenceTransformer based on sentence-transformers/stsb-distilbert-base
results:
- task:
type: binary-classification
name: Binary Classification
dataset:
name: Unknown
type: unknown
metrics:
- type: cosine_accuracy
value: 0.770712179816613
name: Cosine Accuracy
- type: cosine_accuracy_threshold
value: 0.8169694542884827
name: Cosine Accuracy Threshold
- type: cosine_f1
value: 0.7086398522340053
name: Cosine F1
- type: cosine_f1_threshold
value: 0.7420324087142944
name: Cosine F1 Threshold
- type: cosine_precision
value: 0.6032968224704479
name: Cosine Precision
- type: cosine_recall
value: 0.8585539007639479
name: Cosine Recall
- type: cosine_ap
value: 0.7191176594498068
name: Cosine Ap
- type: manhattan_accuracy
value: 0.7729301344296882
name: Manhattan Accuracy
- type: manhattan_accuracy_threshold
value: 181.4663848876953
name: Manhattan Accuracy Threshold
- type: manhattan_f1
value: 0.7082838527457715
name: Manhattan F1
- type: manhattan_f1_threshold
value: 222.911865234375
name: Manhattan F1 Threshold
- type: manhattan_precision
value: 0.6063303659742829
name: Manhattan Precision
- type: manhattan_recall
value: 0.8514545875453353
name: Manhattan Recall
- type: manhattan_ap
value: 0.7188011305084623
name: Manhattan Ap
- type: euclidean_accuracy
value: 0.7736333883313948
name: Euclidean Accuracy
- type: euclidean_accuracy_threshold
value: 8.356552124023438
name: Euclidean Accuracy Threshold
- type: euclidean_f1
value: 0.7088200276731988
name: Euclidean F1
- type: euclidean_f1_threshold
value: 10.092880249023438
name: Euclidean F1 Threshold
- type: euclidean_precision
value: 0.6079037421348935
name: Euclidean Precision
- type: euclidean_recall
value: 0.8499112585847673
name: Euclidean Recall
- type: euclidean_ap
value: 0.719131590718056
name: Euclidean Ap
- type: dot_accuracy
value: 0.7441508209136891
name: Dot Accuracy
- type: dot_accuracy_threshold
value: 168.56625366210938
name: Dot Accuracy Threshold
- type: dot_f1
value: 0.6831510249103777
name: Dot F1
- type: dot_f1_threshold
value: 142.45849609375
name: Dot F1 Threshold
- type: dot_precision
value: 0.5665209879052749
name: Dot Precision
- type: dot_recall
value: 0.8602515626205726
name: Dot Recall
- type: dot_ap
value: 0.6693622133717865
name: Dot Ap
- type: max_accuracy
value: 0.7736333883313948
name: Max Accuracy
- type: max_accuracy_threshold
value: 181.4663848876953
name: Max Accuracy Threshold
- type: max_f1
value: 0.7088200276731988
name: Max F1
- type: max_f1_threshold
value: 222.911865234375
name: Max F1 Threshold
- type: max_precision
value: 0.6079037421348935
name: Max Precision
- type: max_recall
value: 0.8602515626205726
name: Max Recall
- type: max_ap
value: 0.719131590718056
name: Max Ap
- task:
type: paraphrase-mining
name: Paraphrase Mining
dataset:
name: dev
type: dev
metrics:
- type: average_precision
value: 0.47803306271270435
name: Average Precision
- type: f1
value: 0.5119182746878547
name: F1
- type: precision
value: 0.4683281412253375
name: Precision
- type: recall
value: 0.5644555694618273
name: Recall
- type: threshold
value: 0.8193174600601196
name: Threshold
- task:
type: information-retrieval
name: Information Retrieval
dataset:
name: Unknown
type: unknown
metrics:
- type: cosine_accuracy@1
value: 0.9654
name: Cosine Accuracy@1
- type: cosine_accuracy@3
value: 0.9904
name: Cosine Accuracy@3
- type: cosine_accuracy@5
value: 0.9948
name: Cosine Accuracy@5
- type: cosine_accuracy@10
value: 0.9974
name: Cosine Accuracy@10
- type: cosine_precision@1
value: 0.9654
name: Cosine Precision@1
- type: cosine_precision@3
value: 0.43553333333333333
name: Cosine Precision@3
- type: cosine_precision@5
value: 0.28064
name: Cosine Precision@5
- type: cosine_precision@10
value: 0.14934
name: Cosine Precision@10
- type: cosine_recall@1
value: 0.8251379240296788
name: Cosine Recall@1
- type: cosine_recall@3
value: 0.9549051140803786
name: Cosine Recall@3
- type: cosine_recall@5
value: 0.9757885342898082
name: Cosine Recall@5
- type: cosine_recall@10
value: 0.9898260744103871
name: Cosine Recall@10
- type: cosine_ndcg@10
value: 0.9786162291363164
name: Cosine Ndcg@10
- type: cosine_mrr@10
value: 0.9785615873015873
name: Cosine Mrr@10
- type: cosine_map@100
value: 0.9713888565523412
name: Cosine Map@100
- type: dot_accuracy@1
value: 0.9512
name: Dot Accuracy@1
- type: dot_accuracy@3
value: 0.985
name: Dot Accuracy@3
- type: dot_accuracy@5
value: 0.9914
name: Dot Accuracy@5
- type: dot_accuracy@10
value: 0.9964
name: Dot Accuracy@10
- type: dot_precision@1
value: 0.9512
name: Dot Precision@1
- type: dot_precision@3
value: 0.4303333333333333
name: Dot Precision@3
- type: dot_precision@5
value: 0.2788
name: Dot Precision@5
- type: dot_precision@10
value: 0.14896
name: Dot Precision@10
- type: dot_recall@1
value: 0.8119095906963455
name: Dot Recall@1
- type: dot_recall@3
value: 0.9459636855089498
name: Dot Recall@3
- type: dot_recall@5
value: 0.9708092557905298
name: Dot Recall@5
- type: dot_recall@10
value: 0.9883617291912786
name: Dot Recall@10
- type: dot_ndcg@10
value: 0.9702609044345125
name: Dot Ndcg@10
- type: dot_mrr@10
value: 0.9693138888888887
name: Dot Mrr@10
- type: dot_map@100
value: 0.9599586870108953
name: Dot Map@100
SentenceTransformer based on sentence-transformers/stsb-distilbert-base
This is a sentence-transformers model finetuned from sentence-transformers/stsb-distilbert-base. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
- Model Type: Sentence Transformer
- Base model: sentence-transformers/stsb-distilbert-base
- Maximum Sequence Length: 128 tokens
- Output Dimensionality: 768 dimensions
Model Sources
- Documentation: Sentence Transformers Documentation
- Repository: Sentence Transformers on GitHub
- Hugging Face: Sentence Transformers on Hugging Face
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: DistilBertModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("tomaarsen/stsb-distilbert-base-quora-duplicate-questions")
# Run inference
sentences = [
"What is a fetish?",
"What's a fetish?",
"Is it good to read sex stories?",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
Evaluation
Metrics
Binary Classification
- Evaluated with BinaryClassificationEvaluator
Metric | Value |
---|---|
cosine_accuracy | 0.7707 |
cosine_accuracy_threshold | 0.817 |
cosine_f1 | 0.7086 |
cosine_f1_threshold | 0.742 |
cosine_precision | 0.6033 |
cosine_recall | 0.8586 |
cosine_ap | 0.7191 |
manhattan_accuracy | 0.7729 |
manhattan_accuracy_threshold | 181.4664 |
manhattan_f1 | 0.7083 |
manhattan_f1_threshold | 222.9119 |
manhattan_precision | 0.6063 |
manhattan_recall | 0.8515 |
manhattan_ap | 0.7188 |
euclidean_accuracy | 0.7736 |
euclidean_accuracy_threshold | 8.3566 |
euclidean_f1 | 0.7088 |
euclidean_f1_threshold | 10.0929 |
euclidean_precision | 0.6079 |
euclidean_recall | 0.8499 |
euclidean_ap | 0.7191 |
dot_accuracy | 0.7442 |
dot_accuracy_threshold | 168.5663 |
dot_f1 | 0.6832 |
dot_f1_threshold | 142.4585 |
dot_precision | 0.5665 |
dot_recall | 0.8603 |
dot_ap | 0.6694 |
max_accuracy | 0.7736 |
max_accuracy_threshold | 181.4664 |
max_f1 | 0.7088 |
max_f1_threshold | 222.9119 |
max_precision | 0.6079 |
max_recall | 0.8603 |
max_ap | 0.7191 |
Paraphrase Mining
- Dataset: dev
- Evaluated with ParaphraseMiningEvaluator
Metric | Value |
---|---|
average_precision | 0.478 |
f1 | 0.5119 |
precision | 0.4683 |
recall | 0.5645 |
threshold | 0.8193 |
Information Retrieval
- Evaluated with InformationRetrievalEvaluator
Metric | Value |
---|---|
cosine_accuracy@1 | 0.9654 |
cosine_accuracy@3 | 0.9904 |
cosine_accuracy@5 | 0.9948 |
cosine_accuracy@10 | 0.9974 |
cosine_precision@1 | 0.9654 |
cosine_precision@3 | 0.4355 |
cosine_precision@5 | 0.2806 |
cosine_precision@10 | 0.1493 |
cosine_recall@1 | 0.8251 |
cosine_recall@3 | 0.9549 |
cosine_recall@5 | 0.9758 |
cosine_recall@10 | 0.9898 |
cosine_ndcg@10 | 0.9786 |
cosine_mrr@10 | 0.9786 |
cosine_map@100 | 0.9714 |
dot_accuracy@1 | 0.9512 |
dot_accuracy@3 | 0.985 |
dot_accuracy@5 | 0.9914 |
dot_accuracy@10 | 0.9964 |
dot_precision@1 | 0.9512 |
dot_precision@3 | 0.4303 |
dot_precision@5 | 0.2788 |
dot_precision@10 | 0.149 |
dot_recall@1 | 0.8119 |
dot_recall@3 | 0.946 |
dot_recall@5 | 0.9708 |
dot_recall@10 | 0.9884 |
dot_ndcg@10 | 0.9703 |
dot_mrr@10 | 0.9693 |
dot_map@100 | 0.96 |
Training Details
Training Dataset
Unnamed Dataset
- Size: 207,326 training samples
- Columns: sentence_0, sentence_1, and label
- Approximate statistics based on the first 1000 samples:
| | sentence_0 | sentence_1 | label |
|---|---|---|---|
| type | string | string | int |
| details | min: 6 tokens; mean: 13.75 tokens; max: 42 tokens | min: 6 tokens; mean: 13.74 tokens; max: 44 tokens | 1: ~100.00% |
- Samples:
| sentence_0 | sentence_1 | label |
|---|---|---|
| How do I improve writing skill by myself? | How can I improve writing skills? | 1 |
| Is it best to switch to Node.js from PHP? | Should I switch to Node.js or continue using PHP? | 1 |
| What do Hillary Clinton's supporters say when confronted with all her lies and scandals? | What do Clinton supporters say when confronted with her scandals such as the emails and 'Clinton Cash'? | 1 |
- Loss: sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss with these parameters: { "scale": 20.0, "similarity_fct": "cos_sim" }
Training Hyperparameters
Non-Default Hyperparameters
- per_device_train_batch_size: 64
- per_device_eval_batch_size: 64
- num_train_epochs: 1
- round_robin_sampler: True
All Hyperparameters
Click to expand
- overwrite_output_dir: False
- do_predict: False
- prediction_loss_only: False
- per_device_train_batch_size: 64
- per_device_eval_batch_size: 64
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: None
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: False
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- dispatch_batches: None
- split_batches: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- round_robin_sampler: True
Training Logs
Epoch | Step | Training Loss | cosine_accuracy | cosine_map@100 | dev_average_precision |
---|---|---|---|---|---|
0 | 0 | - | 0.7661 | 0.9371 | 0.4137 |
0.1543 | 500 | 0.1055 | 0.7632 | 0.9620 | 0.4731 |
0.3086 | 1000 | 0.0677 | 0.7608 | 0.9675 | 0.4732 |
0.4630 | 1500 | 0.0612 | 0.7663 | 0.9710 | 0.4856 |
0.6173 | 2000 | 0.0584 | 0.7719 | 0.9693 | 0.4925 |
0.7716 | 2500 | 0.0506 | 0.7714 | 0.9709 | 0.4808 |
0.9259 | 3000 | 0.0488 | 0.7708 | 0.9713 | 0.4784 |
1.0 | 3240 | - | 0.7707 | 0.9714 | 0.4780 |
Framework Versions
- Python: 3.11.6
- Sentence Transformers: 2.7.0.dev0
- Transformers: 4.39.3
- PyTorch: 2.1.0+cu121
- Accelerate: 0.26.1
- Datasets: 2.18.0
- Tokenizers: 0.15.2
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
MultipleNegativesRankingLoss
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}