Update README.md
README.md CHANGED
@@ -1195,7 +1195,7 @@ model-index:
    - type: map_at_5
      value: 15.271
    - type: mrr_at_1
      value: 69
    - type: mrr_at_10
      value: 75.304
    - type: mrr_at_100

@@ -1219,9 +1219,9 @@ model-index:
    - type: ndcg_at_5
      value: 42.104
    - type: precision_at_1
      value: 69
    - type: precision_at_10
      value: 33
    - type: precision_at_100
      value: 10.75
    - type: precision_at_1000

@@ -1815,7 +1815,7 @@ model-index:
    - type: ndcg_at_3
      value: 85.435
    - type: ndcg_at_5
      value: 87
    - type: precision_at_1
      value: 81.24
    - type: precision_at_10

@@ -1910,13 +1910,13 @@ model-index:
    - type: precision_at_1
      value: 24.8
    - type: precision_at_10
      value: 12
    - type: precision_at_100
      value: 2.5420000000000003
    - type: precision_at_1000
      value: 0.39899999999999997
    - type: precision_at_3
      value: 20
    - type: precision_at_5
      value: 17.4
    - type: recall_at_1

@@ -2197,7 +2197,7 @@ model-index:
    - type: recall_at_100
      value: 96.167
    - type: recall_at_1000
      value: 100
    - type: recall_at_3
      value: 74.117
    - type: recall_at_5

@@ -2250,7 +2250,7 @@ model-index:
    - type: manhattan_precision
      value: 91.72482552342971
    - type: manhattan_recall
      value: 92
    - type: max_accuracy
      value: 99.83861386138614
    - type: max_ap

@@ -2331,7 +2331,7 @@ model-index:
    - type: map_at_5
      value: 1.001
    - type: mrr_at_1
      value: 76
    - type: mrr_at_10
      value: 85.667
    - type: mrr_at_100

@@ -2343,7 +2343,7 @@ model-index:
    - type: mrr_at_5
      value: 85.667
    - type: ndcg_at_1
      value: 72
    - type: ndcg_at_10
      value: 68.637
    - type: ndcg_at_100

@@ -2355,7 +2355,7 @@ model-index:
    - type: ndcg_at_5
      value: 71.808
    - type: precision_at_1
      value: 76
    - type: precision_at_10
      value: 73.8
    - type: precision_at_100

@@ -2365,7 +2365,7 @@ model-index:
    - type: precision_at_3
      value: 74.667
    - type: precision_at_5
      value: 78
    - type: recall_at_1
      value: 0.22100000000000003
    - type: recall_at_10
@@ -2596,6 +2596,10 @@ model-index:
      value: 85.53503846009764
    - type: max_f1
      value: 77.68167368965773
license: apache-2.0
language:
- en
library_name: transformers
---

<br><br>
@@ -2605,7 +2609,7 @@ model-index:
</p>

<p align="center">
<b>The crispy sentence embedding family from <a href="https://mixedbread.ai"><b>mixedbread ai</b></a>.</b>
</p>

# mxbai-embed-2d-large-v1
@@ -2617,7 +2621,73 @@ model-index:

Currently, the best way to use our models is with the most recent version of sentence-transformers.

```bash
python -m pip install -U sentence-transformers
```

```python
from sentence_transformers import models, SentenceTransformer
from sentence_transformers.util import cos_sim

# 1. Load the model with `cls` pooling
word_embedding_model = models.Transformer("mixedbread-ai/mxbai-embed-2d-large-v1")
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode="cls")
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# 2. Set the adaptive layer and embedding size.
# It is recommended to use between 20 and 24 layers.
new_num_layers = 22  # 1st dimension: number of layers
model[0].auto_model.encoder.layer = model[0].auto_model.encoder.layer[:new_num_layers]
new_embedding_size = 768  # 2nd dimension: embedding size

# 3. Encode
embeddings = model.encode(
    [
        'Who is german and likes bread?',
        'Everybody in Germany.'
    ]
)

# Similarity of the two sentences, using only the first `new_embedding_size` dimensions
similarities = cos_sim(embeddings[0, :new_embedding_size], embeddings[1, :new_embedding_size])

print('similarities:', similarities)
```
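
As a rough sketch (not one of the official examples above), the same adaptive layer and embedding size can also be applied with plain `transformers`. This assumes the checkpoint loads as a standard BERT-style encoder via `AutoModel`, and that CLS pooling on the selected hidden layer mirrors the truncated sentence-transformers setup; treat it as an illustration rather than a reference implementation.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mixedbread-ai/mxbai-embed-2d-large-v1")
model = AutoModel.from_pretrained("mixedbread-ai/mxbai-embed-2d-large-v1")

layer_index = 22      # 1st dimension: which encoder layer to read from (assumes a 24-layer encoder)
embedding_size = 768  # 2nd dimension: how many embedding dimensions to keep

sentences = ['Who is german and likes bread?', 'Everybody in Germany.']
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs, output_hidden_states=True)

# CLS pooling on the chosen layer, then truncate to the chosen embedding size.
# hidden_states[0] is the embedding layer, so index `layer_index` is the output of encoder layer 22.
embeddings = outputs.hidden_states[layer_index][:, 0, :embedding_size]
embeddings = F.normalize(embeddings, p=2, dim=-1)

print('similarity:', (embeddings[0] @ embeddings[1]).item())
```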
### angle-emb

You can also use the latest `angle-emb` for inference, as follows:

```bash
python -m pip install -U angle-emb
```

```python
from angle_emb import AnglE
from sentence_transformers.util import cos_sim

# 1. Load the model
model = AnglE.from_pretrained("mixedbread-ai/mxbai-embed-2d-large-v1", pooling_strategy='cls').cuda()

# 2. Set the adaptive layer and embedding size.
# It is recommended to use between 20 and 24 layers.
layer_index = 22      # 1st dimension: layer
embedding_size = 768  # 2nd dimension: embedding size

# 3. Encode
embeddings = model.encode([
    'Who is german and likes bread?',
    'Everybody in Germany.'
], layer_index=layer_index, embedding_size=embedding_size)

similarities = cos_sim(embeddings[0], embeddings[1:])
print('similarities:', similarities)
```
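As a rough rule of thumb, lower `layer_index` and smaller `embedding_size` values give faster inference and smaller vectors at some cost in quality, so it can be worth evaluating a few settings on your own data before committing to one.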
### Using API

You’ll be able to use the models through our API as well. The API is coming soon and will have some exciting features. Stay tuned!