Upload README.md
Browse files
README.md
CHANGED
@@ -81,7 +81,7 @@ Using this model becomes easy when you have [sentence-transformers](https://www.
|
|
81 |
pip install -U sentence-transformers
|
82 |
```
|
83 |
|
84 |
-
|
85 |
|
86 |
```python
|
87 |
from sentence_transformers import SentenceTransformer
|
@@ -92,10 +92,57 @@ embeddings = model.encode(sentences)
|
|
92 |
print(embeddings)
|
93 |
```
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
### License
|
96 |
|
97 |
This project is licensed under the [MIT License](./LICENSE).
|
98 |
|
99 |
### Copyright
|
100 |
|
101 |
-
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu).
|
|
|
81 |
pip install -U sentence-transformers
|
82 |
```
|
83 |
|
84 |
+
### Embeddings
|
85 |
|
86 |
```python
|
87 |
from sentence_transformers import SentenceTransformer
|
|
|
92 |
print(embeddings)
|
93 |
```
|
94 |
|
95 |
+
### Usage Example
|
96 |
+
|
97 |
+
```python
|
98 |
+
from sentence_transformers import SentenceTransformer, util
|
99 |
+
import torch
|
100 |
+
|
101 |
+
# Define sentences in Hausa
|
102 |
+
sentences = [
|
103 |
+
"Menene sunan babban birnin Ingila?",
|
104 |
+
"Wanne dabba ne mafi zafi a duniya?",
|
105 |
+
"Ta yaya zan iya koyon harshen Hausa?",
|
106 |
+
"Wanne abinci ne mafi shahara a Najeriya?",
|
107 |
+
"Wane irin kaya ake sawa don bikin Hausa?"
|
108 |
+
]
|
109 |
+
|
110 |
+
# Load the Hausa-trained model
|
111 |
+
model = SentenceTransformer('path/to/pmmlv2-fine-tuned-hausa')
|
112 |
+
|
113 |
+
# Compute embeddings
|
114 |
+
embeddings = model.encode(sentences, convert_to_tensor=True)
|
115 |
+
|
116 |
+
# Function to find the closest sentence
|
117 |
+
def find_closest_sentence(query_embedding, sentence_embeddings, sentences):
|
118 |
+
# Compute cosine similarities
|
119 |
+
cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
|
120 |
+
# Find the position of the highest score
|
121 |
+
best_match_index = torch.argmax(cosine_scores).item()
|
122 |
+
return sentences[best_match_index], cosine_scores[best_match_index].item()
|
123 |
+
|
124 |
+
query = "Menene sunan babban birnin Ingila?"
|
125 |
+
query_embedding = model.encode(query, convert_to_tensor=True)
|
126 |
+
closest_sentence, similarity_score = find_closest_sentence(query_embedding, embeddings, sentences)
|
127 |
+
|
128 |
+
print(f"Tambaya: {query}")
|
129 |
+
print(f"Jimla mafi kusa: {closest_sentence}")
|
130 |
+
print(f"Alamar kama: {similarity_score:.4f}")
|
131 |
+
|
132 |
+
# You can also try with a new sentence not in the original list
|
133 |
+
new_query = "Wanne sarki ne yake mulkin Kano a yanzu?"
|
134 |
+
new_query_embedding = model.encode(new_query, convert_to_tensor=True)
|
135 |
+
closest_sentence, similarity_score = find_closest_sentence(new_query_embedding, embeddings, sentences)
|
136 |
+
|
137 |
+
print(f"\nSabuwar Tambaya: {new_query}")
|
138 |
+
print(f"Jimla mafi kusa: {closest_sentence}")
|
139 |
+
print(f"Alamar kama: {similarity_score:.4f}")
|
140 |
+
```
|
141 |
+
|
142 |
### License
|
143 |
|
144 |
This project is licensed under the [MIT License](./LICENSE).
|
145 |
|
146 |
### Copyright
|
147 |
|
148 |
+
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu).
|