Commit 1056360 · Parent: 6b1add2 · "Update README.md" · Browse files
README.md
CHANGED
@@ -56,7 +56,7 @@ We compare our solution with both open automatic spell checkers and the ChatGPT
|
|
56 |
| HunSpell | 16.2 | 40.1 | 23.0 |
|
57 |
|
58 |
**MedSpellChecker**
|
59 |
-
|
|
60 |
| --- | --- | --- | --- |
|
61 |
| M2M100-1.2B | 63.7 | 57.8 | 60.6 |
|
62 |
| ChatGPT gpt-3.5-turbo-0301 | 53.2 | 67.6 | 59.6 |
|
@@ -67,7 +67,7 @@ We compare our solution with both open automatic spell checkers and the ChatGPT
|
|
67 |
| HunSpell | 10.3 | 40.2 | 16.4 |
|
68 |
|
69 |
**GitHubTypoCorpusRu**
|
70 |
-
|
|
71 |
| --- | --- | --- | --- |
|
72 |
| M2M100-1.2B | 45.7 | 41.4 | 43.5 |
|
73 |
| ChatGPT gpt-3.5-turbo-0301 | 43.8 | 57.0 | 49.6 |
|
@@ -75,4 +75,22 @@ We compare our solution with both open automatic spell checkers and the ChatGPT
|
|
75 |
| ChatGPT text-davinci-003 | 46.5 | 58.1 | 51.7 |
|
76 |
| Yandex.Speller | 67.7 | 37.5 | 48.3 |
|
77 |
| JamSpell | 49.5 | 29.9 | 37.3 |
|
78 |
-
| HunSpell | 28.5 | 30.7 | 29.6 |
|
— (end of previous version; updated version of the changed hunks follows) —
56 |
| HunSpell | 16.2 | 40.1 | 23.0 |
|
57 |
|
58 |
**MedSpellChecker**
|
59 |
+
| Model | Precision | Recall | F1 |
|
60 |
| --- | --- | --- | --- |
|
61 |
| M2M100-1.2B | 63.7 | 57.8 | 60.6 |
|
62 |
| ChatGPT gpt-3.5-turbo-0301 | 53.2 | 67.6 | 59.6 |
|
|
|
67 |
| HunSpell | 10.3 | 40.2 | 16.4 |
|
68 |
|
69 |
**GitHubTypoCorpusRu**
|
70 |
+
| Model | Precision | Recall | F1 |
|
71 |
| --- | --- | --- | --- |
|
72 |
| M2M100-1.2B | 45.7 | 41.4 | 43.5 |
|
73 |
| ChatGPT gpt-3.5-turbo-0301 | 43.8 | 57.0 | 49.6 |
|
|
|
75 |
| ChatGPT text-davinci-003 | 46.5 | 58.1 | 51.7 |
|
76 |
| Yandex.Speller | 67.7 | 37.5 | 48.3 |
|
77 |
| JamSpell | 49.5 | 29.9 | 37.3 |
|
78 |
+
| HunSpell | 28.5 | 30.7 | 29.6 |
|
79 |
+
|
80 |
+
## How to use
|
81 |
+
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
82 |
+
|
83 |
+
path_to_model = "<path_to_model>"
|
84 |
+
|
85 |
+
model = M2M100ForConditionalGeneration.from_pretrained(path_to_model)
|
86 |
+
tokenizer = M2M100Tokenizer.from_pretrained(path_to_model)
|
87 |
+
|
88 |
+
sentence = "прийдя в МГТУ я был удивлен никого необноружив там…"
|
89 |
+
|
90 |
+
encodings = tokenizer(sentence, return_tensors="pt")
|
91 |
+
generated_tokens = model.generate(
|
92 |
+
**encodings, forced_bos_token_id=tokenizer.get_lang_id("ru"))
|
93 |
+
answer = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
94 |
+
print(answer)
|
95 |
+
|
96 |
+
# ["прийдя в МГТУ я был удивлен никого не обнаружив там..."]
|