Update goai_helpers/goai_traduction.py
Browse files
goai_helpers/goai_traduction.py
CHANGED
|
@@ -2,7 +2,7 @@ import torch
|
|
| 2 |
import spaces
|
| 3 |
import re
|
| 4 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 5 |
-
from goai_helpers.utils import MooreConverter
|
| 6 |
from huggingface_hub import login
|
| 7 |
import os
|
| 8 |
|
|
@@ -62,6 +62,8 @@ def translate_chunk(text, src_lang, tgt_lang):
|
|
| 62 |
else:
|
| 63 |
model_id = "ArissBandoss/nllb-200-3.3B-fr2mos"
|
| 64 |
#model_id = "ArissBandoss/nllb-200-3.3B-mos-fr-bidirectional-peft"
|
|
|
|
|
|
|
| 65 |
|
| 66 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
|
| 67 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, token=auth_token).to(device)
|
|
@@ -86,6 +88,15 @@ def translate_chunk(text, src_lang, tgt_lang):
|
|
| 86 |
# Décodage
|
| 87 |
translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
return translation
|
| 91 |
|
|
|
|
| 2 |
import spaces
|
| 3 |
import re
|
| 4 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 5 |
+
from goai_helpers.utils import MooreConverter, mark_numbers, unmark_numbers
|
| 6 |
from huggingface_hub import login
|
| 7 |
import os
|
| 8 |
|
|
|
|
| 62 |
else:
|
| 63 |
model_id = "ArissBandoss/nllb-200-3.3B-fr2mos"
|
| 64 |
#model_id = "ArissBandoss/nllb-200-3.3B-mos-fr-bidirectional-peft"
|
| 65 |
+
|
| 66 |
+
text = mark_numbers(text)
|
| 67 |
|
| 68 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
|
| 69 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, token=auth_token).to(device)
|
|
|
|
| 88 |
# Décodage
|
| 89 |
translation = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
| 90 |
|
| 91 |
+
translation = unmark_numbers(translation)
|
| 92 |
+
|
| 93 |
+
number_converter = MooreConverter()
|
| 94 |
+
numbers = re.findall(r'\b\d+\b', translation)
|
| 95 |
+
for number in numbers:
|
| 96 |
+
moore_number = number_converter.number_to_moore(int(number))
|
| 97 |
+
if moore_number: # Only replace if conversion succeeded
|
| 98 |
+
translation = translation.replace(number, moore_number)
|
| 99 |
+
|
| 100 |
|
| 101 |
return translation
|
| 102 |
|