Spaces:
Sleeping
Sleeping
import os | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
from flores200_codes import flores_codes | |
# Hub access token: prefer HF_TOKEN, then the legacy HUGGINGFACE_HUB_TOKEN.
# Fall back to None (anonymous access) rather than True: True *requires* a
# locally saved token and makes the Hub client raise when none is found,
# which breaks loading public models on a machine that never logged in.
hf_token = auth_token = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_HUB_TOKEN")
    or None
)

# Cache of loaded objects, keyed "<model_name>_model" / "<model_name>_tokenizer".
model_dict = {}
def load_models(model_name: str):
    """Ensure the model and tokenizer for ``model_name`` are in ``model_dict``.

    The pair is loaded from the Hub on first use only; later calls for the
    same name return the cached objects instead of re-instantiating them.

    Args:
        model_name: Short model name; must be one of the keys of the
            repo-id mapping below.

    Returns:
        The module-level ``model_dict`` cache, which holds the entries
        ``model_name + "_model"`` and ``model_name + "_tokenizer"``.

    Raises:
        KeyError: If ``model_name`` is not a supported model name.
    """
    repo_ids = {
        "ug_entw_translate": "nyarkssss/ug_entw_translate",
        "ug_twen_translate": "nyarkssss/ug_twen_translate",
    }
    # Validate the name first so unknown models fail even when the cache is warm.
    repo_id = repo_ids[model_name]

    model_key = model_name + "_model"
    tokenizer_key = model_name + "_tokenizer"

    # Cache hit: loading is expensive, so never repeat it for a known model.
    if model_key in model_dict and tokenizer_key in model_dict:
        return model_dict

    print("\tLoading model: %s" % model_name)
    # NOTE(review): newer transformers releases spell this argument `token`;
    # `use_auth_token` is kept for compatibility — confirm the pinned version.
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, use_auth_token=auth_token)
    tokenizer = AutoTokenizer.from_pretrained(repo_id, use_auth_token=auth_token)

    # Store both only after both loaded, so the cache never holds half a pair.
    model_dict[model_key] = model
    model_dict[tokenizer_key] = tokenizer
    return model_dict
def translation(model_name: str, source, target, text: str):
    """Translate ``text`` and return the result as a JSON-compatible dict.

    Args:
        model_name: Short model name, passed to ``load_models``.
        source: Source language name; looked up in ``flores_codes``.
        target: Target language name; looked up in ``flores_codes``.
        text: Text to translate.

    Returns:
        ``{"Translation": <translated text>}``. For empty or whitespace-only
        ``text`` the translation is the empty string and no model is loaded.

    Raises:
        KeyError: If ``model_name`` is not supported by ``load_models``.
            Unknown language names fail at the ``flores_codes`` lookup.
    """
    # Nothing to translate: skip the expensive model load and generation
    # instead of asking the model to produce output from an empty prompt.
    if not text or not text.strip():
        return {"Translation": ""}

    loaded = load_models(model_name)
    src_code = flores_codes[source]
    tgt_code = flores_codes[target]

    translator = pipeline(
        "translation",
        model=loaded[model_name + "_model"],
        tokenizer=loaded[model_name + "_tokenizer"],
        src_lang=src_code,
        tgt_lang=tgt_code,
    )
    output = translator(text, max_length=512)

    # Plain dict so the caller (presumably a Gradio JSON output — confirm
    # against the UI wiring) can serialize it directly.
    return {"Translation": output[0]["translation_text"]}
# Example rows in the argument order of translation():
# [model_name, source language, target language, text].
# The model name must be one accepted by load_models(); the previous value
# "nllb-200-distilled-600M" is not, so using an example raised KeyError.
NLLB_EXAMPLES = [
    ["ug_entw_translate", "English", "Akan", "Hello, how are you today?"],
    ["ug_twen_translate", "Akan", "English", "Me adwuma anopa yi."],
]