import os

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

from flores200_codes import flores_codes

# Use HF_TOKEN (or HUGGINGFACE_HUB_TOKEN) from the environment; fall back to
# True so any locally cached login token is used (public models load either way).
auth_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or True

model_dict = {}


def load_models(model_name: str):
    # Map the display name to its Hugging Face Hub repository.
    model_name_dict = {
        "Ɔbaa Panin MT: English-Akan": "hci-lab-dcug/Maternal_Health_Translation_English_Akan",
        "Ɔbaa Panin MT: Akan-English": "hci-lab-dcug/Maternal_Health_Translation_Akan_English",
    }[model_name]

    # Reuse the cached model and tokenizer if this model was already loaded.
    if model_name + "_model" in model_dict:
        return model_dict

    print("\tLoading model: %s" % model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name_dict, use_auth_token=auth_token)
    tokenizer = AutoTokenizer.from_pretrained(model_name_dict, use_auth_token=auth_token)

    model_dict[model_name + "_model"] = model
    model_dict[model_name + "_tokenizer"] = tokenizer

    return model_dict


def translation(model_name: str, source: str, target: str, text: str):
    model_dict = load_models(model_name)

    # Convert human-readable language names to FLORES-200 codes.
    source = flores_codes[source]
    target = flores_codes[target]

    model = model_dict[model_name + "_model"]
    tokenizer = model_dict[model_name + "_tokenizer"]

    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
    )
    output = translator(text, max_length=512)

    # Return a JSON-compatible dictionary (Gradio renders it as JSON).
    result = {"Translation": output[0]["translation_text"]}
    return result
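
# --- Usage sketch (assumption, not part of the original app) ---
# A minimal Gradio wiring for translation() above, to show how the function's
# (model_name, source, target, text) signature maps onto UI inputs. The exact
# layout, labels, and launch options of the real Space are unknown; everything
# below the guard is hypothetical.
if __name__ == "__main__":
    import gradio as gr

    demo = gr.Interface(
        fn=translation,
        inputs=[
            gr.Dropdown(
                ["Ɔbaa Panin MT: English-Akan", "Ɔbaa Panin MT: Akan-English"],
                label="Model",
            ),
            # Source/target choices come from the flores_codes mapping keys.
            gr.Dropdown(list(flores_codes.keys()), label="Source language"),
            gr.Dropdown(list(flores_codes.keys()), label="Target language"),
            gr.Textbox(lines=4, label="Text to translate"),
        ],
        outputs=gr.JSON(label="Translation"),
        title="Ɔbaa Panin MT",
    )
    demo.launch()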