Spaces:

nyarkssss
/

ug_twen_translate

Sleeping

nyarkssss

new

46c9ee7 4 days ago

1.78 kB

	import os
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	from flores200_codes import flores_codes

	# Resolve the Hugging Face access token from the environment.
	# "HUGGING_FACE_HUB_TOKEN" is the legacy spelling recognised by huggingface_hub;
	# "HUGGINGFACE_HUB_TOKEN" is kept because this file already read it.
	# Fall back to None rather than True: True means "a token is required" and
	# raises when no token is stored locally, which breaks anonymous access to
	# public models. None uses a stored token when present and otherwise proceeds
	# without authentication.
	hf_token = auth_token = (
	    os.environ.get("HF_TOKEN")
	    or os.environ.get("HUGGING_FACE_HUB_TOKEN")
	    or os.environ.get("HUGGINGFACE_HUB_TOKEN")
	    or None
	)

	# Shared store of loaded artifacts, keyed "<model_name>_model" and
	# "<model_name>_tokenizer" (filled in by load_models).
	model_dict = {}


	def load_models(model_name: str):
	    """Ensure the model and tokenizer for ``model_name`` are in ``model_dict``.

	    The artifacts are loaded with ``from_pretrained`` only the first time a
	    given name is requested; later calls reuse the entries already stored in
	    the module-level ``model_dict``.

	    Args:
	        model_name: Short model name; one of ``"ug_entw_translate"`` or
	            ``"ug_twen_translate"``.

	    Returns:
	        The shared ``model_dict``, containing at least the keys
	        ``model_name + "_model"`` and ``model_name + "_tokenizer"``.

	    Raises:
	        KeyError: If ``model_name`` is not one of the supported names.
	    """
	    # Short name -> Hugging Face Hub repository id.
	    repo_ids = {
	        "ug_entw_translate": "nyarkssss/ug_entw_translate",
	        "ug_twen_translate": "nyarkssss/ug_twen_translate",
	    }
	    try:
	        repo_id = repo_ids[model_name]
	    except KeyError:
	        raise KeyError(
	            "Unknown model %r; expected one of %s" % (model_name, sorted(repo_ids))
	        ) from None

	    model_key = model_name + "_model"
	    tokenizer_key = model_name + "_tokenizer"

	    # Skip the expensive load when both artifacts are already stored.
	    if model_key not in model_dict or tokenizer_key not in model_dict:
	        print("\tLoading model: %s" % model_name)
	        model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, use_auth_token=auth_token)
	        tokenizer = AutoTokenizer.from_pretrained(repo_id, use_auth_token=auth_token)
	        # Store only after both loads succeeded so a failure never leaves a
	        # model without its tokenizer in the cache.
	        model_dict[model_key] = model
	        model_dict[tokenizer_key] = tokenizer

	    return model_dict


	def translation(model_name: str, source, target, text: str, max_length: int = 512):
	    """Translate ``text`` and return the result in a JSON-compatible dict.

	    Args:
	        model_name: Short model name understood by ``load_models``.
	        source: Source language name; must be a key of ``flores_codes``.
	        target: Target language name; must be a key of ``flores_codes``.
	        text: Text to translate. ``None``, empty, or whitespace-only text
	            yields an empty translation without loading any model.
	        max_length: Forwarded to the translation pipeline call as
	            ``max_length`` (defaults to 512, the previously fixed value).

	    Returns:
	        ``{"Translation": <translated text>}``.

	    Raises:
	        KeyError: If ``source`` or ``target`` is not in ``flores_codes``, or
	            if ``model_name`` is rejected by ``load_models``.
	    """
	    # Nothing to translate: avoid loading a model and running generation.
	    if text is None or not text.strip():
	        return {"Translation": ""}

	    # Validate the languages before the (expensive) model load.
	    try:
	        src_code = flores_codes[source]
	        tgt_code = flores_codes[target]
	    except KeyError as err:
	        raise KeyError("Unsupported language name: %s" % err) from None

	    loaded = load_models(model_name)

	    translator = pipeline(
	        "translation",
	        model=loaded[model_name + "_model"],
	        tokenizer=loaded[model_name + "_tokenizer"],
	        src_lang=src_code,
	        tgt_lang=tgt_code,
	    )
	    output = translator(text, max_length=max_length)

	    # JSON-compatible result; the first pipeline output holds the translation.
	    return {"Translation": output[0]["translation_text"]}


	# Example rows in the argument order of translation():
	# [model_name, source language, target language, text].
	# The model names must be ones load_models() accepts; the previous value
	# "nllb-200-distilled-600M" is not among them and would raise KeyError.
	# NOTE(review): assumes "entw" = English -> Twi/Akan and "twen" = Twi/Akan ->
	# English, inferred from the model names only; confirm.
	NLLB_EXAMPLES = [
	    ["ug_entw_translate", "English", "Akan", "Hello, how are you today?"],
	    ["ug_twen_translate", "Akan", "English", "Me adwuma anopa yi."],
	]