|
""" |
|
This file contains the functions to translate the text from one language to another. |
|
""" |
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer |
|
from deep_translator import GoogleTranslator, MyMemoryTranslator, MicrosoftTranslator, YandexTranslator, ChatGptTranslator |
|
from .text_preprocess import decontracting_words, space_punc |
|
from dotenv import load_dotenv |
|
import os |
|
|
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
MICROSOFT_API_KEY = os.getenv("MICROSOFT_TRANSLATOR_KEY") |
|
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") |
|
YANDEX_API_KEY = os.getenv("YANDEX_API_KEY") |
|
|
|
|
|
digit_converter = { |
|
'০': '0', |
|
'১': '1', |
|
'২': '2', |
|
'৩': '3', |
|
'৪': '4', |
|
'৫': '5', |
|
'৬': '6', |
|
'৭': '7', |
|
'৮': '8', |
|
'৯': '9' |
|
} |
|
|
|
|
|
def get_translated_digit(sentence): |
|
""" |
|
Translate the digits from Bengali to English |
|
""" |
|
translated_sentence = [] |
|
for each_letter in sentence: |
|
if each_letter in digit_converter.keys(): |
|
translated_sentence.append(digit_converter[each_letter]) |
|
|
|
else: |
|
translated_sentence.append(each_letter) |
|
|
|
|
|
return "".join(each for each in translated_sentence) |
|
|
|
|
|
translation_model_bn_en = AutoModelForSeq2SeqLM.from_pretrained("csebuetnlp/banglat5_nmt_bn_en") |
|
translation_tokenizer_bn_en = AutoTokenizer.from_pretrained("csebuetnlp/banglat5_nmt_bn_en") |
|
|
|
def banglanmt_translation(input_text): |
|
""" |
|
Translate a sentence from Bengali to English using BUET BanglaNMT |
|
""" |
|
inputs = translation_tokenizer_bn_en(input_text, return_tensors="pt") |
|
outputs = translation_model_bn_en.generate(**inputs) |
|
translated_text = translation_tokenizer_bn_en.decode(outputs[0], skip_special_tokens=True) |
|
return translated_text |
|
|
|
def google_translation(sentence: str, source="bn", target="en") -> str: |
|
""" |
|
Translate a sentence from one language to another using Google Translator.\n |
|
At first install dependencies \n |
|
`!pip install -U deep-translator` |
|
""" |
|
translator = GoogleTranslator() |
|
translated_sentence = translator.translate( |
|
sentence, source=source, target=target) |
|
return translated_sentence |
|
|
|
def microsoft_translation(sentence: str, source="bn", target="en") -> str: |
|
""" |
|
Translate a sentence from one language to another using Microsoft Translator.\n |
|
At first install dependencies \n |
|
`!pip install -U deep-translator` |
|
""" |
|
translator = MicrosoftTranslator(api_key=MICROSOFT_API_KEY, target='en') |
|
translated_sentence = translator.translate(sentence) |
|
return translated_sentence |
|
|
|
def chatgpt_translation(sentence: str, source="bn", target="en") -> str: |
|
""" |
|
Translate a sentence from one language to another using ChatGPT Translator.\n |
|
At first install dependencies \n |
|
`!pip install -U deep-translator` |
|
""" |
|
translator = ChatGptTranslator(api_key=OPENAI_API_KEY, target=target) |
|
translated_sentence = translator.translate(sentence) |
|
return translated_sentence |
|
|
|
def yandex_translation(sentence: str, source="bn", target="en") -> str: |
|
""" |
|
Translate a sentence from one language to another using Yandex Translator.\n |
|
At first install dependencies \n |
|
`!pip install -U deep-translator` |
|
""" |
|
translator = YandexTranslator(api_key=YANDEX_API_KEY) |
|
translated_sentence = translator.translate( |
|
sentence, source=source, target=target) |
|
return translated_sentence |
|
|
|
def mymemory_translation(sentence: str, source="bn-IN", target="en-US") -> str: |
|
""" |
|
Translate a sentence from one language to another using MyMemory Translator.\n |
|
At first install dependencies \n |
|
`!pip install -U deep-translator` |
|
""" |
|
translator = MyMemoryTranslator(source=source, target=target) |
|
translated_sentence = translator.translate(sentence) |
|
return translated_sentence |
|
|
|
def get_better_translation(translator_func, src=""): |
|
src_mod = get_translated_digit(src) |
|
tgt = translator_func(src_mod) |
|
tgt = decontracting_words(tgt) |
|
tgt = tgt.replace('rupees', 'takas').replace('Rs', 'takas') |
|
return tgt |
|
|
|
def select_translator(src, translator): |
|
""" |
|
Select the translator |
|
""" |
|
tgt = None |
|
tgt_base = None |
|
|
|
if translator == "Google": |
|
tgt = get_better_translation(google_translation, src) |
|
tgt = space_punc(tgt) |
|
tgt_base = google_translation(src) |
|
elif translator == "BanglaNMT": |
|
tgt = get_better_translation(banglanmt_translation, src) |
|
tgt = space_punc(tgt) |
|
tgt_base = banglanmt_translation(src) |
|
elif translator == "MyMemory": |
|
tgt = get_better_translation(mymemory_translation, src) |
|
tgt = space_punc(tgt) |
|
tgt_base = mymemory_translation(src) |
|
|
|
return tgt_base, tgt |
|
|