Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -22,19 +22,19 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
22 |
translator = pipeline('translation', model=trans_model, tokenizer=eng_trans_tokenizer, src_lang="eng_Latn", tgt_lang=chat_language, max_length = 400, device=device)
|
23 |
|
24 |
# Initialize translation pipelines
|
25 |
-
pipe = pipeline("translation", model="thilina/mt5-sinhalese-english")
|
26 |
|
27 |
-
sin_trans_model = AutoModelForSeq2SeqLM.from_pretrained("thilina/mt5-sinhalese-english")
|
28 |
-
si_trans_tokenizer = AutoTokenizer.from_pretrained("thilina/mt5-sinhalese-english")
|
29 |
|
30 |
-
singlish_pipe = pipeline("text2text-generation", model="Dhahlan2000/
|
31 |
|
32 |
# Translation functions
|
33 |
def translate_Singlish_to_sinhala(text):
|
34 |
|
35 |
-
translated_text = singlish_pipe(f"translate Singlish to
|
36 |
|
37 |
-
return translated_text
|
38 |
|
39 |
def translate_english_to_sinhala(text):
|
40 |
# Split the text into sentences or paragraphs
|
@@ -47,21 +47,21 @@ def translate_english_to_sinhala(text):
|
|
47 |
translated_text = "\n".join(translated_parts)
|
48 |
return translated_text.replace("ප් රභූවරුන්", "").replace('\u200d', '')
|
49 |
|
50 |
-
def translate_sinhala_to_english(text):
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
|
66 |
def transliterate_from_sinhala(text):
|
67 |
# Define the source and target scripts
|
@@ -116,11 +116,11 @@ def conversation_predict(input_text):
|
|
116 |
|
117 |
def ai_predicted(user_input):
|
118 |
user_input = translate_Singlish_to_sinhala(user_input)
|
119 |
-
print("You(Singlish): ", user_input,"\n")
|
120 |
-
user_input = transliterate_to_sinhala(user_input)
|
121 |
-
print("You(Sinhala): ", user_input,"\n")
|
122 |
-
user_input = translate_sinhala_to_english(user_input)
|
123 |
print("You(English): ", user_input,"\n")
|
|
|
|
|
|
|
|
|
124 |
|
125 |
# Get AI response
|
126 |
ai_response = conversation_predict(user_input)
|
|
|
22 |
translator = pipeline('translation', model=trans_model, tokenizer=eng_trans_tokenizer, src_lang="eng_Latn", tgt_lang=chat_language, max_length = 400, device=device)
|
23 |
|
24 |
# Initialize translation pipelines
|
25 |
+
# pipe = pipeline("translation", model="thilina/mt5-sinhalese-english")
|
26 |
|
27 |
+
# sin_trans_model = AutoModelForSeq2SeqLM.from_pretrained("thilina/mt5-sinhalese-english")
|
28 |
+
# si_trans_tokenizer = AutoTokenizer.from_pretrained("thilina/mt5-sinhalese-english")
|
29 |
|
30 |
+
singlish_pipe = pipeline("text2text-generation", model="Dhahlan2000/Chitti-Base-model-for-GPT-v7")
|
31 |
|
32 |
# Translation functions
|
33 |
def translate_Singlish_to_sinhala(text):
|
34 |
|
35 |
+
translated_text = singlish_pipe(f"translate Singlish to English: {text}", clean_up_tokenization_spaces=False)[0]['generated_text']
|
36 |
|
37 |
+
return translated_text
|
38 |
|
39 |
def translate_english_to_sinhala(text):
|
40 |
# Split the text into sentences or paragraphs
|
|
|
47 |
translated_text = "\n".join(translated_parts)
|
48 |
return translated_text.replace("ප් රභූවරුන්", "").replace('\u200d', '')
|
49 |
|
50 |
+
# def translate_sinhala_to_english(text):
|
51 |
+
# # Split the text into sentences or paragraphs
|
52 |
+
# parts = text.split("\n") # Split by new lines for paragraphs, adjust as needed
|
53 |
+
# translated_parts = []
|
54 |
+
# for part in parts:
|
55 |
+
# # Tokenize each part
|
56 |
+
# inputs = si_trans_tokenizer(part.strip(), return_tensors="pt", padding=True, truncation=True, max_length=512)
|
57 |
+
# # Generate translation
|
58 |
+
# outputs = sin_trans_model.generate(**inputs)
|
59 |
+
# # Decode translated text while preserving formatting
|
60 |
+
# translated_part = si_trans_tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
61 |
+
# translated_parts.append(translated_part)
|
62 |
+
# # Join the translated parts back together
|
63 |
+
# translated_text = "\n".join(translated_parts)
|
64 |
+
# return translated_text
|
65 |
|
66 |
def transliterate_from_sinhala(text):
|
67 |
# Define the source and target scripts
|
|
|
116 |
|
117 |
def ai_predicted(user_input):
|
118 |
user_input = translate_Singlish_to_sinhala(user_input)
|
|
|
|
|
|
|
|
|
119 |
print("You(English): ", user_input,"\n")
|
120 |
+
# user_input = transliterate_to_sinhala(user_input)
|
121 |
+
# print("You(Sinhala): ", user_input,"\n")
|
122 |
+
# user_input = translate_sinhala_to_english(user_input)
|
123 |
+
# print("You(English): ", user_input,"\n")
|
124 |
|
125 |
# Get AI response
|
126 |
ai_response = conversation_predict(user_input)
|