Dhahlan2000 committed on
Commit
b2834f1
·
verified ·
1 Parent(s): 36c2d7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -25
app.py CHANGED
@@ -22,19 +22,19 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
22
  translator = pipeline('translation', model=trans_model, tokenizer=eng_trans_tokenizer, src_lang="eng_Latn", tgt_lang=chat_language, max_length = 400, device=device)
23
 
24
  # Initialize translation pipelines
25
- pipe = pipeline("translation", model="thilina/mt5-sinhalese-english")
26
 
27
- sin_trans_model = AutoModelForSeq2SeqLM.from_pretrained("thilina/mt5-sinhalese-english")
28
- si_trans_tokenizer = AutoTokenizer.from_pretrained("thilina/mt5-sinhalese-english")
29
 
30
- singlish_pipe = pipeline("text2text-generation", model="Dhahlan2000/Simple_Translation-model-for-GPT-v15")
31
 
32
  # Translation functions
33
  def translate_Singlish_to_sinhala(text):
34
 
35
- translated_text = singlish_pipe(f"translate Singlish to Sinhala: {text}", clean_up_tokenization_spaces=False)[0]['generated_text']
36
 
37
- return translated_text.replace('\u200d', '')
38
 
39
  def translate_english_to_sinhala(text):
40
  # Split the text into sentences or paragraphs
@@ -47,21 +47,21 @@ def translate_english_to_sinhala(text):
47
  translated_text = "\n".join(translated_parts)
48
  return translated_text.replace("ප් රභූවරුන්", "").replace('\u200d', '')
49
 
50
- def translate_sinhala_to_english(text):
51
- # Split the text into sentences or paragraphs
52
- parts = text.split("\n") # Split by new lines for paragraphs, adjust as needed
53
- translated_parts = []
54
- for part in parts:
55
- # Tokenize each part
56
- inputs = si_trans_tokenizer(part.strip(), return_tensors="pt", padding=True, truncation=True, max_length=512)
57
- # Generate translation
58
- outputs = sin_trans_model.generate(**inputs)
59
- # Decode translated text while preserving formatting
60
- translated_part = si_trans_tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
61
- translated_parts.append(translated_part)
62
- # Join the translated parts back together
63
- translated_text = "\n".join(translated_parts)
64
- return translated_text
65
 
66
  def transliterate_from_sinhala(text):
67
  # Define the source and target scripts
@@ -116,11 +116,11 @@ def conversation_predict(input_text):
116
 
117
  def ai_predicted(user_input):
118
  user_input = translate_Singlish_to_sinhala(user_input)
119
- print("You(Singlish): ", user_input,"\n")
120
- user_input = transliterate_to_sinhala(user_input)
121
- print("You(Sinhala): ", user_input,"\n")
122
- user_input = translate_sinhala_to_english(user_input)
123
  print("You(English): ", user_input,"\n")
 
 
 
 
124
 
125
  # Get AI response
126
  ai_response = conversation_predict(user_input)
 
22
  translator = pipeline('translation', model=trans_model, tokenizer=eng_trans_tokenizer, src_lang="eng_Latn", tgt_lang=chat_language, max_length = 400, device=device)
23
 
24
  # Initialize translation pipelines
25
+ # pipe = pipeline("translation", model="thilina/mt5-sinhalese-english")
26
 
27
+ # sin_trans_model = AutoModelForSeq2SeqLM.from_pretrained("thilina/mt5-sinhalese-english")
28
+ # si_trans_tokenizer = AutoTokenizer.from_pretrained("thilina/mt5-sinhalese-english")
29
 
30
+ singlish_pipe = pipeline("text2text-generation", model="Dhahlan2000/Chitti-Base-model-for-GPT-v7")
31
 
32
  # Translation functions
33
  def translate_Singlish_to_sinhala(text):
34
 
35
+ translated_text = singlish_pipe(f"translate Singlish to English: {text}", clean_up_tokenization_spaces=False)[0]['generated_text']
36
 
37
+ return translated_text
38
 
39
  def translate_english_to_sinhala(text):
40
  # Split the text into sentences or paragraphs
 
47
  translated_text = "\n".join(translated_parts)
48
  return translated_text.replace("ප් රභූවරුන්", "").replace('\u200d', '')
49
 
50
+ # def translate_sinhala_to_english(text):
51
+ # # Split the text into sentences or paragraphs
52
+ # parts = text.split("\n") # Split by new lines for paragraphs, adjust as needed
53
+ # translated_parts = []
54
+ # for part in parts:
55
+ # # Tokenize each part
56
+ # inputs = si_trans_tokenizer(part.strip(), return_tensors="pt", padding=True, truncation=True, max_length=512)
57
+ # # Generate translation
58
+ # outputs = sin_trans_model.generate(**inputs)
59
+ # # Decode translated text while preserving formatting
60
+ # translated_part = si_trans_tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
61
+ # translated_parts.append(translated_part)
62
+ # # Join the translated parts back together
63
+ # translated_text = "\n".join(translated_parts)
64
+ # return translated_text
65
 
66
  def transliterate_from_sinhala(text):
67
  # Define the source and target scripts
 
116
 
117
  def ai_predicted(user_input):
118
  user_input = translate_Singlish_to_sinhala(user_input)
 
 
 
 
119
  print("You(English): ", user_input,"\n")
120
+ # user_input = transliterate_to_sinhala(user_input)
121
+ # print("You(Sinhala): ", user_input,"\n")
122
+ # user_input = translate_sinhala_to_english(user_input)
123
+ # print("You(English): ", user_input,"\n")
124
 
125
  # Get AI response
126
  ai_response = conversation_predict(user_input)