amirhoseinsedaghati committed on
Commit
c4c785c
·
verified ·
1 Parent(s): 8b0afac

Update pages/Translate_Text.py

Browse files
Files changed (1) hide show
  1. pages/Translate_Text.py +33 -52
pages/Translate_Text.py CHANGED
@@ -9,14 +9,14 @@ import pandas as pd
9
 
10
 
11
 
12
- def translate_text_to_text(text, target_lang):
13
- prefix = f'translate to {target_lang}: '
14
  text = prefix + text
15
- tokenizer = AutoTokenizer.from_pretrained('CohereForAI/aya-101')
16
- inputs = tokenizer(text, return_tensors='pt')
17
- model = AutoModelForSeq2SeqLM.from_pretrained('CohereForAI/aya-101')
18
- outputs = model.generate(inputs, max_new_tokens=len(inputs.input_ids[0]) * 3, do_sample=False)
19
- translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
  return translated_text
21
 
22
 
@@ -35,59 +35,40 @@ def main():
35
  with im3:
36
  pass
37
 
38
- languages = ['Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Belarusian',
39
- 'Bengali', 'Bulgarian', 'Catalan', 'Cebuano', 'Czech', 'Welsh',
40
- 'Danish', 'German', 'Greek', 'English', 'Esperanto', 'Estonian',
41
- 'Basque', 'Finnish', 'Filipino', 'French', 'Western Frisian',
42
- 'Scottish Gaelic', 'Irish', 'Galician', 'Gujarati', 'Haitian',
43
- 'Hausa', 'Hebrew', 'Hindi', 'Hungarian', 'Armenian', 'Igbo',
44
- 'Indonesian', 'Icelandic', 'Italian', 'Javanese', 'Japanese',
45
- 'Kannada', 'Georgian', 'Kazakh', 'Khmer', 'Kirghiz', 'Korean',
46
- 'Kurdish', 'Lao', 'Latvian', 'Latin', 'Lithuanian', 'Luxembourgish',
47
- 'Malayalam', 'Marathi', 'Macedonian', 'Malagasy', 'Maltese',
48
- 'Mongolian', 'Maori', 'Malay', 'Burmese', 'Nepali', 'Dutch',
49
- 'Norwegian', 'Pedi', 'Nyanja', 'Odia', 'Panjabi', 'Persian',
50
- 'Polish', 'Portuguese', 'Pushto', 'Romanian', 'Russian', 'Sinhala',
51
- 'Slovak', 'Slovenian', 'Samoan', 'Shona', 'Sindhi', 'Somali',
52
- 'Southern Sotho', 'Spanish', 'Albanian', 'Serbian', 'Sundanese',
53
- 'Swahili', 'Swedish', 'Tamil', 'Telugu', 'Tajik', 'Thai', 'Turkish',
54
- 'Twi', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Xhosa', 'Yiddish',
55
- 'Yoruba', 'Chinese','Zulu'
56
- ]
57
- # languages = ['English', 'French']
58
- # source_lang = st.sidebar.selectbox('Source Language', languages)
59
  target_lang = st.sidebar.selectbox('Target Language', languages, index=1)
60
  text = st.text_area('Text Translator', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
61
 
62
  if st.button('translate it'):
63
  if text != '':
64
- # if (source_lang == 'English' and target_lang == 'English') or (source_lang == 'French' and target_lang == 'French'):
65
- # st.error('Expected different values for source and target languages, but got the same values!')
66
 
67
- # else:
68
- with st.expander('Original Text'):
69
- st.write(text)
70
- add_one_item(text, 'Text Translator')
71
 
72
- with st.expander('Translated Text'):
73
- translated_text = translate_text_to_text(text, target_lang)
74
- st.write(translated_text)
75
-
76
- col1, col2 = st.columns(2)
77
- with col1:
78
- with st.expander('Download Translated Text'):
79
- FileDownloader(translated_text, 'txt').download()
80
 
81
- with col2:
82
- with st.expander('Translated Text Validation'):
83
- bleu_score = validate_translation(text, translated_text)
84
- df = pd.DataFrame({
85
- 'Brevity Penalty' : bleu_score.bp,
86
- 'the length of the original text' : bleu_score.ref_len,
87
- 'the length of the translated text' : bleu_score.sys_len,
88
- 'Ratio' : bleu_score.ratio
89
- }, index=1)
90
- st.dataframe(df)
91
 
92
  else:
93
  st.error('Please enter a non-empty text.')
 
9
 
10
 
11
 
12
def translate_text_to_text(text, source_lang, target_lang):
    """Translate ``text`` from ``source_lang`` to ``target_lang``.

    The input is prefixed with ``'translate {source_lang} to {target_lang}: '``
    and passed to the ``stevhliu/my_awesome_opus_books_model`` seq2seq
    checkpoint. Both the tokenizer and the model are loaded from that
    checkpoint on every call.

    Args:
        text: The text to translate.
        source_lang: Source language name, inserted verbatim into the prompt
            prefix (the caller in this file passes 'English' or 'French').
        target_lang: Target language name, inserted verbatim into the prompt
            prefix.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens stripped.
    """
    checkpoint = 'stevhliu/my_awesome_opus_books_model'
    prompt = f'translate {source_lang} to {target_lang}: ' + text

    # NOTE(review): loading on every call is slow; a Streamlit-level resource
    # cache may be appropriate here -- confirm against the app's Streamlit
    # version before adding one.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # Keep the whole encoding so the attention mask reaches generate() along
    # with the input ids.
    encoded = tokenizer(prompt, return_tensors='pt')

    # Allow the output to be up to three times as long as the tokenized input.
    token_budget = len(encoded.input_ids[0]) * 3

    # Greedy decoding (do_sample=False): sampling-only knobs such as
    # top_k/top_p have no effect here and only trigger warnings, so they are
    # not passed.
    output_ids = model.generate(
        **encoded,
        max_new_tokens=token_budget,
        do_sample=False,
    )
    translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return translated_text
21
 
22
 
 
35
  with im3:
36
  pass
37
 
38
+ languages = ['English', 'French']
39
+ source_lang = st.sidebar.selectbox('Source Language', languages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  target_lang = st.sidebar.selectbox('Target Language', languages, index=1)
41
  text = st.text_area('Text Translator', placeholder='Enter your input text here ...', height=200, label_visibility='hidden')
42
 
43
  if st.button('translate it'):
44
  if text != '':
45
+ if (source_lang == 'English' and target_lang == 'English') or (source_lang == 'French' and target_lang == 'French'):
46
+ st.error('Expected different values for source and target languages, but got the same values!')
47
 
48
+ else:
49
+ with st.expander('Original Text'):
50
+ st.write(text)
51
+ add_one_item(text, 'Text Translator')
52
 
53
+ with st.expander('Translated Text'):
54
+ translated_text = translate_text_to_text(text, source_lang, target_lang)
55
+ st.write(translated_text)
56
+
57
+ col1, col2 = st.columns(2)
58
+ with col1:
59
+ with st.expander('Download Translated Text'):
60
+ FileDownloader(translated_text, 'txt').download()
61
 
62
+ with col2:
63
+ with st.expander('Translated Text Validation'):
64
+ bleu_score = validate_translation(text, translated_text)
65
+ df = pd.DataFrame({
66
+ 'Brevity Penalty' : bleu_score.bp,
67
+ 'the length of the original text' : bleu_score.ref_len,
68
+ 'the length of the translated text' : bleu_score.sys_len,
69
+ 'Ratio' : bleu_score.ratio
70
+ }, index=1)
71
+ st.dataframe(df)
72
 
73
  else:
74
  st.error('Please enter a non-empty text.')