pratik-19 committed on
Commit
b042a07
·
1 Parent(s): 233fe4e

minor changes

Browse files
Files changed (1) hide show
  1. app.py +5 -37
app.py CHANGED
@@ -6,10 +6,10 @@ def load_models():
6
  tokenizer = MBart50Tokenizer.from_pretrained("facebook/mbart-large-50")
7
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-50")
8
  summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
9
- translator = pipeline("translation", model=model, tokenizer=tokenizer)
10
- return tokenizer, summarizer, translator
11
 
12
- tokenizer, summarizer, translator = load_models()
13
 
14
  import streamlit as st
15
  LANGUAGE_CODES = {
@@ -27,33 +27,6 @@ def detect_language(text):
27
  return lang_code
28
 
29
 
30
- def translate_to_english(text, src_lang):
31
- # Define the target language as English
32
- tgt_lang = "en_XX"
33
-
34
- # Tokenize the input text with the appropriate source and target language tokens
35
- inputs = tokenizer(
36
- text,
37
- return_tensors="pt",
38
- max_length=1024,
39
- truncation=True
40
- )
41
-
42
- # Specify the source language and target language in the generation call
43
- translated_ids = translator.model.generate(
44
- inputs["input_ids"],
45
- max_length=100,
46
- length_penalty=2.0,
47
- num_beams=4,
48
- decoder_start_token_id=tokenizer.lang_code_to_id[tgt_lang], # Explicitly set the target language
49
- forced_bos_token_id=tokenizer.lang_code_to_id[src_lang] # Set the source language
50
- )
51
-
52
- # Decode the translated text
53
- translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)
54
- translated_text = re.sub(r"<[^>]+>", "", translated_text).strip()
55
-
56
- return translated_text
57
 
58
 
59
  def summarize_text(text, lang_code):
@@ -76,8 +49,8 @@ def summarize_text(text, lang_code):
76
  return summary
77
 
78
 
79
- st.title("Multilingual Summarization and Translation App")
80
- st.markdown("""This app detects the language of the input text, summarizes it in the same language, and translates it into English.""")
81
 
82
  user_input = st.text_area("Enter text in any language:", "")
83
 
@@ -96,11 +69,6 @@ if st.button("Process Text"):
96
  st.write(f"### Summarized Text ({lang_code}):")
97
  st.write(summary)
98
 
99
- # Then translate the summary to English
100
- translation = translate_to_english(summary, LANGUAGE_CODES.get(lang_code, "en_XX"))
101
- st.write("### Translated Text (English):")
102
- st.write(translation)
103
-
104
  except Exception as e:
105
  st.error(f"An error occurred during processing: {e}")
106
  else:
 
6
  tokenizer = MBart50Tokenizer.from_pretrained("facebook/mbart-large-50")
7
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-50")
8
  summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
9
+
10
+ return tokenizer, summarizer
11
 
12
+ tokenizer, summarizer = load_models()
13
 
14
  import streamlit as st
15
  LANGUAGE_CODES = {
 
27
  return lang_code
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32
  def summarize_text(text, lang_code):
 
49
  return summary
50
 
51
 
52
+ st.title("Multilingual Summarization App")
53
+
54
 
55
  user_input = st.text_area("Enter text in any language:", "")
56
 
 
69
  st.write(f"### Summarized Text ({lang_code}):")
70
  st.write(summary)
71
 
 
 
 
 
 
72
  except Exception as e:
73
  st.error(f"An error occurred during processing: {e}")
74
  else: