Arslan17121 committed on
Commit
52ae8b2
·
verified ·
1 Parent(s): e3578a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -29,17 +29,17 @@ def clean_text(text):
29
  def split_text_into_paragraphs(text):
30
  return text.split('\n\n')
31
 
32
- def summarize_text_pegasus(text, max_length=800):
33
  inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
34
  summary_ids = model.generate(
35
- inputs["input_ids"], max_length=max_length, min_length=200,
36
  length_penalty=2.0, num_beams=4, early_stopping=True
37
  )
38
  return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
39
 
40
- def summarize_large_document(text, max_length=800):
41
  paragraphs = split_text_into_paragraphs(text)
42
- summaries = [summarize_text_pegasus(paragraph, max_length=max_length) for paragraph in paragraphs]
43
  return " ".join(summaries)
44
 
45
  def text_to_speech(text, lang="en"):
@@ -76,7 +76,7 @@ if uploaded_file:
76
 
77
  if st.button("Summarize Document"):
78
  with st.spinner("Summarizing document..."):
79
- summary = summarize_large_document(cleaned_text, max_length=800)
80
  st.text_area("Summary", summary, height=300)
81
 
82
  if st.button("Convert Summary to Audiobook"):
 
29
  def split_text_into_paragraphs(text):
30
  return text.split('\n\n')
31
 
32
+ def summarize_text_pegasus(text, max_length=512):
33
  inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
34
  summary_ids = model.generate(
35
+ inputs["input_ids"], max_length=max_length, min_length=50,
36
  length_penalty=2.0, num_beams=4, early_stopping=True
37
  )
38
  return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
39
 
40
+ def summarize_large_document(text, max_length=512):
41
  paragraphs = split_text_into_paragraphs(text)
42
+ summaries = [summarize_text_pegasus(paragraph, max_length=max_length) for paragraph in paragraphs[:10]] # Limit to first 10 paragraphs
43
  return " ".join(summaries)
44
 
45
  def text_to_speech(text, lang="en"):
 
76
 
77
  if st.button("Summarize Document"):
78
  with st.spinner("Summarizing document..."):
79
+ summary = summarize_large_document(cleaned_text, max_length=512)
80
  st.text_area("Summary", summary, height=300)
81
 
82
  if st.button("Convert Summary to Audiobook"):