Arslan17121 committed on
Commit
dced02e
·
verified ·
1 Parent(s): e04a0c8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from pytesseract import image_to_string
4
+ import pyttsx3
5
+
6
# Streamlit re-executes this script from the top on every user interaction,
# so the summarization pipeline is built behind st.cache_resource to avoid
# reloading the model on each rerun.
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization")


# Module-level handles used by summarize_pdf() and play_audio().
summarizer = _load_summarizer()
engine = pyttsx3.init()
9
+
10
def summarize_pdf(pdf_file):
    """Extract text from the uploaded file via OCR and summarize it.

    Args:
        pdf_file: A binary file-like object exposing ``read()``.

    Returns:
        A list of summary strings, one per result returned by the
        summarization pipeline. Empty when OCR produces no text.
    """
    # NOTE(review): the raw bytes are passed straight to pytesseract, exactly
    # as before. pytesseract documents image inputs (PIL image, NumPy array or
    # file path), so PDF bytes presumably need to be rasterized page by page
    # first -- confirm and add that step with the project's chosen PDF library.
    # '--psm 6' asks Tesseract to treat the input as one uniform text block.
    text = image_to_string(pdf_file.read(), config='--psm 6')

    # Nothing recognisable: skip the model call rather than summarizing "".
    if not text or not text.strip():
        return []

    # truncation=True keeps long OCR output within the model's input limit.
    results = summarizer(
        text,
        max_length=150,
        min_length=50,
        do_sample=False,
        truncation=True,
    )

    # The pipeline returns a list of dicts, each carrying a "summary_text"
    # entry; the earlier code indexed that list with a string key.
    return [result["summary_text"] for result in results]
21
+
22
def play_audio(text):
    """Speak ``text`` aloud using the module-level text-to-speech engine.

    Args:
        text: The string to queue for speech.
    """
    tts = engine
    tts.say(text)       # queue the utterance
    tts.runAndWait()    # process the queue before returning
27
+
28
def main():
    """Render the page: upload a file, list its discussion points, offer playback."""
    st.title("Discussion Point Summarizer")
    st.subheader("Upload a PDF to generate key discussion points.")

    pdf_upload = st.file_uploader("Choose a PDF file", type="pdf")
    if pdf_upload is None:
        # Nothing uploaded yet; only the title and uploader are shown.
        return

    points = summarize_pdf(pdf_upload)

    st.header("Discussion Points:")
    for item in points:
        st.write(f"- {item}")

    listen_clicked = st.button("Listen to Discussion Points")
    if listen_clicked:
        # Read all points as a single utterance.
        spoken = " ".join(points)
        play_audio(spoken)
45
+
46
# Script entry point: build the UI only when executed as the main module.
if __name__ == "__main__":
    main()