Spaces:
Running
Running
Arslan17121
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
|
|
2 |
import pdfplumber
|
3 |
import re
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
5 |
-
from
|
6 |
from sklearn.feature_extraction.text import CountVectorizer
|
7 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
8 |
|
@@ -10,7 +10,6 @@ from nltk.sentiment import SentimentIntensityAnalyzer
|
|
10 |
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)
|
11 |
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)
|
12 |
qa_pipeline = pipeline("question-answering")
|
13 |
-
tts = TTS()
|
14 |
sia = SentimentIntensityAnalyzer()
|
15 |
|
16 |
# Helper functions
|
@@ -61,8 +60,12 @@ def answer_question_with_context(question, context, chunk_size=500):
|
|
61 |
continue
|
62 |
return " ".join(answers)
|
63 |
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
|
67 |
def extract_keywords(text, top_n=10):
|
68 |
vectorizer = CountVectorizer(stop_words="english")
|
@@ -95,8 +98,8 @@ if uploaded_file:
|
|
95 |
|
96 |
if st.button("Convert Summary to Audiobook"):
|
97 |
with st.spinner("Generating audio..."):
|
98 |
-
|
99 |
-
st.audio(
|
100 |
|
101 |
st.markdown("### Ask Questions About the Document")
|
102 |
question = st.text_input("Your Question:")
|
@@ -106,8 +109,8 @@ if uploaded_file:
|
|
106 |
st.write(f"**Answer:** {answer}")
|
107 |
if st.button("Convert Answer to Audio"):
|
108 |
with st.spinner("Generating answer audio..."):
|
109 |
-
|
110 |
-
st.audio(
|
111 |
|
112 |
st.markdown("### Document Insights")
|
113 |
if st.checkbox("Extract Keywords"):
|
|
|
2 |
import pdfplumber
|
3 |
import re
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
5 |
+
from gtts import gTTS
|
6 |
from sklearn.feature_extraction.text import CountVectorizer
|
7 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
8 |
|
|
|
10 |
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)
|
11 |
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B", trust_remote_code=True)
|
12 |
qa_pipeline = pipeline("question-answering")
|
|
|
13 |
sia = SentimentIntensityAnalyzer()
|
14 |
|
15 |
# Helper functions
|
|
|
60 |
continue
|
61 |
return " ".join(answers)
|
62 |
|
63 |
+
# Replace Tortoise-TTS with gTTS for text-to-speech functionality
|
64 |
+
def text_to_speech(text, language="en"):
    """Synthesize ``text`` to an MP3 file with gTTS and return the file path.

    Args:
        text: The text to read aloud. Must contain at least one
            non-whitespace character.
        language: Language code forwarded to gTTS as ``lang``.

    Returns:
        The path of the newly written MP3 file.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    import os
    import tempfile

    # Fail early with a clear message instead of relying on an assertion
    # deep inside the speech library.
    if not text or not text.strip():
        raise ValueError("No text to convert to speech.")

    # A fixed "output_audio.mp3" is shared by every session and every rerun
    # of the app, so one request could overwrite the audio another request
    # is about to play. Use a unique file per call instead.
    fd, file_name = tempfile.mkstemp(prefix="output_audio_", suffix=".mp3")
    os.close(fd)  # only the path is needed; save() opens the file itself

    try:
        tts = gTTS(text=text, lang=language, slow=False)
        tts.save(file_name)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(file_name)
        raise
    return file_name
|
69 |
|
70 |
def extract_keywords(text, top_n=10):
|
71 |
vectorizer = CountVectorizer(stop_words="english")
|
|
|
98 |
|
99 |
if st.button("Convert Summary to Audiobook"):
|
100 |
with st.spinner("Generating audio..."):
|
101 |
+
audio_file = text_to_speech(summary)
|
102 |
+
st.audio(audio_file, format="audio/mp3", start_time=0)
|
103 |
|
104 |
st.markdown("### Ask Questions About the Document")
|
105 |
question = st.text_input("Your Question:")
|
|
|
109 |
st.write(f"**Answer:** {answer}")
|
110 |
if st.button("Convert Answer to Audio"):
|
111 |
with st.spinner("Generating answer audio..."):
|
112 |
+
answer_audio_file = text_to_speech(answer)
|
113 |
+
st.audio(answer_audio_file, format="audio/mp3", start_time=0)
|
114 |
|
115 |
st.markdown("### Document Insights")
|
116 |
if st.checkbox("Extract Keywords"):
|