Update app.py
app.py CHANGED
@@ -2,7 +2,7 @@ import os
 import re
 import pdfminer
 from pdfminer.high_level import extract_pages
-from transformers import pipeline,
+from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
 
 import streamlit as st
 
@@ -43,10 +43,10 @@ def answer_question(text, question):
     """
     qa_model_name = "deepset/roberta-base-squad2" # Replace with your chosen model
 
-    qa_model =
-
+    qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
+    tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
 
-    inputs =
+    inputs = tokenizer(question, text, return_tensors="pt") # Tokenize inputs
     outputs = qa_model(**inputs)
 
     start_scores, end_scores = outputs.start_logits, outputs.end_logits
@@ -82,7 +82,7 @@ if uploaded_file is not None:
         summarize_button = st.button("Generate Summary")
         if summarize_button:
             with st.spinner("Summarizing..."):
-                summary_response =
+                summary_response = pipeline("summarization", model=summarization_model)(text, min_length=min_summary_length)
                 st.subheader("Summary")
                 st.write(summary_response[0]["summary_text"])
         if question:
@@ -92,64 +92,3 @@ if uploaded_file is not None:
             st.write(answer)
     else:
         st.error("No text found in the PDF.")
-
-
-
-
-# import os
-# import re
-# import pdfminer
-# from pdfminer.high_level import extract_pages
-# from transformers import pipeline
-
-# import streamlit as st
-
-# def preprocess_text(element):
-#     if isinstance(element, pdfminer.layout.LTTextBoxHorizontal): # Check for text elements
-#         text = element.get_text().strip()
-#         # Remove non-textual elements
-#         text = re.sub(r'[^\w\s]', '', text) # Replace with your preferred regular expression
-
-#         # Remove stop words (optional)
-#         # from nltk.corpus import stopwords
-#         # stop_words = set(stopwords.words('english'))
-#         # text = " ".join([word for word in text.split() if word not in stop_words])
-
-#         # Convert to lowercase (optional)
-#         # text = text.lower()
-#         return text
-#     else:
-#         return ""
-
-# def get_openai_response(text, min_length=100, model="t5-small"):
-#     summarizer = pipeline("summarization", model=model)
-#     return summarizer(text, min_length=min_length)
-
-# ## Streamlit app
-
-# st.set_page_config(page_title="Trail Demo")
-# st.header("PDF Summarizer")
-
-# # User options
-# st.subheader("Settings")
-# min_summary_length = st.slider("Minimum Summary Length", min_value=50, max_value=500, value=100)
-# # max_summary_length = st.slider("Maximum Summary Length", min_value=50, max_value=500, value=100)
-# summarization_model = st.selectbox("Summarization Model", ["t5-small", "facebook/bart-large-cnn"])
-
-# # File upload and processing
-# uploaded_file = st.file_uploader("Choose a PDF file")
-# if uploaded_file is not None:
-#     with st.spinner("Processing..."):
-#         text = ""
-#         for page_layout in extract_pages(uploaded_file):
-#             for element in page_layout:
-#                 text += preprocess_text(element) + "\n"
-#         if text:
-#             submit = st.button("Generate Summary")
-#             if submit:
-#                 with st.spinner("Summarizing..."):
-#                     response = get_openai_response(text, min_length=min_summary_length, model=summarization_model)
-#                     st.subheader("Summary")
-#                     st.write(response[0]["summary_text"])
-#         else:
-#             st.error("No text found in the PDF.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
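A note on the new line 85: pipeline("summarization", model=summarization_model) builds a fresh pipeline on every click of the button, which reloads the model for each summary. If that turns out to be slow, one option (not part of this commit, and assuming a Streamlit version that provides st.cache_resource) is to cache the pipeline per model name:

import streamlit as st
from transformers import pipeline

@st.cache_resource
def get_summarizer(model_name: str):
    # Load the summarization pipeline once per model and reuse it across reruns.
    return pipeline("summarization", model=model_name)

# summary_response = get_summarizer(summarization_model)(text, min_length=min_summary_length)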