Spaces:

adinarayana
/

Sample

Sleeping

App Files Community

adinarayana committed on Feb 15, 2024

Commit

a8825e5

verified ·

1 Parent(s): df6d309

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -66

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import re
 import pdfminer
 from pdfminer.high_level import extract_pages
-from transformers import pipeline, QuestionAnsweringModel, QuestionAnsweringProcessor
 import streamlit as st
@@ -43,10 +43,10 @@ def answer_question(text, question):
     """
     qa_model_name = "deepset/roberta-base-squad2"  # Replace with your chosen model
-    qa_model = QuestionAnsweringModel.from_pretrained(qa_model_name)
-    qa_processor = QuestionAnsweringProcessor.from_pretrained(qa_model_name)
-    inputs = qa_processor(question, text, return_tensors="pt")
     outputs = qa_model(**inputs)
     start_scores, end_scores = outputs.start_logits, outputs.end_logits
@@ -82,7 +82,7 @@ if uploaded_file is not None:
             summarize_button = st.button("Generate Summary")
             if summarize_button:
                 with st.spinner("Summarizing..."):
-                    summary_response = get_openai_response(text, min_length=min_summary_length, model=summarization_model)
                     st.subheader("Summary")
                     st.write(summary_response[0]["summary_text"])
             if question:
@@ -92,64 +92,3 @@ if uploaded_file is not None:
                     st.write(answer)
         else:
             st.error("No text found in the PDF.")
-# import os
-# import re
-# import pdfminer
-# from pdfminer.high_level import extract_pages
-# from transformers import pipeline
-# import streamlit as st
-# def preprocess_text(element):
-#     if isinstance(element, pdfminer.layout.LTTextBoxHorizontal):  # Check for text elements
-#         text = element.get_text().strip()
-#         # Remove non-textual elements
-#         text = re.sub(r'[^\w\s]', '', text)  # Replace with your preferred regular expression
-#         # Remove stop words (optional)
-#         # from nltk.corpus import stopwords
-#         # stop_words = set(stopwords.words('english'))
-#         # text = " ".join([word for word in text.split() if word not in stop_words])
-#         # Convert to lowercase (optional)
-#         # text = text.lower()
-#         return text
-#     else:
-#         return ""
-# def get_openai_response(text, min_length=100, model="t5-small"):
-#     summarizer = pipeline("summarization", model=model)
-#     return summarizer(text, min_length=min_length)
-# ## Streamlit app
-# st.set_page_config(page_title="Trail Demo")
-# st.header("PDF Summarizer")
-# # User options
-# st.subheader("Settings")
-# min_summary_length = st.slider("Minimum Summary Length", min_value=50, max_value=500, value=100)
-# # max_summary_length = st.slider("Maximum Summary Length", min_value=50, max_value=500, value=100)
-# summarization_model = st.selectbox("Summarization Model", ["t5-small", "facebook/bart-large-cnn"])
-# # File upload and processing
-# uploaded_file = st.file_uploader("Choose a PDF file")
-# if uploaded_file is not None:
-#     with st.spinner("Processing..."):
-#         text = ""
-#         for page_layout in extract_pages(uploaded_file):
-#             for element in page_layout:
-#                 text += preprocess_text(element) + "\n"
-#         if text:
-#             submit = st.button("Generate Summary")
-#             if submit:
-#                 with st.spinner("Summarizing..."):
-#                     response = get_openai_response(text, min_length=min_summary_length, model=summarization_model)
-#                     st.subheader("Summary")
-#                     st.write(response[0]["summary_text"])
-#         else:
-#             st.error("No text found in the PDF.")

 import re
 import pdfminer
 from pdfminer.high_level import extract_pages
+from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
 import streamlit as st
     """
     qa_model_name = "deepset/roberta-base-squad2"  # Replace with your chosen model
+    qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
+    tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
+    inputs = tokenizer(question, text, return_tensors="pt")  # Tokenize inputs
     outputs = qa_model(**inputs)
     start_scores, end_scores = outputs.start_logits, outputs.end_logits
             summarize_button = st.button("Generate Summary")
             if summarize_button:
                 with st.spinner("Summarizing..."):
+                    summary_response = pipeline("summarization", model=summarization_model)(text, min_length=min_summary_length)
                     st.subheader("Summary")
                     st.write(summary_response[0]["summary_text"])
             if question:
                     st.write(answer)
         else:
             st.error("No text found in the PDF.")