Spaces:

adinarayana
/

Sample

Sleeping

adinarayana committed on Feb 15, 2024

Commit

68de083

verified ·

1 Parent(s): 7abaafd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,11 +2,11 @@ import os
 import re
 import pdfminer
 from pdfminer.high_level import extract_pages
-from transformers import pipeline, TFBertForQuestionAnswering, AutoTokenizer
 import streamlit as st
 def preprocess_text(element):
     """Preprocesses text elements from the PDF.
@@ -32,6 +32,7 @@ def preprocess_text(element):
     else:
         return ""
 def answer_question(text, question):
     """Answers a question using the provided text and a pre-trained model.
@@ -42,23 +43,23 @@ def answer_question(text, question):
     Returns:
         The answer extracted from the text using the model.
     """
-    qa_model_name = "bert-base-uncased"  # Replace with your chosen model
     qa_model = TFBertForQuestionAnswering.from_pretrained(qa_model_name)
     tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
-    inputs = tokenizer(question, text, return_tensors="pt")  # Tokenize inputs
-    outputs = qa_model(**inputs)
-    start_scores, end_scores = outputs.start_logits, outputs.end_logits
-    answer_start = torch.argmax(start_scores)  # Get the predicted start index
-    answer_end = torch.argmax(end_scores) + 1  # Get the predicted end index (exclusive)
-    answer = text[answer_start:answer_end]
     return answer if answer else "No answer found."
 ## Streamlit app
 st.set_page_config(page_title="Enhanced PDF Summarizer")

 import re
 import pdfminer
 from pdfminer.high_level import extract_pages
+from transformers import TFBertForQuestionAnswering, AutoTokenizer
 import streamlit as st
 def preprocess_text(element):
     """Preprocesses text elements from the PDF.
     else:
         return ""
 def answer_question(text, question):
     """Answers a question using the provided text and a pre-trained model.
     Returns:
         The answer extracted from the text using the model.
     """
+    qa_model_name = "bert-base-uncased"  # Replace with your chosen TensorFlow QA model
     qa_model = TFBertForQuestionAnswering.from_pretrained(qa_model_name)
     tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
+    inputs = tokenizer(question, text, return_tensors="tf")  # Tokenize inputs for TensorFlow
+    start_logits, end_logits = qa_model(inputs)
+    answer_start = tf.math.argmax(start_logits, axis=1)  # Get predicted start index
+    answer_end = tf.math.argmax(end_logits, axis=1) + 1  # Get predicted end index (exclusive)
+    answer = tf.gather(text, answer_start, axis=1).numpy()[0][answer_start[0]:answer_end[0]]
     return answer if answer else "No answer found."
 ## Streamlit app
 st.set_page_config(page_title="Enhanced PDF Summarizer")