adinarayana committed
Commit 68de083 · verified · 1 Parent(s): 7abaafd

Update app.py

Files changed (1)
  1. app.py +10 -9
app.py CHANGED
@@ -2,11 +2,11 @@ import os
 import re
 import pdfminer
 from pdfminer.high_level import extract_pages
-from transformers import pipeline, TFBertForQuestionAnswering, AutoTokenizer
-
+from transformers import TFBertForQuestionAnswering, AutoTokenizer
 
 import streamlit as st
 
+
 def preprocess_text(element):
     """Preprocesses text elements from the PDF.
 
@@ -32,6 +32,7 @@ def preprocess_text(element):
     else:
         return ""
 
+
 def answer_question(text, question):
     """Answers a question using the provided text and a pre-trained model.
 
@@ -42,23 +43,23 @@ def answer_question(text, question):
     Returns:
         The answer extracted from the text using the model.
     """
-    qa_model_name = "bert-base-uncased"  # Replace with your chosen model
+    qa_model_name = "bert-base-uncased"  # Replace with your chosen TensorFlow QA model
 
     qa_model = TFBertForQuestionAnswering.from_pretrained(qa_model_name)
     tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
 
-    inputs = tokenizer(question, text, return_tensors="pt")  # Tokenize inputs
-    outputs = qa_model(**inputs)
+    inputs = tokenizer(question, text, return_tensors="tf")  # Tokenize inputs for TensorFlow
 
-    start_scores, end_scores = outputs.start_logits, outputs.end_logits
+    start_logits, end_logits = qa_model(inputs)
 
-    answer_start = torch.argmax(start_scores)  # Get the predicted start index
-    answer_end = torch.argmax(end_scores) + 1  # Get the predicted end index (exclusive)
+    answer_start = tf.math.argmax(start_logits, axis=1)  # Get predicted start index
+    answer_end = tf.math.argmax(end_logits, axis=1) + 1  # Get predicted end index (exclusive)
 
-    answer = text[answer_start:answer_end]
+    answer = tf.gather(text, answer_start, axis=1).numpy()[0][answer_start[0]:answer_end[0]]
 
     return answer if answer else "No answer found."
 
+
 ## Streamlit app
 
 st.set_page_config(page_title="Enhanced PDF Summarizer")
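
Note that the TensorFlow rewrite in this commit still cannot run as committed: tf is never imported, a TF question-answering model returns an output object rather than a bare (start_logits, end_logits) pair, the argmax results are token indices rather than character offsets into text, and bert-base-uncased ships without a fine-tuned QA head. Below is a minimal sketch of how the span extraction could be written instead; the checkpoint name distilbert-base-cased-distilled-squad is an illustrative assumption, not part of this commit.

import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering

def answer_question(text, question):
    """Answers a question by extracting a span from `text` with a TF QA model."""
    # Assumption: a checkpoint fine-tuned for extractive QA, not bare bert-base-uncased.
    qa_model_name = "distilbert-base-cased-distilled-squad"
    tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
    qa_model = TFAutoModelForQuestionAnswering.from_pretrained(qa_model_name)

    # Tokenize as a (question, context) pair; truncate long PDF text to the model's max length.
    inputs = tokenizer(question, text, return_tensors="tf", truncation=True)

    # The model returns an output object; read the logits from its attributes.
    outputs = qa_model(inputs)
    answer_start = int(tf.math.argmax(outputs.start_logits, axis=1)[0])
    answer_end = int(tf.math.argmax(outputs.end_logits, axis=1)[0]) + 1

    # The indices are token positions, so decode the answer from the input ids
    # rather than slicing the raw text string.
    answer_ids = inputs["input_ids"][0][answer_start:answer_end]
    answer = tokenizer.decode(answer_ids, skip_special_tokens=True)
    return answer if answer else "No answer found."

Since the Streamlit app may call this function for every question, loading the tokenizer and model once (for example behind st.cache_resource) would avoid reloading them on each request.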