adnaniqbal001 committed on
Commit
c07449b
·
verified ·
1 Parent(s): 2210613

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -3,14 +3,9 @@ import PyPDF2
3
  from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
4
  from sentence_transformers import SentenceTransformer
5
  import torch
6
- from google.colab import files
7
 
8
- # Step 1: Upload the PDF file
9
- print("Please upload a PDF file containing the chapter.")
10
- uploaded = files.upload()
11
-
12
- # Extract the name of the uploaded file
13
- file_name = list(uploaded.keys())[0]
14
 
15
  # Step 2: Extract text from the PDF
16
  def extract_text_from_pdf(file_path):
@@ -21,7 +16,7 @@ def extract_text_from_pdf(file_path):
21
  text += page.extract_text()
22
  return text
23
 
24
- chapter_text = extract_text_from_pdf(file_name)
25
  print("Text extracted from the PDF successfully!")
26
 
27
  # Step 3: Split the text into smaller passages
 
3
  from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
4
  from sentence_transformers import SentenceTransformer
5
  import torch
 
6
 
7
+ # Step 1: Ask for PDF file path
8
+ file_path = input("Please provide the full path to the PDF file containing the chapter: ")
 
 
 
 
9
 
10
  # Step 2: Extract text from the PDF
11
  def extract_text_from_pdf(file_path):
 
16
  text += page.extract_text()
17
  return text
18
 
19
+ chapter_text = extract_text_from_pdf(file_path)
20
  print("Text extracted from the PDF successfully!")
21
 
22
  # Step 3: Split the text into smaller passages