elshehawy committed on
Commit
cc01c8c
·
1 Parent(s): c1fc26e

update to work on one pdf

Browse files
Files changed (1) hide show
  1. app.py +14 -20
app.py CHANGED
@@ -12,12 +12,6 @@ from pypdf import PdfReader, PdfWriter
12
  from pathlib import Path
13
 
14
 
15
- data_root = './data/pdf/'
16
-
17
- def load_pdf_paths(data_root):
18
- return [data_root+path for path in os.listdir(data_root)]
19
-
20
-
21
  def build_rag_chain(pdf_paths):
22
  loaders = [PyPDFLoader(path) for path in pdf_paths]
23
 
@@ -60,25 +54,25 @@ def build_rag_chain(pdf_paths):
60
  def predict(query, pdf_file):
61
  print(type(pdf_file))
62
  if pdf_file:
63
- pdf_path = Path(pdf_file)
64
- pdf_reader = PdfReader(pdf_path)
65
- pdf_writer = PdfWriter()
66
 
67
 
68
- pdf_name = pdf_file.split('/')[-1]
69
- pdf_path = data_root + pdf_name
70
 
71
- if pdf_path not in load_pdf_paths(data_root):
72
- print('Saving file...')
73
- for page in pdf_reader.pages:
74
- pdf_writer.add_page(page)
75
 
76
- with open(pdf_path, 'wb') as f:
77
- pdf_writer.write(f)
78
- os.system("ls data/pdf")
79
 
80
- pdf_paths = load_pdf_paths(data_root)
81
- rag_chain = build_rag_chain(pdf_paths)
82
  return rag_chain.invoke(query)
83
 
84
  # examples = [
 
12
  from pathlib import Path
13
 
14
 
 
 
 
 
 
 
15
  def build_rag_chain(pdf_paths):
16
  loaders = [PyPDFLoader(path) for path in pdf_paths]
17
 
 
54
  def predict(query, pdf_file):
55
  print(type(pdf_file))
56
  if pdf_file:
57
+ # pdf_path = Path(pdf_file)
58
+ # pdf_reader = PdfReader(pdf_path)
59
+ # pdf_writer = PdfWriter()
60
 
61
 
62
+ # pdf_name = pdf_file.split('/')[-1]
63
+ # pdf_path = data_root + pdf_name
64
 
65
+ # if pdf_path not in load_pdf_paths(data_root):
66
+ # print('Saving file...')
67
+ # for page in pdf_reader.pages:
68
+ # pdf_writer.add_page(page)
69
 
70
+ # with open(pdf_path, 'wb') as f:
71
+ # pdf_writer.write(f)
72
+ # os.system("ls data/pdf")
73
 
74
+ # pdf_paths = load_pdf_paths(data_root)
75
+ rag_chain = build_rag_chain([pdf_file])
76
  return rag_chain.invoke(query)
77
 
78
  # examples = [