Spaces:
Paused
Paused
Update to work on a single PDF
Browse files
app.py
CHANGED
@@ -12,12 +12,6 @@ from pypdf import PdfReader, PdfWriter
|
|
12 |
from pathlib import Path
|
13 |
|
14 |
|
15 |
-
data_root = './data/pdf/'
|
16 |
-
|
17 |
-
def load_pdf_paths(data_root):
|
18 |
-
return [data_root+path for path in os.listdir(data_root)]
|
19 |
-
|
20 |
-
|
21 |
def build_rag_chain(pdf_paths):
|
22 |
loaders = [PyPDFLoader(path) for path in pdf_paths]
|
23 |
|
@@ -60,25 +54,25 @@ def build_rag_chain(pdf_paths):
|
|
60 |
def predict(query, pdf_file):
|
61 |
print(type(pdf_file))
|
62 |
if pdf_file:
|
63 |
-
pdf_path = Path(pdf_file)
|
64 |
-
pdf_reader = PdfReader(pdf_path)
|
65 |
-
pdf_writer = PdfWriter()
|
66 |
|
67 |
|
68 |
-
pdf_name = pdf_file.split('/')[-1]
|
69 |
-
pdf_path = data_root + pdf_name
|
70 |
|
71 |
-
if pdf_path not in load_pdf_paths(data_root):
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
os.system("ls data/pdf")
|
79 |
|
80 |
-
pdf_paths = load_pdf_paths(data_root)
|
81 |
-
|
82 |
return rag_chain.invoke(query)
|
83 |
|
84 |
# examples = [
|
|
|
12 |
from pathlib import Path
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def build_rag_chain(pdf_paths):
|
16 |
loaders = [PyPDFLoader(path) for path in pdf_paths]
|
17 |
|
|
|
54 |
def predict(query, pdf_file):
|
55 |
print(type(pdf_file))
|
56 |
if pdf_file:
|
57 |
+
# pdf_path = Path(pdf_file)
|
58 |
+
# pdf_reader = PdfReader(pdf_path)
|
59 |
+
# pdf_writer = PdfWriter()
|
60 |
|
61 |
|
62 |
+
# pdf_name = pdf_file.split('/')[-1]
|
63 |
+
# pdf_path = data_root + pdf_name
|
64 |
|
65 |
+
# if pdf_path not in load_pdf_paths(data_root):
|
66 |
+
# print('Saving file...')
|
67 |
+
# for page in pdf_reader.pages:
|
68 |
+
# pdf_writer.add_page(page)
|
69 |
|
70 |
+
# with open(pdf_path, 'wb') as f:
|
71 |
+
# pdf_writer.write(f)
|
72 |
+
# os.system("ls data/pdf")
|
73 |
|
74 |
+
# pdf_paths = load_pdf_paths(data_root)
|
75 |
+
rag_chain = build_rag_chain([pdf_file])
|
76 |
return rag_chain.invoke(query)
|
77 |
|
78 |
# examples = [
|