Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -18,15 +18,16 @@ CHUNK_SIZE = 600
 18      CHUNK_OVERLAP = 40
 19
 20      # Load PDF document and create doc splits
 21  -   def load_doc(list_file_path…                    [old line truncated in this capture]
 22          loaders = [PyPDFLoader(x) for x in list_file_path]
 23          pages = []
 24          for loader in loaders:
 25              pages.extend(loader.load())
 26
 27          text_splitter = RecursiveCharacterTextSplitter(
 28  -           chunk_size = …                          [old line truncated in this capture]
 29  -           chunk_overlap = …                       [old line truncated in this capture]
 30          doc_splits = text_splitter.split_documents(pages)
 31          return doc_splits
 32
@@ -84,14 +85,13 @@ def initialize_llmchain(temperature, max_tokens, top_k, vector_db, progress=gr.P
 84
 85
 86      # Initialize database
 87  -   def initialize_database(list_file_obj, …        [old line truncated in this capture]
 88          # Create list of documents (when valid)
 89  -       #file_path = file_obj.name                  [line removed]
 90          list_file_path = [x.name for x in list_file_obj if x is not None]
 91          # print('list_file_path', list_file_path)
 92          progress(0.25, desc="Loading document...")
 93          # Load document and create splits
 94  -       doc_splits = load_doc(list_file_path…       [old line truncated in this capture]
 95          # Create or load Vector database
 96          progress(0.5, desc="Generating vector database...")
 97          # global vector_db
@@ -139,8 +139,6 @@ def upload_file(file_obj):
 139     for idx, file in enumerate(file_obj):
 140         file_path = file_obj.name
 141         list_file_path.append(file_path)
 142 -       # print(file_path)                          [line removed]
 143 -       # initialize_database(file_path, progress)  [line removed]
 144     return list_file_path
 145
 146
@@ -191,9 +189,8 @@ def demo():
 191     clear_btn = gr.ClearButton([msg, chatbot])
 192
 193     # Preprocessing events
 194 -   #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])   [line removed]
 195     db_btn.click(initialize_database, \
 196 -       inputs=[document…                           [old line truncated in this capture]
 197         outputs=[vector_db, db_progress])
 198     qachain_btn.click(initialize_LLM, \
 199         inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \
|
|
 18      CHUNK_OVERLAP = 40
 19
 20      # Load PDF document and create doc splits
 21  +   def load_doc(list_file_path):
 22          loaders = [PyPDFLoader(x) for x in list_file_path]
 23          pages = []
 24          for loader in loaders:
 25              pages.extend(loader.load())
 26
 27          text_splitter = RecursiveCharacterTextSplitter(
 28  +           chunk_size = CHUNK_SIZE,
 29  +           chunk_overlap = CHUNK_OVERLAP
 30  +       )
 31          doc_splits = text_splitter.split_documents(pages)
 32          return doc_splits
 33
|
|
 85
 86
 87      # Initialize database
 88  +   def initialize_database(list_file_obj, progress=gr.Progress()):
 89          # Create list of documents (when valid)
 90          list_file_path = [x.name for x in list_file_obj if x is not None]
 91          # print('list_file_path', list_file_path)
 92          progress(0.25, desc="Loading document...")
 93          # Load document and create splits
 94  +       doc_splits = load_doc(list_file_path)
 95          # Create or load Vector database
 96          progress(0.5, desc="Generating vector database...")
 97          # global vector_db
|
|
 139     for idx, file in enumerate(file_obj):
 140         file_path = file_obj.name
 141         list_file_path.append(file_path)
 142     return list_file_path
 143
 144
|
|
 189     clear_btn = gr.ClearButton([msg, chatbot])
 190
 191     # Preprocessing events
 192     db_btn.click(initialize_database, \
 193 +       inputs=[document], \
 194         outputs=[vector_db, db_progress])
 195     qachain_btn.click(initialize_LLM, \
 196         inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \