akshay326 committed on
Commit
af7f6d4
·
unverified ·
1 Parent(s): 2c96c1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -10
app.py CHANGED
@@ -18,15 +18,16 @@ CHUNK_SIZE = 600
18
  CHUNK_OVERLAP = 40
19
 
20
  # Load PDF document and create doc splits
21
- def load_doc(list_file_path, chunk_size, chunk_overlap):
22
  loaders = [PyPDFLoader(x) for x in list_file_path]
23
  pages = []
24
  for loader in loaders:
25
  pages.extend(loader.load())
26
 
27
  text_splitter = RecursiveCharacterTextSplitter(
28
- chunk_size = chunk_size,
29
- chunk_overlap = chunk_overlap)
 
30
  doc_splits = text_splitter.split_documents(pages)
31
  return doc_splits
32
 
@@ -84,14 +85,13 @@ def initialize_llmchain(temperature, max_tokens, top_k, vector_db, progress=gr.P
84
 
85
 
86
  # Initialize database
87
- def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Progress()):
88
  # Create list of documents (when valid)
89
- #file_path = file_obj.name
90
  list_file_path = [x.name for x in list_file_obj if x is not None]
91
  # print('list_file_path', list_file_path)
92
  progress(0.25, desc="Loading document...")
93
  # Load document and create splits
94
- doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
95
  # Create or load Vector database
96
  progress(0.5, desc="Generating vector database...")
97
  # global vector_db
@@ -139,8 +139,6 @@ def upload_file(file_obj):
139
  for idx, file in enumerate(file_obj):
140
  file_path = file_obj.name
141
  list_file_path.append(file_path)
142
- # print(file_path)
143
- # initialize_database(file_path, progress)
144
  return list_file_path
145
 
146
 
@@ -191,9 +189,8 @@ def demo():
191
  clear_btn = gr.ClearButton([msg, chatbot])
192
 
193
  # Preprocessing events
194
- #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
195
  db_btn.click(initialize_database, \
196
- inputs=[document, CHUNK_SIZE, CHUNK_OVERLAP], \
197
  outputs=[vector_db, db_progress])
198
  qachain_btn.click(initialize_LLM, \
199
  inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \
 
18
  CHUNK_OVERLAP = 40
19
 
20
  # Load PDF document and create doc splits
21
+ def load_doc(list_file_path):
22
  loaders = [PyPDFLoader(x) for x in list_file_path]
23
  pages = []
24
  for loader in loaders:
25
  pages.extend(loader.load())
26
 
27
  text_splitter = RecursiveCharacterTextSplitter(
28
+ chunk_size = CHUNK_SIZE,
29
+ chunk_overlap = CHUNK_OVERLAP
30
+ )
31
  doc_splits = text_splitter.split_documents(pages)
32
  return doc_splits
33
 
 
85
 
86
 
87
  # Initialize database
88
+ def initialize_database(list_file_obj, progress=gr.Progress()):
89
  # Create list of documents (when valid)
 
90
  list_file_path = [x.name for x in list_file_obj if x is not None]
91
  # print('list_file_path', list_file_path)
92
  progress(0.25, desc="Loading document...")
93
  # Load document and create splits
94
+ doc_splits = load_doc(list_file_path)
95
  # Create or load Vector database
96
  progress(0.5, desc="Generating vector database...")
97
  # global vector_db
 
139
  for idx, file in enumerate(file_obj):
140
  file_path = file_obj.name
141
  list_file_path.append(file_path)
 
 
142
  return list_file_path
143
 
144
 
 
189
  clear_btn = gr.ClearButton([msg, chatbot])
190
 
191
  # Preprocessing events
 
192
  db_btn.click(initialize_database, \
193
+ inputs=[document], \
194
  outputs=[vector_db, db_progress])
195
  qachain_btn.click(initialize_LLM, \
196
  inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \