Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -178,15 +178,37 @@ def getRAGChain(customerName,customerDistrict, custDetailsPresent,vectordb):
|
|
178 |
)
|
179 |
return chain
|
180 |
|
181 |
-
def createVectorDB(documents):
|
182 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
|
183 |
-
texts =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
print("All chunk List START ***********************\n\n")
|
185 |
pretty_print_docs(texts)
|
186 |
print("All chunk List END ***********************\n\n")
|
187 |
-
embeddings = getEmbeddingModel(
|
188 |
-
|
|
|
|
|
189 |
return vectordb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
def createPrompt(cName, cCity, custDetailsPresent):
|
192 |
cProfile = "Customer's Name is " + cName + "\nCustomer's lives in or customer's Resident State or Customer's place is " + cCity + "\n"
|
@@ -213,7 +235,7 @@ def createPrompt(cName, cCity, custDetailsPresent):
|
|
213 |
PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
|
214 |
return PROMPT
|
215 |
|
216 |
-
vectordb = createVectorDB(loadKB(False, False, uploads_dir, None))
|
217 |
|
218 |
@app.route('/', methods=['GET'])
|
219 |
def test():
|
@@ -287,7 +309,7 @@ def file_Upload():
|
|
287 |
embeddingModelID = int(request.form.getlist('embeddingModelID')[0])
|
288 |
global vectordb
|
289 |
vectordb = createVectorDB(documents, embeddingModelID)
|
290 |
-
vectordb=createVectorDB(documents)
|
291 |
return render_template("index.html")
|
292 |
|
293 |
if __name__ == '__main__':
|
|
|
178 |
)
|
179 |
return chain
|
180 |
|
181 |
+
def createVectorDB(documents, embeddingModelID):
    """Chunk *documents*, embed them, and return a Chroma vector store.

    Each document is tokenized with the module-level ``tokenizer`` to decide
    how it is handled:

    * more than 1000 tokens: split with ``RecursiveCharacterTextSplitter``
      (``chunk_size=1500``, ``chunk_overlap=150``) and every piece is indexed;
    * 2 to 1000 tokens (inclusive): indexed as a single chunk;
    * 0 or 1 token: skipped.

    Args:
        documents: Iterable of document objects exposing ``page_content``.
        embeddingModelID: Identifier forwarded to ``getEmbeddingModel`` to
            select the embedding model.

    Returns:
        The store built by ``Chroma.from_documents`` with collection
        metadata ``{"hnsw:space": "cosine"}``.
    """
    # Documents above this token count are split; at or below it they are
    # indexed whole.
    max_tokens_unsplit = 1000

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
    texts = []
    for document in documents:
        token_count = len(tokenizer.tokenize(document.page_content))
        print("Token Count::::::::::" + str(token_count))
        if token_count > max_tokens_unsplit:
            print("Splitting Content using RTS")
            texts.extend(text_splitter.split_documents([document]))
        elif token_count > 1:
            # Covers 2..1000 tokens inclusive. The previous condition
            # (`< 1000 and > 1`) matched neither branch for a document of
            # exactly 1000 tokens, silently dropping it from the index.
            # NOTE(review): documents with 0 or 1 token are still skipped,
            # as before - confirm that single-token documents should be.
            texts.append(document)

    print("All chunk List START ***********************\n\n")
    pretty_print_docs(texts)
    print("All chunk List END ***********************\n\n")

    embeddings = getEmbeddingModel(embeddingModelID)
    print("Embedding Started >>>>>>>>>>>>>>>>>>", datetime.now().strftime("%H:%M:%S"))
    vectordb = Chroma.from_documents(texts, embeddings, collection_metadata={"hnsw:space": "cosine"})
    print("Vector Store Creation Completed*********************************\n\n")
    return vectordb
|
205 |
+
# texts = text_splitter.split_documents(documents)
|
206 |
+
# print("All chunk List START ***********************\n\n")
|
207 |
+
# pretty_print_docs(texts)
|
208 |
+
# print("All chunk List END ***********************\n\n")
|
209 |
+
# embeddings = getEmbeddingModel(0)
|
210 |
+
# vectordb = Chroma.from_documents(texts, embeddings)
|
211 |
+
# return vectordb
|
212 |
|
213 |
def createPrompt(cName, cCity, custDetailsPresent):
|
214 |
cProfile = "Customer's Name is " + cName + "\nCustomer's lives in or customer's Resident State or Customer's place is " + cCity + "\n"
|
|
|
235 |
PROMPT = PromptTemplate(template=prompt_template, input_variables=["history", "context", "question"])
|
236 |
return PROMPT
|
237 |
|
238 |
+
# Build the initial module-level vector store from the documents returned by
# loadKB, using the default embedding model. file_Upload later rebinds this
# global when new documents are uploaded.
vectordb = createVectorDB(loadKB(False, False, uploads_dir, None),defaultEmbeddingModelID)
|
239 |
|
240 |
@app.route('/', methods=['GET'])
|
241 |
def test():
|
|
|
309 |
embeddingModelID = int(request.form.getlist('embeddingModelID')[0])
|
310 |
global vectordb
|
311 |
vectordb = createVectorDB(documents, embeddingModelID)
|
312 |
+
#vectordb=createVectorDB(documents)
|
313 |
return render_template("index.html")
|
314 |
|
315 |
if __name__ == '__main__':
|