rag-tool

Sleeping

Chris4K committed on Jan 20, 2024

Commit

e181ae7

verified ·

1 Parent(s): 3b4ef40

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,24 +20,15 @@ print("-----------")
 print(documents[0])
 print("-----------")
-# Load the document, split it into chunks, embed each chunk, and load it into the vector store.
-text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
-vdocuments = text_splitter.split_documents(documents)
-# Add these lines before creating the Chroma vector store
-#print("Length of embeddings: %s", len(api_hf_embeddings))
-print("Length of documents: %s" % len(documents))
-print("Length of vdocuments: %s", len(vdocuments))
-# Add these lines before creating the Chroma vector store
-#logger.debug("Length of vdocuments: %s", len(vdocuments))
-if vdocuments and 'embeddings' in vdocuments[0]:
-    first_document_embeddings = vdocuments[0]['embeddings']
-    print("Length of embeddings for the first document: {}".format(len(first_document_embeddings)))
 # Create Chroma vector store for API embeddings
-api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
 # Define the PDF retrieval function
 def pdf_retrieval(query):
@@ -57,4 +48,4 @@ api_tool = gr.Interface(
 )
 # Launch the Gradio interface
-api_tool.launch()

 print(documents[0])
 print("-----------")
+# Extract the embedding arrays from the PDF documents
+embeddings = []
+for doc in documents:
+    embeddings.extend(doc['embeddings'])
 # Create Chroma vector store for API embeddings
+api_db = Chroma.from_texts(embeddings, api_hf_embeddings, collection_name="api-collection")
 # Define the PDF retrieval function
 def pdf_retrieval(query):
 )
 # Launch the Gradio interface
+#api_tool.launch()