Chris4K committed on
Commit
e181ae7
·
verified ·
1 Parent(s): 3b4ef40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -16
app.py CHANGED
@@ -20,24 +20,15 @@ print("-----------")
20
  print(documents[0])
21
  print("-----------")
22
 
23
- # Load the document, split it into chunks, embed each chunk, and load it into the vector store.
24
- text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
25
- vdocuments = text_splitter.split_documents(documents)
26
-
27
- # Add these lines before creating the Chroma vector store
28
- #print("Length of embeddings: %s", len(api_hf_embeddings))
29
- print("Length of documents: %s" % len(documents))
30
-
31
- print("Length of vdocuments: %s", len(vdocuments))
32
- # Add these lines before creating the Chroma vector store
33
- #logger.debug("Length of vdocuments: %s", len(vdocuments))
34
- if vdocuments and 'embeddings' in vdocuments[0]:
35
- first_document_embeddings = vdocuments[0]['embeddings']
36
- print("Length of embeddings for the first document: {}".format(len(first_document_embeddings)))
37
 
 
 
 
 
38
 
39
  # Create Chroma vector store for API embeddings
40
- api_db = Chroma.from_documents(vdocuments, api_hf_embeddings, collection_name="api-collection")
 
41
 
42
  # Define the PDF retrieval function
43
  def pdf_retrieval(query):
@@ -57,4 +48,4 @@ api_tool = gr.Interface(
57
  )
58
 
59
  # Launch the Gradio interface
60
- api_tool.launch()
 
20
  print(documents[0])
21
  print("-----------")
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Extract the embedding arrays from the PDF documents
25
+ embeddings = []
26
+ for doc in documents:
27
+ embeddings.extend(doc['embeddings'])
28
 
29
  # Create Chroma vector store for API embeddings
30
+ api_db = Chroma.from_texts(embeddings, api_hf_embeddings, collection_name="api-collection")
31
+
32
 
33
  # Define the PDF retrieval function
34
  def pdf_retrieval(query):
 
48
  )
49
 
50
  # Launch the Gradio interface
51
+ #api_tool.launch()