mgbam committed on
Commit
118ab17
·
verified ·
1 Parent(s): d183895

Update retrieval.py

Browse files
Files changed (1) hide show
  1. retrieval.py +10 -1
retrieval.py CHANGED
@@ -80,7 +80,7 @@ class EmbedFunction:
80
  EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
81
  embed_function = EmbedFunction(EMBED_MODEL_NAME)
82
 
83
- # Use a temporary directory for persistent storage
84
  temp_dir = tempfile.mkdtemp()
85
  print("Using temporary persist_directory:", temp_dir)
86
 
@@ -97,6 +97,15 @@ collection = client.get_or_create_collection(
97
  embedding_function=embed_function
98
  )
99
 
 
 
 
 
 
 
 
 
 
100
  def index_pubmed_docs(docs: List[str], prefix: str = "doc"):
101
  """
102
  Adds documents to the Chromadb collection with unique IDs.
 
80
  EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
81
  embed_function = EmbedFunction(EMBED_MODEL_NAME)
82
 
83
+ # Use a temporary directory for persistent storage.
84
  temp_dir = tempfile.mkdtemp()
85
  print("Using temporary persist_directory:", temp_dir)
86
 
 
97
  embedding_function=embed_function
98
  )
99
 
100
+ # Force initialization: add a dummy document and perform a dummy query.
101
+ try:
102
+ collection.add(documents=["dummy"], ids=["dummy"])
103
+ _ = collection.query(query_texts=["dummy"], n_results=1)
104
+ # Optionally, remove the dummy document afterwards (e.g. collection.delete(ids=["dummy"])) so it does not show up in later query results.
105
+ print("Dummy initialization successful.")
106
+ except Exception as init_err:
107
+ print("Dummy initialization failed:", init_err)
108
+
109
  def index_pubmed_docs(docs: List[str], prefix: str = "doc"):
110
  """
111
  Adds documents to the Chromadb collection with unique IDs.