Update retrieval.py
Browse files- retrieval.py +10 -1
retrieval.py
CHANGED
@@ -80,7 +80,7 @@ class EmbedFunction:
|
|
80 |
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
81 |
embed_function = EmbedFunction(EMBED_MODEL_NAME)
|
82 |
|
83 |
-
# Use a temporary directory for persistent storage
|
84 |
temp_dir = tempfile.mkdtemp()
|
85 |
print("Using temporary persist_directory:", temp_dir)
|
86 |
|
@@ -97,6 +97,15 @@ collection = client.get_or_create_collection(
|
|
97 |
embedding_function=embed_function
|
98 |
)
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
def index_pubmed_docs(docs: List[str], prefix: str = "doc"):
|
101 |
"""
|
102 |
Adds documents to the Chromadb collection with unique IDs.
|
|
|
80 |
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
81 |
embed_function = EmbedFunction(EMBED_MODEL_NAME)
|
82 |
|
83 |
+
# Use a temporary directory for persistent storage.
|
84 |
temp_dir = tempfile.mkdtemp()
|
85 |
print("Using temporary persist_directory:", temp_dir)
|
86 |
|
|
|
97 |
embedding_function=embed_function
|
98 |
)
|
99 |
|
100 |
+
# Force initialization: add a throw-away document and run one query at import
# time, then remove the throw-away document again so it can never be returned
# by later real queries.
# NOTE(review): presumably this warms up the embedding function / index so the
# first real request does not fail or stall -- confirm the original intent.
# NOTE(review): after cleanup the collection is empty until documents are
# indexed; verify the installed Chroma version tolerates queries whose
# n_results exceeds the number of stored items.
try:
    collection.add(documents=["dummy"], ids=["dummy"])
    try:
        collection.query(query_texts=["dummy"], n_results=1)
    finally:
        # Runs whether or not the query succeeded, so the warm-up entry is
        # not left behind once it has been added.
        collection.delete(ids=["dummy"])
    print("Dummy initialization successful.")
except Exception as init_err:
    # Best-effort warm-up: report the problem but do not abort the import.
    print("Dummy initialization failed:", init_err)
|
108 |
+
|
109 |
def index_pubmed_docs(docs: List[str], prefix: str = "doc"):
|
110 |
"""
|
111 |
Adds documents to the Chromadb collection with unique IDs.
|