Spaces:
Runtime error
Runtime error
"""Mongo client.""" | |
from typing import Dict, List, Optional | |
from gpt_index.readers.base import BaseReader | |
from gpt_index.readers.schema.base import Document | |
class SimpleMongoReader(BaseReader):
    """Simple mongo reader.

    Converts each Mongo document's ``text`` field into a Document used by
    LlamaIndex.

    Args:
        host (str): Mongo host.
        port (int): Mongo port.
        max_docs (int): Maximum number of documents to load per
            ``load_data`` call. Passed to PyMongo as the query ``limit``
            (PyMongo treats a limit of 0 as "no limit").

    """

    def __init__(self, host: str, port: int, max_docs: int = 1000) -> None:
        """Initialize with parameters.

        Raises:
            ImportError: If the ``pymongo`` package is not installed.

        """
        # Imported lazily so that pymongo is only required when this reader
        # is actually instantiated.
        try:
            from pymongo import MongoClient
        except ImportError as err:
            raise ImportError(
                "`pymongo` package not found, please run `pip install pymongo`"
            ) from err

        self.client: MongoClient = MongoClient(host, port)
        self.max_docs = max_docs

    def load_data(
        self, db_name: str, collection_name: str, query_dict: Optional[Dict] = None
    ) -> List[Document]:
        """Load data from a Mongo collection.

        Args:
            db_name (str): name of the database.
            collection_name (str): name of the collection.
            query_dict (Optional[Dict]): query to filter documents.
                Defaults to None, which applies no filter.

        Returns:
            List[Document]: A list of documents, at most ``max_docs`` long
                (unless ``max_docs`` is 0).

        Raises:
            ValueError: If a returned Mongo document has no ``text`` field.

        """
        collection = self.client[db_name][collection_name]
        documents = []
        # Honor the documented ``max_docs`` cap by letting the server limit
        # the result set. The ``with`` block closes the cursor even when we
        # raise part-way through iteration.
        with collection.find(query_dict or {}, limit=self.max_docs) as cursor:
            for item in cursor:
                if "text" not in item:
                    raise ValueError("`text` field not found in Mongo document.")
                documents.append(Document(item["text"]))
        return documents