import os
from typing import List, Literal
from uuid import UUID

from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams


class QdrantVectorstoreHelper:
    """Helper around LangChain's Qdrant integrations for local (in-memory) and cloud vector stores."""

    def __init__(self) -> None:
        # Connect to Qdrant Cloud only when both credentials are available.
        self.client = None
        if os.getenv('QDRANT_API_KEY') and os.getenv('QDRANT_URL'):
            self.client = QdrantClient(
                url=os.getenv('QDRANT_URL'),
                api_key=os.getenv('QDRANT_API_KEY')
            )
        else:
            print("Qdrant API key and URL not present.")

    def create_collection(self, name: str, vector_size: int) -> None:
        # Create a new cloud collection that uses cosine distance.
        if self.client:
            self.client.create_collection(
                collection_name=name,
                vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
            )
        else:
            print('No Qdrant client')

    def create_local_vectorstore(
        self,
        chunks: List[Document],
        embedding_model: OpenAIEmbeddings | HuggingFaceInferenceAPIEmbeddings = OpenAIEmbeddings(model='text-embedding-3-large'),
        vector_size: int = 3072
    ) -> None:
        # Build an in-memory Qdrant store; use smaller batches for the HF Inference API.
        self.local_vectorstore = Qdrant.from_documents(
            documents=chunks,
            vector_params={'size': vector_size, 'distance': Distance.COSINE},
            embedding=embedding_model,
            batch_size=32 if isinstance(embedding_model, HuggingFaceInferenceAPIEmbeddings) else 64,
            location=":memory:"
        )

    def create_cloud_vectorstore(
        self,
        chunks: List[Document],
        collection_name: str,
        embedding_model: OpenAIEmbeddings | HuggingFaceInferenceAPIEmbeddings = OpenAIEmbeddings(model='text-embedding-3-large'),
        vector_size: int = 3072
    ) -> None:
        # Reuse the collection if it already exists; otherwise create it from the chunks.
        try:
            self.cloud_vectorstore = QdrantVectorStore.from_existing_collection(
                embedding=embedding_model,
                collection_name=collection_name,
                url=os.getenv('QDRANT_URL'),
                api_key=os.getenv('QDRANT_API_KEY')
            )
        except Exception:
            self.cloud_vectorstore = QdrantVectorStore.from_documents(
                documents=chunks,
                embedding=embedding_model,
                vector_params={'size': vector_size, 'distance': Distance.COSINE},
                collection_name=collection_name,
                batch_size=4 if isinstance(embedding_model, HuggingFaceInferenceAPIEmbeddings) else 64,
                prefer_grpc=True,
                url=os.getenv('QDRANT_URL'),
                api_key=os.getenv('QDRANT_API_KEY')
            )

    def add_docs_to_vectorstore(
        self,
        collection_name: Literal['memory'] | str,
        chunks: List[Document],
        uuids: List[UUID],
        embedding_model: OpenAIEmbeddings = OpenAIEmbeddings(model='text-embedding-3-large')
    ) -> None:
        # Qdrant expects string IDs, so stringify the UUIDs first.
        str_uuids = [str(uuid) for uuid in uuids]
        if collection_name == 'memory':
            self.local_vectorstore.add_documents(documents=chunks, ids=str_uuids)
        else:
            # An embedding model is required to reconnect to the existing cloud collection.
            self.cloud_vectorstore = QdrantVectorStore.from_existing_collection(
                collection_name=collection_name,
                embedding=embedding_model,
                url=os.getenv('QDRANT_URL'),
                api_key=os.getenv('QDRANT_API_KEY')
            )
            self.cloud_vectorstore.add_documents(documents=chunks, ids=str_uuids)

    def get_retriever(
        self,
        collection_name: Literal['memory'] | str,
        k: int = 3,
        embedding_model: OpenAIEmbeddings = OpenAIEmbeddings(model='text-embedding-3-large')
    ) -> VectorStoreRetriever:
        # 'memory' targets the local in-memory store; anything else reconnects to Qdrant Cloud.
        if collection_name == 'memory':
            return self.local_vectorstore.as_retriever(search_kwargs={'k': k})
        else:
            self.cloud_vectorstore = QdrantVectorStore.from_existing_collection(
                collection_name=collection_name,
                embedding=embedding_model,
                url=os.getenv('QDRANT_URL'),
                api_key=os.getenv('QDRANT_API_KEY')
            )
            return self.cloud_vectorstore.as_retriever(search_kwargs={'k': k})
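

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): it assumes OPENAI_API_KEY is set
    # for OpenAIEmbeddings, and that the sample documents and query below are placeholders.
    # QDRANT_URL / QDRANT_API_KEY are only needed for the cloud methods, not for this local demo.
    from uuid import uuid4

    docs = [Document(page_content='Qdrant is a vector database.', metadata={'source': 'demo'})]

    helper = QdrantVectorstoreHelper()
    # In-memory store for quick experiments, then add one more document to it.
    helper.create_local_vectorstore(chunks=docs)
    helper.add_docs_to_vectorstore(
        collection_name='memory',
        chunks=[Document(page_content='LangChain wraps Qdrant behind a vector store interface.')],
        uuids=[uuid4()]
    )
    retriever = helper.get_retriever(collection_name='memory', k=2)
    print(retriever.invoke('What is Qdrant?'))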