import os

from fastembed import LateInteractionTextEmbedding, SparseTextEmbedding
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer


class NeuralSearcher:
    """Hybrid (dense + sparse) search over a single Qdrant collection.

    Configuration is read from environment variables when the instance is
    created:

    * ``DENSE_MODEL`` - SentenceTransformer model identifier; the same string
      is used as the name of the dense vector in the collection.
    * ``SPARSE_MODEL`` - fastembed sparse model identifier; the same string is
      used as the name of the sparse vector in the collection.
    * ``LATE_INTERACTION_MODEL`` - fastembed late-interaction model identifier.
    * ``QDRANT_URL`` / ``QDRANT_API_KEY`` - connection settings for Qdrant.
    """

    def __init__(self, collection_name):
        """Load the embedding models and connect to Qdrant.

        Args:
            collection_name: Name of the Qdrant collection to query.
        """
        self.collection_name = collection_name

        # Resolve the model identifiers once: they double as the named-vector
        # keys passed as ``using`` in ``search``, so they must stay in sync
        # with the models loaded below for the lifetime of this instance.
        self._dense_vector_name = os.getenv('DENSE_MODEL')
        self._sparse_vector_name = os.getenv('SPARSE_MODEL')

        self.dense_model = SentenceTransformer(
            self._dense_vector_name, device="cpu"
        )
        self.sparse_model = SparseTextEmbedding(self._sparse_vector_name)
        # NOTE: this model is loaded but ``search`` does not use it; no
        # late-interaction reranking stage is applied to the fused results.
        self.late_interaction_model = LateInteractionTextEmbedding(
            os.getenv('LATE_INTERACTION_MODEL')
        )
        self.qdrant_client = QdrantClient(
            os.getenv('QDRANT_URL'), api_key=os.getenv('QDRANT_API_KEY')
        )

    async def search(
        self, text: str, *, limit: int = 10, prefetch_limit: int = 200
    ) -> list:
        """Run a hybrid query for ``text`` and return the matching payloads.

        The text is embedded with both the dense and the sparse model; each
        embedding is used for a prefetch of ``prefetch_limit`` candidates, and
        the two candidate lists are combined with ``models.Fusion.RRF``.

        NOTE: although declared ``async``, every call made here is
        synchronous, so awaiting this coroutine occupies the event loop until
        the encoding and the Qdrant request have finished.

        Args:
            text: Query text to search for.
            limit: Maximum number of fused results to return.
            prefetch_limit: Number of candidates fetched per vector type
                before fusion.

        Returns:
            A list with the ``payload`` of each returned point, in the order
            Qdrant returned them.
        """
        dense_query = self.dense_model.encode(text).tolist()
        # ``query_embed`` yields one embedding per input; a single string
        # produces exactly one item.
        sparse_query = next(self.sparse_model.query_embed(text))

        prefetch = [
            models.Prefetch(
                query=dense_query,
                using=self._dense_vector_name,
                limit=prefetch_limit,
            ),
            models.Prefetch(
                query=models.SparseVector(**sparse_query.as_object()),
                using=self._sparse_vector_name,
                limit=prefetch_limit,
            ),
        ]

        response = self.qdrant_client.query_points(
            collection_name=self.collection_name,
            prefetch=prefetch,
            query=models.FusionQuery(fusion=models.Fusion.RRF),
            with_payload=True,
            limit=limit,
        )
        return [hit.payload for hit in response.points]