from sentence_transformers import SentenceTransformer
import numpy as np
from typing import Sequence
import faiss


class Vectorizer:
    """Embed prompts with a pre-trained model and store them in a FAISS index."""

    def __init__(self, model) -> None:
        """Initialize the vectorizer with a pre-trained embedding model.

        Args:
            model: The pre-trained embedding model to use for transforming
                prompts. It must provide ``encode(prompts)``; if it also
                provides ``get_sentence_embedding_dimension()``, that value
                is used to size the index up front.
        """
        self.model = model
        # Kept for backward compatibility with callers that read it. It is
        # only used as the index dimension when the model cannot report one.
        self.index_size = 50000
        # IndexFlatIP takes the vector dimensionality, not a capacity.
        self.index = faiss.IndexFlatIP(self._initial_dimension())
        # NOTE(review): presumably maps index positions to retrieval-DB ids;
        # nothing in this class populates it -- confirm against callers.
        self.cached_index_idx_to_retrieval_db_idx = []

    def _initial_dimension(self) -> int:
        """Return the model's embedding dimension, or ``index_size`` if unknown."""
        getter = getattr(self.model, "get_sentence_embedding_dimension", None)
        dimension = getter() if callable(getter) else None
        return int(dimension) if dimension else self.index_size

    def transform_and_add_to_index(self, prompts: Sequence[str]) -> np.ndarray:
        """Transform texts into numerical vectors and add them to the index.

        Args:
            prompts: The sequence of raw corpus prompts.

        Returns:
            The vectorized prompts as a float32 array with one row per
            prompt. For empty input, the (empty) array is returned and the
            index is left untouched.

        Raises:
            ValueError: If the index already holds vectors whose dimension
                differs from the new embeddings.
        """
        # FAISS expects C-contiguous float32 input.
        embeddings = np.ascontiguousarray(
            self.model.encode(prompts), dtype=np.float32
        )
        if embeddings.size == 0:
            return embeddings
        if embeddings.ndim == 1:
            # A single vector: treat it as a one-row batch.
            embeddings = embeddings.reshape(1, -1)

        embedding_dimension = embeddings.shape[1]
        print('Embedding dimension:', embedding_dimension)

        if self.index.d != embedding_dimension:
            if self.index.ntotal:
                raise ValueError(
                    f"Embedding dimension {embedding_dimension} does not match "
                    f"the populated index dimension {self.index.d}"
                )
            # The index is still empty, so it can safely be rebuilt with the
            # dimension the model actually produces.
            self.index = faiss.IndexFlatIP(embedding_dimension)

        self.index.add(embeddings)
        return embeddings