"""Small retrieval helper: sentence-transformer embeddings stored in ChromaDB."""

# import recursive text splitter
import os
import uuid

import chromadb
from sentence_transformers import SentenceTransformer


class RAG_1177:
    """Wrap a persistent Chroma collection and the model used to embed text.

    The instance owns three collaborators:

    * ``self.model``  -- the ``SentenceTransformer`` used by ``get_embeddings``.
    * ``self.client`` -- a ``chromadb.PersistentClient`` rooted at ``db_path``.
    * ``self.db``     -- the collection named ``db_name`` (created on demand).
    """

    def __init__(
        self,
        db_name="RAG_1177",
        model_name="KBLab/sentence-bert-swedish-cased",
        db_path="RAG_1177_db",
    ):
        """Load the embedding model and open (or create) the collection.

        Args:
            db_name: Name of the Chroma collection to open or create.
            model_name: Identifier passed to ``SentenceTransformer``.
            db_path: Directory handed to ``chromadb.PersistentClient``.

        All defaults equal the values that were previously hard-coded, so
        ``RAG_1177()`` behaves as before.
        """
        self.db_name = db_name
        self.model = SentenceTransformer(model_name)
        self.client = chromadb.PersistentClient(path=db_path)
        self.db = self.client.get_or_create_collection(self.db_name)

    def get_embeddings(self, text):
        """Encode ``text`` with the model and return the result as a list.

        ``text`` is forwarded unchanged to ``self.model.encode``; the returned
        array is converted with ``.tolist()`` so it holds plain Python values.
        """
        return self.model.encode(text).tolist()

    def get_ids(self, num_ids: int) -> list:
        """Return ``num_ids`` freshly generated UUID4 strings."""
        return [str(uuid.uuid4()) for _ in range(num_ids)]

    def delete_collection(self) -> None:
        """Delete the collection named ``self.db_name`` from the client.

        NOTE(review): ``self.db`` is not refreshed here, so it keeps
        referring to the collection that was just deleted -- confirm callers
        do not reuse this instance afterwards.
        """
        self.client.delete_collection(self.db_name)

    def retrieve(self, query, num_results: int):
        """Query the collection and return matching documents plus a URL list.

        Args:
            query: Text to embed and search for.
            num_results: Number of results requested from the collection.

        Returns:
            A ``(documents, urls_text)`` tuple. ``documents`` is the document
            list for the first (only) query. ``urls_text`` is a string that
            starts with the "Läs mer på:" header followed by one numbered line
            per distinct URL.
        """
        query_emb = self.get_embeddings(query)
        result = self.db.query(
            query_embeddings=query_emb,
            n_results=num_results,
            include=['documents', 'metadatas'],
        )
        # Index 0 selects the hits belonging to the single query sent above.
        metadatas = result['metadatas'][0]
        result_docs = result['documents'][0]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # numbered list is stable between runs (a set gave arbitrary order).
        # Hits without metadata or without a 'url' entry are skipped instead
        # of raising.
        unique_urls = dict.fromkeys(
            meta['url'] for meta in metadatas if meta and 'url' in meta
        )

        result_urls = "Läs mer på:\n" + "".join(
            f"{i}: {url}\n" for i, url in enumerate(unique_urls, start=1)
        )
        return result_docs, result_urls

    def insert(self, docs, emb, urls, ids) -> None:
        """Add documents to the collection.

        Args:
            docs: Document texts.
            emb: Embeddings for ``docs``.
            urls: Stored as the entries' ``metadatas``; ``retrieve`` reads the
                ``'url'`` key of each entry, so each item is expected to be a
                mapping carrying that key.
            ids: Identifiers for the entries (see ``get_ids``).
        """
        self.db.add(documents=docs, embeddings=emb, metadatas=urls, ids=ids)