# app_pure_rag.py
import numpy as np
import faiss
import gradio as gr
from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import SentenceTransformer

# --- Load and Prepare Data ---
with open("gen_agents.txt", "r", encoding="utf-8") as f:
    full_text = f.read()

# Split text into passages
text_splitter = CharacterTextSplitter(separator="\n\n", chunk_size=512, chunk_overlap=20)
docs = text_splitter.create_documents([full_text])
passages = [doc.page_content for doc in docs]

# Initialize embedder and build FAISS index
embedder = SentenceTransformer('all-MiniLM-L6-v2')
passage_embeddings = embedder.encode(passages, convert_to_tensor=False, show_progress_bar=True)
passage_embeddings = np.array(passage_embeddings).astype("float32")
d = passage_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(passage_embeddings)

# --- Provided Functions ---
def retrieve_passages(query, embedder, index, passages, top_k=3):
    """
    Retrieve the top-k most relevant passages based on the query.
    """
    query_embedding = embedder.encode([query], convert_to_tensor=False)
    query_embedding = np.array(query_embedding).astype('float32')
    distances, indices = index.search(query_embedding, top_k)
    retrieved = [passages[i] for i in indices[0]]
    return retrieved

# --- Gradio App Function ---
def get_pure_rag_output(query):
    retrieved = retrieve_passages(query, embedder, index, passages, top_k=3)
    rag_text = "\n".join([f"Passage {i+1}: {p}" for i, p in enumerate(retrieved)])
    # Wrap text in a styled div
    return f"