Spaces:
Running
Running
| # app_pure_rag.py | |
| import numpy as np | |
| import faiss | |
| import gradio as gr | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from sentence_transformers import SentenceTransformer | |
# --- Load and Prepare Data ---
# Read the entire corpus into memory; it is chunked into passages below.
with open("gen_agents.txt", mode="r", encoding="utf-8") as corpus_file:
    full_text = corpus_file.read()

# Split the corpus on blank lines into passages (chunk_size=512, overlap=20).
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=512,
    chunk_overlap=20,
)
docs = text_splitter.create_documents([full_text])
passages = [document.page_content for document in docs]

# Embed every passage once at start-up and store the vectors in a FAISS index.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
passage_embeddings = np.asarray(
    embedder.encode(passages, convert_to_tensor=False, show_progress_bar=True),
    dtype="float32",  # converted to float32 before being handed to FAISS
)
d = passage_embeddings.shape[1]  # embedding dimensionality
index = faiss.IndexFlatL2(d)
index.add(passage_embeddings)
| # --- Provided Functions --- | |
def retrieve_passages(query, embedder, index, passages, top_k=3):
    """
    Retrieve the top-k most relevant passages based on the query.

    Args:
        query: The query string to embed and search for.
        embedder: Object exposing ``encode(list_of_texts, convert_to_tensor=False)``.
        index: Object exposing ``search(query_vectors, k)`` that returns a
            ``(distances, indices)`` pair with one row per query vector.
        passages: Sequence of passage texts; positions must line up with the
            ids stored in ``index``.
        top_k: Maximum number of passages to return.

    Returns:
        A list of at most ``top_k`` passages, in the order the index returned
        them. Empty when ``top_k`` is not positive or nothing valid is found.
    """
    if top_k <= 0:
        # Nothing was requested; avoid calling the index with a non-positive k.
        return []

    query_embedding = embedder.encode([query], convert_to_tensor=False)
    query_embedding = np.asarray(query_embedding, dtype="float32")

    _distances, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when the index holds fewer than top_k
    # vectors. Without this filter, passages[-1] would silently return the
    # last passage as a bogus hit.
    total = len(passages)
    return [passages[i] for i in indices[0] if 0 <= i < total]
| # --- Gradio App Function --- | |
def get_pure_rag_output(query):
    """
    Build the HTML fragment listing the passages retrieved for ``query``.

    Args:
        query: The query text to search the corpus with.

    Returns:
        A ``<div>`` (with ``white-space: pre-wrap``) containing one
        ``Passage N: ...`` line per retrieved passage, numbered from 1.
    """
    # Imported here so the function is self-contained.
    from html import escape

    retrieved = retrieve_passages(query, embedder, index, passages, top_k=3)
    # Passage text goes into raw HTML: escape it so characters such as
    # '<', '>' and '&' in the corpus are shown literally, not parsed as markup.
    rag_text = "\n".join(
        f"Passage {number}: {escape(passage)}"
        for number, passage in enumerate(retrieved, start=1)
    )
    # Wrap text in a styled div
    return f"<div style='white-space: pre-wrap;'>{rag_text}</div>"
def clear_output():
    """Return an empty string, which blanks the component it is written to."""
    blank = ""
    return blank
# --- Custom CSS for a ChatGPT-like Dark Theme ---
# Stylesheet handed to gr.Blocks(css=...) when the interface is built.
#   body               - dark page background, light text, Inter font
#   #container         - centred column capped at 900px (matches elem_id="container")
#   label              - light, semi-bold field labels
#   textarea, input    - dark input fields with a subtle border
#   button, :hover     - flat grey buttons that lighten on hover
#   .output-box        - bordered panel (matches elem_classes="output-box")
custom_css = """
body {
background-color: #343541 !important;
color: #ECECEC !important;
margin: 0;
padding: 0;
font-family: 'Inter', sans-serif;
}
#container {
max-width: 900px;
margin: 0 auto;
padding: 20px;
}
label {
color: #ECECEC;
font-weight: 600;
}
textarea, input {
background-color: #40414F;
color: #ECECEC;
border: 1px solid #565869;
}
button {
background-color: #565869;
color: #ECECEC;
border: none;
font-weight: 600;
transition: background-color 0.2s ease;
}
button:hover {
background-color: #6e7283;
}
.output-box {
border: 1px solid #565869;
border-radius: 4px;
padding: 10px;
margin-top: 8px;
background-color: #40414F;
}
"""
# --- Build Gradio Interface ---
# NOTE(review): the original indentation was lost in this copy of the file;
# the nesting below is reconstructed from the `with` statements - confirm
# against the deployed app.
with gr.Blocks(css=custom_css) as demo:
    # Outer column carries elem_id="container" so the #container CSS rule applies.
    with gr.Column(elem_id="container"):
        gr.Markdown("## Pure RAG Output\nDisplays the retrieved passages from the corpus.")
        query_input = gr.Textbox(label="Query", placeholder="Enter your query here...", lines=1)
        # Action buttons grouped in their own column.
        with gr.Column():
            submit_button = gr.Button("Submit")
            clear_button = gr.Button("Clear")
        # HTML component that receives the string returned by the handlers below.
        output_box = gr.HTML(label="Retrieved Passages", elem_classes="output-box")
    # Submit: run retrieval on the query text and write the HTML into output_box.
    submit_button.click(fn=get_pure_rag_output, inputs=query_input, outputs=output_box)
    # Clear: write clear_output()'s return value ("") into output_box.
    clear_button.click(fn=clear_output, inputs=[], outputs=output_box)
# Start the Gradio app (runs at module level, i.e. whenever this file is executed).
demo.launch()