Spaces: Running on T4
import sys
from typing import Optional

import gradio as gr

from utils.retriever import get_context, get_vectorstore
# Establish the module-level vector store connection at import time so every
# request handler reuses a single connection.
print("Initializing vector store connection...", flush=True)
try:
    vectorstore = get_vectorstore()
except Exception as err:
    # Surface the failure in the container logs, then re-raise so startup
    # aborts loudly instead of serving a broken app.
    print(f"Failed to initialize vector store: {err}", flush=True)
    raise
else:
    print("Vector store connection initialized successfully", flush=True)
# ---------------------------------------------------------------------
# MCP - returns raw dictionary format
# ---------------------------------------------------------------------
def retrieve(
    query: str,
    collection_name: Optional[str] = None,
    filter_metadata: Optional[dict] = None,
) -> list:
    """
    Retrieve semantically similar documents from the vector database for MCP clients.

    Args:
        query (str): The search query text.
        collection_name (str, optional): Name of the vector-store collection
            to search; None lets the retriever use its default.
        filter_metadata (dict, optional): Metadata key/value pairs used to
            filter the candidate documents; None applies no filter.

    Returns:
        list: List of dictionaries containing document content, metadata, and scores.
    """
    # Delegate to the retriever helper; `vectorstore` is the module-level
    # connection created at startup.
    # (Docstring previously described nonexistent parameters —
    # reports_filter/sources_filter/subtype_filter/year_filter — now fixed
    # to match the actual signature.)
    results = get_context(
        vectorstore=vectorstore,
        query=query,
        collection_name=collection_name,
        filter_metadata=filter_metadata,
    )
    return results
# Create the Gradio interface with Blocks to support both UI and MCP
with gr.Blocks() as ui:
    gr.Markdown("# ChatFed Retrieval/Reranker Module")
    gr.Markdown("Retrieves semantically similar documents from vector database and reranks. Intended for use in RAG pipelines as an MCP server with other ChatFed modules.")
    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(
                label="Query",
                lines=2,
                placeholder="Enter your search query here",
                info="The query to search for in the vector database"
            )
            collection_name = gr.Textbox(
                label="Collection Name (optional)",
                lines=1,
                placeholder="EUDR, Humboldt",
                info="Name of the collection"
            )
            sources_input = gr.Textbox(
                label="Sources Filter key to be looked in metadata (optional)",
                lines=1,
                placeholder="country",
                info="Filter by document source type (leave empty for all)"
            )
            sources_value = gr.Textbox(
                label="Value in filter to be looked for(optional)",
                lines=1,
                placeholder="Ecuador, Guatemala",
                info="Filter by document subtype (leave empty for all)"
            )
            submit_btn = gr.Button("Submit", variant="primary")
        # Output needs to be in json format to be added as tool in HuggingChat
        with gr.Column():
            output = gr.Text(
                label="Retrieved Context",
                lines=10,
                show_copy_button=True
            )

    def _retrieve_from_ui(query: str, collection: str, filter_key: str, filter_value: str) -> list:
        """Adapt the submitted textbox values into a retrieve() call.

        BUG FIX: the original code evaluated ``sources_input & sources_value``
        at interface-build time on the Textbox component objects themselves —
        not on user input — and then passed the resulting dict/None as a
        ``click`` *inputs* entry (which must be a component). The metadata
        filter has to be built per-request from the submitted values instead.
        """
        # Only build a filter when both a key and a value were provided.
        filter_metadata = {filter_key: filter_value} if filter_key and filter_value else None
        # Treat an empty collection textbox as "no collection specified".
        return retrieve(query, collection or None, filter_metadata)

    # UI event handler: pass the two filter textboxes as inputs so their
    # current values reach the handler on every submit.
    submit_btn.click(
        fn=_retrieve_from_ui,
        inputs=[query_input, collection_name, sources_input, sources_value],
        outputs=output,
        api_name="retrieve"
    )
# Launch the Gradio app when run as a script.
# NOTE(review): the module description advertises use as an MCP server, but
# mcp_server=True is commented out below — confirm whether it should be
# enabled (presumably requires the gradio MCP extra; verify before enabling).
if __name__ == "__main__":
    ui.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container-friendly)
        server_port=7860,
        #mcp_server=True,
        show_error=True  # surface handler exceptions in the UI for debugging
    )