"""Gradio app exposing a fuzzy purine-content lookup, launched as an MCP server."""

import re

import gradio as gr
import pandas as pd
from rapidfuzz import fuzz, process

# Purine table, loaded once at import time. The lookup code below only relies
# on the presence of a "food" column.
DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")

# NOTE(review): `{...}` is a set holding only the Ellipsis object, which looks
# like an elided placeholder; as written no word is ever filtered out.
# Confirm the intended stop-word list.
STOPWORDS = {...}

# Compiled once: tokens are split on every run of non-word characters.
_TOKEN_SPLIT = re.compile(r"\W+")


def _meaningful_tokens(text: str) -> list[str]:
    """Return the lower-cased words of *text* that are not in ``STOPWORDS``.

    Non-string input (for example a NaN from an empty CSV cell) yields an
    empty list instead of raising.
    """
    if not isinstance(text, str):
        return []
    return [
        word
        for word in _TOKEN_SPLIT.split(text.lower())
        if word and word not in STOPWORDS
    ]


# NOTE(review): `gr.tools` could not be confirmed against the Gradio API
# reference; verify that this decorator exists in the installed version and
# that it returns a callable usable as the event handler further down.
@gr.tools(  # NEW – turns the fn into an MCP tool
    name="purine_ingredients_fuzzy_search",
    description="Return up to `k` food-items that fuzzy-match the query and their "
                "purine data (mg/100 g).",
)
def fuzzy_search(
    query: str,  # required → will appear in the schema
    k: int = 5,
    cutoff: int = 75,
) -> list[dict]:
    """Look up foods whose name matches *query*.

    Matching happens in two stages: a case-insensitive substring search first,
    and only when that finds nothing, a RapidFuzz token-set ranking over the
    foods that share at least one meaningful token with the query.

    Parameters
    ----------
    query : str
        Ingredient or dish name to look up. Surrounding whitespace is ignored;
        an empty or whitespace-only query returns no results.
    k : int, optional
        Maximum rows to return (default = 5). Values below 1 return no results.
    cutoff : int, optional
        Minimal RapidFuzz token-set ratio (default = 75).

    Returns
    -------
    list[dict]
        One dict per matching row, holding every column of the CSV for that
        row. Presumably this includes 'food', 'purines_mg_per_100g' and
        'classification' (only 'food' is confirmed by this file). Substring
        hits keep the CSV order; fuzzy hits are ordered best score first.
    """
    if not isinstance(query, str) or k < 1:
        return []
    query = query.strip()
    if not query:
        # An empty needle is a substring of every name; treat it as "no query"
        # rather than returning the first k rows of the table.
        return []

    # Direct substring hits (best precision, no scoring needed). `na=False`
    # keeps rows with a missing food name from breaking the boolean mask.
    mask = DF["food"].str.contains(query, case=False, regex=False, na=False)
    direct = DF[mask]
    if not direct.empty:
        return direct.head(k).to_dict(orient="records")

    # Token filter: only rank foods sharing a meaningful token with the query.
    q_tokens = set(_meaningful_tokens(query))
    if not q_tokens:
        return []  # nothing to match on

    # Keyed by row label so each match maps back to exactly one row.
    candidates = {
        label: food
        for label, food in DF["food"].items()
        if isinstance(food, str) and q_tokens & set(_meaningful_tokens(food))
    }
    if not candidates:
        return []

    # Fuzzy rank. With a mapping of choices, each result is
    # (choice, score, key), already sorted by descending score.
    matches = process.extract(
        query,
        candidates,
        scorer=fuzz.token_set_ratio,
        score_cutoff=cutoff,
        limit=k,
    )
    # Select by label in match order: preserves the ranking and never yields
    # more than k rows, even when several rows share a food name.
    rows = DF.loc[[label for _, _, label in matches]]
    return rows.to_dict(orient="records")


# ---------- Minimal UI (optional) -------------------------------------
with gr.Blocks(title="Purine DB MCP") as demo:
    gr.Markdown("## Purine Lookup Tools (MCP-enabled)")

    with gr.Tab("Fuzzy search"):
        in2 = gr.Textbox(label="Fuzzy term")
        out2 = gr.JSON()
        # Only the query is wired in; k and cutoff use their defaults.
        in2.submit(fuzzy_search, in2, out2)

# ---------- Launch ----------------------------------------------------
demo.launch(mcp_server=True)