# mathieunlp's picture
# Update app.py
# bce2b97 verified
import gradio as gr
from rapidfuzz import process, fuzz
import pandas as pd, re
# Purine table, loaded once at import time from a path relative to the
# current working directory. The code below reads its "food" column.
DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
# Tokens that `_meaningful_tokens` drops from queries and food names.
# NOTE(review): `{...}` is a set literal holding only the Ellipsis object, so
# no actual word is filtered — the real stop-word list looks elided; confirm.
STOPWORDS = {...}
def _meaningful_tokens(text: str):
    """Return the lowercase word tokens of *text* that are not stop words.

    The text is lowercased, split on runs of non-word characters, and any
    empty fragment or member of ``STOPWORDS`` is discarded. Order and
    duplicates are preserved.
    """
    kept = []
    for piece in re.split(r"\W+", text.lower()):
        # Splitting leaves empty strings at the edges when the text starts or
        # ends with punctuation/whitespace; skip those along with stop words.
        if not piece or piece in STOPWORDS:
            continue
        kept.append(piece)
    return kept
# NOTE(review): confirm that the installed Gradio version exposes `gr.tools`;
# this decorator is kept exactly as written in the original.
@gr.tools(  # NEW – turns the fn into an MCP tool
    name="purine_ingredients_fuzzy_search",
    description="Return up to `k` food-items that fuzzy-match the query and their "
                "purine data (mg/100 g)."
)
def fuzzy_search(
    query: str,  # required → will appear in the schema
    k: int = 5,
    cutoff: int = 75
) -> list[dict]:
    """
    Look up foods in the purine table by substring, falling back to fuzzy match.

    Parameters
    ----------
    query : str
        Ingredient or dish name to look up. Surrounding whitespace is ignored;
        an empty query yields no results.
    k : int, optional
        Maximum rows to return (default = 5). Values below 1 yield no results.
    cutoff : int, optional
        Minimal RapidFuzz token-set ratio (default = 75).

    Returns
    -------
    list[dict]
        One record per matching row of the table (at most `k`).
        Each dict contains 'food', 'purines_mg_per_100g', and 'classification'.
        Substring hits are returned in table order; fuzzy hits are returned
        best score first.
    """
    # Local import: only `process` and `fuzz` are imported at file level.
    from rapidfuzz import utils as _rf_utils

    needle = query.strip() if isinstance(query, str) else ""
    # An empty needle is a substring of every food name and would return the
    # first k rows of the table; a non-positive k makes `head(k)` return
    # almost the whole table. Neither is a meaningful lookup.
    if not needle or k < 1:
        return []

    foods = DF["food"]

    # direct substring hits (best precision, no scoring needed)
    # na=False keeps the mask strictly boolean when a food name is missing.
    mask = foods.str.contains(needle, case=False, regex=False, na=False)
    direct = DF[mask]
    if not direct.empty:
        return direct.head(k).to_dict(orient="records")

    # token filter
    q_tokens = set(_meaningful_tokens(needle))
    if not q_tokens:
        return []  # nothing to match on

    # Key candidates by row position so each match maps back to exactly one
    # row, even when several rows share the same food name.
    candidates = {
        pos: food
        for pos, food in enumerate(foods)
        if isinstance(food, str) and q_tokens & set(_meaningful_tokens(food))
    }

    # fuzzy rank
    # With a mapping, RapidFuzz returns (choice, score, key) tuples sorted by
    # score. The explicit processor keeps scoring case-insensitive, matching
    # the substring and token filters above.
    matches = process.extract(
        needle, candidates,
        scorer=fuzz.token_set_ratio,
        processor=_rf_utils.default_process,
        score_cutoff=cutoff,
        limit=k
    )
    ranked_positions = [pos for _food, _score, pos in matches]
    # iloc with the ranked positions preserves best-first order and never
    # yields more than k rows.
    return DF.iloc[ranked_positions].to_dict(orient="records")
# ---------- Minimal UI (optional) -------------------------------------
# Single-tab page: a textbox whose submit event runs `fuzzy_search` and
# shows the returned records as JSON.
with gr.Blocks(title="Purine DB MCP") as demo:
    gr.Markdown("## Purine Lookup Tools (MCP-enabled)")
    with gr.Tab("Fuzzy search"):
        query_box = gr.Textbox(label="Fuzzy term")
        results_view = gr.JSON()
        # Only the query is wired in, so `k` and `cutoff` keep their defaults.
        query_box.submit(fn=fuzzy_search, inputs=query_box, outputs=results_view)

# ---------- Launch ----------------------------------------------------
# Start the web app with the MCP server enabled.
demo.launch(mcp_server=True)