Spaces:

mathieunlp
/

purine-ingredients

Runtime error

mathieunlp committed on Jun 9

Commit

bce2b97

verified ·

1 Parent(s): 2b22ba6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,27 +1,40 @@
 import gradio as gr
-import pandas as pd
-import re
 from rapidfuzz import process, fuzz
-# Load dataset once
 DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
-STOPWORDS = {
-    "raw", "cooked", "boiled", "roasted", "fried", "baked",
-    "steamed", "grilled", "ground", "unspecified", "dried",
-}
-# ---------- MCP Tools -------------------------------------------------
 def _meaningful_tokens(text: str):
-    """lower-cases and drops stop-words & punctuation."""
     words = re.split(r"\W+", text.lower())
     return [w for w in words if w and w not in STOPWORDS]
-def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
     """
-    Fuzzy-match *query* against DF['food'].
-    • First, return any rows that contain the whole query as a substring.
-    • Otherwise, keep only rows that share ≥1 meaningful token
-      and score them with token_set_ratio.
     """
     # direct substring hits (best precision, no scoring needed)
     mask = DF["food"].str.contains(query, case=False, regex=False)
     direct = DF[mask]

 import gradio as gr
 from rapidfuzz import process, fuzz
+import pandas as pd, re
 DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
+STOPWORDS = {...}
 def _meaningful_tokens(text: str):
     words = re.split(r"\W+", text.lower())
     return [w for w in words if w and w not in STOPWORDS]
+@gr.tools(                      # NEW – turns the fn into an MCP tool
+    name="purine_ingredients_fuzzy_search",
+    description="Return up to `k` food-items that fuzzy-match the query and their "
+                "purine data (mg/100 g)."
+)
+def fuzzy_search(
+    query: str,                 # required → will appear in the schema
+    k: int = 5,
+    cutoff: int = 75
+) -> list[dict]:
     """
+    Parameters
+    ----------
+    query : str
+        Ingredient or dish name to look up.
+    k : int, optional
+        Maximum rows to return (default = 5).
+    cutoff : int, optional
+        Minimal RapidFuzz token-set ratio (default = 75).
+    Returns
+    -------
+    list[dict]
+        Each dict contains 'food', 'purines_mg_per_100g', and 'classification'.
     """
     # direct substring hits (best precision, no scoring needed)
     mask = DF["food"].str.contains(query, case=False, regex=False)
     direct = DF[mask]