Spaces:

mathieunlp
/

purine-ingredients

Runtime error

App Files Files Community

mathieunlp committed on Jun 9

Commit

d2f0984

verified ·

1 Parent(s): 20fdf33

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -10

app.py CHANGED Viewed

@@ -1,10 +1,14 @@
 import gradio as gr
 import pandas as pd
 from rapidfuzz import process, fuzz
 # Load dataset once
-DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
 # ---------- MCP Tools -------------------------------------------------
 def lookup_food(name: str):
     """
@@ -15,6 +19,12 @@ def lookup_food(name: str):
         return {"error": f"No exact match for '{name}'."}
     return row.iloc[0].to_dict()
 def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
     """
     Fuzzy-match *query* against DF['food'].
@@ -22,13 +32,13 @@ def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
     • Otherwise, keep only rows that share ≥1 meaningful token
       and score them with token_set_ratio.
     """
-    # 1️⃣ direct substring hits (best precision, no scoring needed)
     mask = DF["food"].str.contains(query, case=False, regex=False)
     direct = DF[mask]
     if not direct.empty:
         return direct.head(k).to_dict(orient="records")
-    # 2️⃣ token filter
     q_tokens = set(_meaningful_tokens(query))
     if not q_tokens:
         return []  # nothing to match on
@@ -38,7 +48,7 @@ def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
         if q_tokens & set(_meaningful_tokens(food))
     ]
-    # 3️⃣ fuzzy rank
     matches = process.extract(
         query, candidates,
         scorer=fuzz.token_set_ratio,
@@ -61,8 +71,4 @@ with gr.Blocks(title="Purine DB MCP") as demo:
         in2.submit(fuzzy_search, in2, out2)
 # ---------- Launch ----------------------------------------------------
-demo.launch(
-    server_name="0.0.0.0",  # expose on container/VM
-    share=False,            # True if you want a public Gradio link
-    mcp_server=True,        # 🌟 <- THIS turns it into an MCP endpoint
-)

 import gradio as gr
 import pandas as pd
+import re
 from rapidfuzz import process, fuzz
 # Load dataset once
+DF = pd.read_csv("./data/food_purine_mcp_ready_v2.csv")
+STOPWORDS = {
+    "raw", "cooked", "boiled", "roasted", "fried", "baked",
+    "steamed", "grilled", "ground", "unspecified", "dried",
+}
 # ---------- MCP Tools -------------------------------------------------
 def lookup_food(name: str):
     """
         return {"error": f"No exact match for '{name}'."}
     return row.iloc[0].to_dict()
+def _meaningful_tokens(text: str):
+    """lower-cases and drops stop-words & punctuation."""
+    words = re.split(r"\W+", text.lower())
+    return [w for w in words if w and w not in STOPWORDS]
 def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
     """
     Fuzzy-match *query* against DF['food'].
     • Otherwise, keep only rows that share ≥1 meaningful token
       and score them with token_set_ratio.
     """
+    # direct substring hits (best precision, no scoring needed)
     mask = DF["food"].str.contains(query, case=False, regex=False)
     direct = DF[mask]
     if not direct.empty:
         return direct.head(k).to_dict(orient="records")
+    # token filter
     q_tokens = set(_meaningful_tokens(query))
     if not q_tokens:
         return []  # nothing to match on
         if q_tokens & set(_meaningful_tokens(food))
     ]
+    # fuzzy rank
     matches = process.extract(
         query, candidates,
         scorer=fuzz.token_set_ratio,
         in2.submit(fuzzy_search, in2, out2)
 # ---------- Launch ----------------------------------------------------
+demo.launch(mcp_server=True)