Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
from rapidfuzz import process, fuzz
|
| 4 |
|
| 5 |
# Load dataset once
|
| 6 |
-
DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
| 8 |
# ---------- MCP Tools -------------------------------------------------
|
| 9 |
def lookup_food(name: str):
|
| 10 |
"""
|
|
@@ -15,6 +19,12 @@ def lookup_food(name: str):
|
|
| 15 |
return {"error": f"No exact match for '{name}'."}
|
| 16 |
return row.iloc[0].to_dict()
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
|
| 19 |
"""
|
| 20 |
Fuzzy-match *query* against DF['food'].
|
|
@@ -22,13 +32,13 @@ def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
|
|
| 22 |
• Otherwise, keep only rows that share ≥1 meaningful token
|
| 23 |
and score them with token_set_ratio.
|
| 24 |
"""
|
| 25 |
-
#
|
| 26 |
mask = DF["food"].str.contains(query, case=False, regex=False)
|
| 27 |
direct = DF[mask]
|
| 28 |
if not direct.empty:
|
| 29 |
return direct.head(k).to_dict(orient="records")
|
| 30 |
|
| 31 |
-
#
|
| 32 |
q_tokens = set(_meaningful_tokens(query))
|
| 33 |
if not q_tokens:
|
| 34 |
return [] # nothing to match on
|
|
@@ -38,7 +48,7 @@ def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
|
|
| 38 |
if q_tokens & set(_meaningful_tokens(food))
|
| 39 |
]
|
| 40 |
|
| 41 |
-
#
|
| 42 |
matches = process.extract(
|
| 43 |
query, candidates,
|
| 44 |
scorer=fuzz.token_set_ratio,
|
|
@@ -61,8 +71,4 @@ with gr.Blocks(title="Purine DB MCP") as demo:
|
|
| 61 |
in2.submit(fuzzy_search, in2, out2)
|
| 62 |
|
| 63 |
# ---------- Launch ----------------------------------------------------
|
| 64 |
-
demo.launch(
|
| 65 |
-
server_name="0.0.0.0", # expose on container/VM
|
| 66 |
-
share=False, # True if you want a public Gradio link
|
| 67 |
-
mcp_server=True, # 🌟 <- THIS turns it into an MCP endpoint
|
| 68 |
-
)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
import re
|
| 4 |
from rapidfuzz import process, fuzz
|
| 5 |
|
| 6 |
# Load dataset once
|
| 7 |
+
DF = pd.read_csv("./data/food_purine_mcp_ready_v2.csv")
|
| 8 |
+
STOPWORDS = {
|
| 9 |
+
"raw", "cooked", "boiled", "roasted", "fried", "baked",
|
| 10 |
+
"steamed", "grilled", "ground", "unspecified", "dried",
|
| 11 |
+
}
|
| 12 |
# ---------- MCP Tools -------------------------------------------------
|
| 13 |
def lookup_food(name: str):
|
| 14 |
"""
|
|
|
|
| 19 |
return {"error": f"No exact match for '{name}'."}
|
| 20 |
return row.iloc[0].to_dict()
|
| 21 |
|
| 22 |
+
|
| 23 |
+
def _meaningful_tokens(text: str):
    """Return the lower-cased word tokens of *text*, minus stop-words.

    The text is lower-cased and split on runs of non-word characters
    (so punctuation and whitespace act as separators). Empty fragments
    produced by the split and any token present in STOPWORDS are dropped.
    Order and duplicates of the remaining tokens are preserved.
    """
    tokens = []
    for piece in re.split(r"\W+", text.lower()):
        # re.split yields "" at the edges when text starts/ends with a separator
        if not piece:
            continue
        if piece in STOPWORDS:
            continue
        tokens.append(piece)
    return tokens
|
| 27 |
+
|
| 28 |
def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
|
| 29 |
"""
|
| 30 |
Fuzzy-match *query* against DF['food'].
|
|
|
|
| 32 |
• Otherwise, keep only rows that share ≥1 meaningful token
|
| 33 |
and score them with token_set_ratio.
|
| 34 |
"""
|
| 35 |
+
# direct substring hits (best precision, no scoring needed)
|
| 36 |
mask = DF["food"].str.contains(query, case=False, regex=False)
|
| 37 |
direct = DF[mask]
|
| 38 |
if not direct.empty:
|
| 39 |
return direct.head(k).to_dict(orient="records")
|
| 40 |
|
| 41 |
+
# token filter
|
| 42 |
q_tokens = set(_meaningful_tokens(query))
|
| 43 |
if not q_tokens:
|
| 44 |
return [] # nothing to match on
|
|
|
|
| 48 |
if q_tokens & set(_meaningful_tokens(food))
|
| 49 |
]
|
| 50 |
|
| 51 |
+
# fuzzy rank
|
| 52 |
matches = process.extract(
|
| 53 |
query, candidates,
|
| 54 |
scorer=fuzz.token_set_ratio,
|
|
|
|
| 71 |
in2.submit(fuzzy_search, in2, out2)
|
| 72 |
|
| 73 |
# ---------- Launch ----------------------------------------------------
|
| 74 |
+
demo.launch(mcp_server=True)
|
|
|
|
|
|
|
|
|
|
|
|