mathieunlp committed on
Commit
d2f0984
·
verified ·
1 Parent(s): 20fdf33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -1,10 +1,14 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
  from rapidfuzz import process, fuzz
4
 
5
  # Load dataset once
6
- DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
7
-
 
 
 
8
  # ---------- MCP Tools -------------------------------------------------
9
  def lookup_food(name: str):
10
  """
@@ -15,6 +19,12 @@ def lookup_food(name: str):
15
  return {"error": f"No exact match for '{name}'."}
16
  return row.iloc[0].to_dict()
17
 
 
 
 
 
 
 
18
  def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
19
  """
20
  Fuzzy-match *query* against DF['food'].
@@ -22,13 +32,13 @@ def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
22
  • Otherwise, keep only rows that share ≥1 meaningful token
23
  and score them with token_set_ratio.
24
  """
25
- # 1️⃣ direct substring hits (best precision, no scoring needed)
26
  mask = DF["food"].str.contains(query, case=False, regex=False)
27
  direct = DF[mask]
28
  if not direct.empty:
29
  return direct.head(k).to_dict(orient="records")
30
 
31
- # 2️⃣ token filter
32
  q_tokens = set(_meaningful_tokens(query))
33
  if not q_tokens:
34
  return [] # nothing to match on
@@ -38,7 +48,7 @@ def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
38
  if q_tokens & set(_meaningful_tokens(food))
39
  ]
40
 
41
- # 3️⃣ fuzzy rank
42
  matches = process.extract(
43
  query, candidates,
44
  scorer=fuzz.token_set_ratio,
@@ -61,8 +71,4 @@ with gr.Blocks(title="Purine DB MCP") as demo:
61
  in2.submit(fuzzy_search, in2, out2)
62
 
63
  # ---------- Launch ----------------------------------------------------
64
- demo.launch(
65
- server_name="0.0.0.0", # expose on container/VM
66
- share=False, # True if you want a public Gradio link
67
- mcp_server=True, # 🌟 <- THIS turns it into an MCP endpoint
68
- )
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import re
4
  from rapidfuzz import process, fuzz
5
 
6
  # Load dataset once
7
+ DF = pd.read_csv("./data/food_purine_mcp_ready_v2.csv")
8
+ STOPWORDS = {
9
+ "raw", "cooked", "boiled", "roasted", "fried", "baked",
10
+ "steamed", "grilled", "ground", "unspecified", "dried",
11
+ }
12
  # ---------- MCP Tools -------------------------------------------------
13
  def lookup_food(name: str):
14
  """
 
19
  return {"error": f"No exact match for '{name}'."}
20
  return row.iloc[0].to_dict()
21
 
22
+
23
+ def _meaningful_tokens(text: str):
24
+ """lower-cases and drops stop-words & punctuation."""
25
+ words = re.split(r"\W+", text.lower())
26
+ return [w for w in words if w and w not in STOPWORDS]
27
+
28
  def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
29
  """
30
  Fuzzy-match *query* against DF['food'].
 
32
  • Otherwise, keep only rows that share ≥1 meaningful token
33
  and score them with token_set_ratio.
34
  """
35
+ # direct substring hits (best precision, no scoring needed)
36
  mask = DF["food"].str.contains(query, case=False, regex=False)
37
  direct = DF[mask]
38
  if not direct.empty:
39
  return direct.head(k).to_dict(orient="records")
40
 
41
+ # token filter
42
  q_tokens = set(_meaningful_tokens(query))
43
  if not q_tokens:
44
  return [] # nothing to match on
 
48
  if q_tokens & set(_meaningful_tokens(food))
49
  ]
50
 
51
+ # fuzzy rank
52
  matches = process.extract(
53
  query, candidates,
54
  scorer=fuzz.token_set_ratio,
 
71
  in2.submit(fuzzy_search, in2, out2)
72
 
73
  # ---------- Launch ----------------------------------------------------
74
+ demo.launch(mcp_server=True)