mathieunlp committed on
Commit
bce2b97
·
verified ·
1 Parent(s): 2b22ba6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -14
app.py CHANGED
@@ -1,27 +1,40 @@
1
  import gradio as gr
2
- import pandas as pd
3
- import re
4
  from rapidfuzz import process, fuzz
 
5
 
6
- # Load dataset once
7
  DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
8
- STOPWORDS = {
9
- "raw", "cooked", "boiled", "roasted", "fried", "baked",
10
- "steamed", "grilled", "ground", "unspecified", "dried",
11
- }
12
- # ---------- MCP Tools -------------------------------------------------
13
  def _meaningful_tokens(text: str):
14
- """lower-cases and drops stop-words & punctuation."""
15
  words = re.split(r"\W+", text.lower())
16
  return [w for w in words if w and w not in STOPWORDS]
17
 
18
- def fuzzy_search(query: str, k: int = 5, cutoff: int = 75):
 
 
 
 
 
 
 
 
 
19
  """
20
- Fuzzy-match *query* against DF['food'].
21
- • First, return any rows that contain the whole query as a substring.
22
- Otherwise, keep only rows that share ≥1 meaningful token
23
- and score them with token_set_ratio.
 
 
 
 
 
 
 
 
 
24
  """
 
25
  # direct substring hits (best precision, no scoring needed)
26
  mask = DF["food"].str.contains(query, case=False, regex=False)
27
  direct = DF[mask]
 
1
  import gradio as gr
 
 
2
  from rapidfuzz import process, fuzz
3
+ import pandas as pd, re
4
 
 
5
  DF = pd.read_csv("./food_purine_mcp_ready_v2.csv")
6
+ STOPWORDS = {...}
7
+
 
 
 
8
  def _meaningful_tokens(text: str):
 
9
  words = re.split(r"\W+", text.lower())
10
  return [w for w in words if w and w not in STOPWORDS]
11
 
12
+ @gr.tools( # NEW turns the fn into an MCP tool
13
+ name="purine_ingredients_fuzzy_search",
14
+ description="Return up to `k` food-items that fuzzy-match the query and their "
15
+ "purine data (mg/100 g)."
16
+ )
17
+ def fuzzy_search(
18
+ query: str, # required → will appear in the schema
19
+ k: int = 5,
20
+ cutoff: int = 75
21
+ ) -> list[dict]:
22
  """
23
+ Parameters
24
+ ----------
25
+ query : str
26
+ Ingredient or dish name to look up.
27
+ k : int, optional
28
+ Maximum rows to return (default = 5).
29
+ cutoff : int, optional
30
+ Minimal RapidFuzz token-set ratio (default = 75).
31
+
32
+ Returns
33
+ -------
34
+ list[dict]
35
+ Each dict contains 'food', 'purines_mg_per_100g', and 'classification'.
36
  """
37
+
38
  # direct substring hits (best precision, no scoring needed)
39
  mask = DF["food"].str.contains(query, case=False, regex=False)
40
  direct = DF[mask]