Spaces:

fffiloni
/

Image-to-Fragrance

Running on Zero

App Files Community

fffiloni committed on Jul 4

Commit

d83da9c

verified ·

1 Parent(s): 9f95f2e

update perfume comparison

Browse files

Files changed (1) hide show

app.py +41 -56

app.py CHANGED Viewed

@@ -301,7 +301,7 @@ df = pd.read_excel('perfume_database_cleaned.xlsx')
 def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     """
-    Extracts all notes from the Olfactory Pyramid section.
     """
     if isinstance(data, str):
         try:
@@ -310,98 +310,83 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
             raise ValueError("Invalid JSON string provided")
     if not isinstance(data, dict):
-        raise TypeError("Input must be a dict or a valid JSON string")
-    olfactory_pyramid = data.get("Olfactory Pyramid")
     if not olfactory_pyramid:
-        raise KeyError("No 'Olfactory Pyramid' found in the data")
     notes = []
     for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
-        layer_data = olfactory_pyramid.get(layer)
         if not layer_data:
             continue
         for item in layer_data:
-            note = item.get("note")
             if note:
                 notes.append(note.strip())
-    if not notes:
-        raise ValueError("No notes found in the Olfactory Pyramid")
     return notes
-def find_best_perfumes_from_json(
-    data: Union[str, dict],
-    top_n: int = 5,
-    threshold: int = 80
-) -> pd.DataFrame:
     """
-    Finds top N matching perfumes with purity-based similarity.
     """
-    try:
-        user_notes = extract_notes_for_comparison(data)
-    except Exception as e:
-        # Return fallback if extraction fails
-        return pd.DataFrame([{
-            'brand': 'N/A',
-            'perfume': 'N/A',
-            'matching_notes': f'Error: {str(e)}',
-            'match_count': 0,
-            'purity': 0,
-            'adjusted_score': 0
-        }])
-    user_notes_clean = [n.strip().lower() for n in user_notes if n.strip()]
     matches = []
     for _, row in df.iterrows():
-        perfume_notes = [
-            n.strip().lower()
-            for n in row['notes'].split(',')
-            if n.strip()
-        ]
-        matched = []
         for u_note in user_notes_clean:
             for p_note in perfume_notes:
-                ratio = fuzz.partial_ratio(u_note, p_note)
                 if ratio >= threshold:
-                    matched.append(p_note)
-        unique_matched_notes = sorted(set(matched))
-        unique_matched_notes = [n for n in unique_matched_notes if n]
         total_notes = len(perfume_notes)
-        match_count = len(unique_matched_notes)
-        purity = match_count / total_notes if total_notes else 0
         adjusted_score = match_count * purity
-        if match_count > 0:
-            matches.append({
-                'brand': row['brand'],
-                'perfume': row['perfume'],
-                'matching_notes': ', '.join(unique_matched_notes),
-                'match_count': match_count,
-                'purity': round(purity, 2),
-                'adjusted_score': round(adjusted_score, 2)
-            })
     if not matches:
         return pd.DataFrame([{
             'brand': 'N/A',
-            'perfume': 'N/A',
-            'matching_notes': 'No matches found',
             'match_count': 0,
             'purity': 0,
             'adjusted_score': 0
         }])
     result = pd.DataFrame(matches)
-    result = result.sort_values(
-        by=['adjusted_score', 'match_count'],
-        ascending=[False, False]
-    ).head(top_n).reset_index(drop=True)
     return result

 def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     """
+    Extracts notes from Olfactory Pyramid in a JSON string or dict.
     """
     if isinstance(data, str):
         try:
             raise ValueError("Invalid JSON string provided")
     if not isinstance(data, dict):
+        raise TypeError("Input must be a dict or valid JSON string")
+    olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
     if not olfactory_pyramid:
+        return []  # No pyramid found, fail gracefully
     notes = []
     for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
+        layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
         if not layer_data:
             continue
         for item in layer_data:
+            note = item.get("note") or item.get("Note")
             if note:
                 notes.append(note.strip())
     return notes
+def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
     """
+    Fuzzy-match user notes against database notes.
+    Uses token_set_ratio + partial_ratio + short-word safeguard.
     """
+    user_notes = extract_notes_for_comparison(data)
+    user_notes_clean = [n.strip().lower() for n in user_notes]
     matches = []
     for _, row in df.iterrows():
+        perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
+        matched_notes = []
         for u_note in user_notes_clean:
             for p_note in perfume_notes:
+                if len(u_note) < 4:
+                    # Very short? Require exact match
+                    ratio = 100 if u_note == p_note else 0
+                else:
+                    ratio_token = fuzz.token_set_ratio(u_note, p_note)
+                    ratio_partial = fuzz.partial_ratio(u_note, p_note)
+                    ratio = max(ratio_token, ratio_partial)
                 if ratio >= threshold:
+                    matched_notes.append(p_note)
+        unique_matched_notes = sorted(set(matched_notes))
+        match_count = len(unique_matched_notes)
+        if match_count == 0:
+            continue  # Skip if no match at all
         total_notes = len(perfume_notes)
+        purity = match_count / total_notes if total_notes > 0 else 0
         adjusted_score = match_count * purity
+        matches.append({
+            'brand': row['brand'],
+            'perfume': row['perfume'],
+            'matching_notes': ', '.join(unique_matched_notes).strip(', '),
+            'match_count': match_count,
+            'purity': round(purity, 2),
+            'adjusted_score': round(adjusted_score, 2)
+        })
     if not matches:
+        # Nothing matched at all
         return pd.DataFrame([{
             'brand': 'N/A',
+            'perfume': 'No match found',
+            'matching_notes': '',
             'match_count': 0,
             'purity': 0,
             'adjusted_score': 0
         }])
     result = pd.DataFrame(matches)
+    result = result.sort_values(by='adjusted_score', ascending=False).head(top_n).reset_index(drop=True)
     return result