Spaces:

fffiloni
/

Image-to-Fragrance

Running on Zero

App Files Files Community

fffiloni committed on Jul 4

Commit

4ac0008

verified ·

1 Parent(s): 5c69bb5

add a better similarity score calculation

Browse files

Files changed (1) hide show

app.py +24 -54

app.py CHANGED Viewed

@@ -300,7 +300,12 @@ df = pd.read_excel('perfume_database_cleaned.xlsx')
 def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     """
     Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
-    Returns an empty list if nothing found.
     """
     if isinstance(data, str):
         try:
@@ -311,86 +316,51 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     if not isinstance(data, dict):
         raise TypeError("Input must be a dict or a valid JSON string")
-    olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
     if not olfactory_pyramid:
-        return []  # Safe fallback
     notes = []
     for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
-        layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
         if not layer_data:
             continue
         for item in layer_data:
-            note = item.get("note") or item.get("Note")
             if note:
                 notes.append(note.strip())
     return notes
 from rapidfuzz import fuzz
-def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
-    """
-    Finds top N matching perfumes using fuzzy matching on notes.
-    If no notes found or no matches, returns an informative result.
-    """
-    user_notes = extract_notes_for_comparison(data)
-    if not user_notes:
-        return pd.DataFrame([{
-            'brand': 'N/A',
-            'perfume': 'N/A',
-            'matching_notes': 'No notes found in input',
-            'match_count': 0,
-            'similarity_score': 0
-        }])
-    # Lowercase user notes
-    user_notes_clean = [n.strip().lower() for n in user_notes]
-    matches = []
-    for _, row in df.iterrows():
-        perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
-        matched_notes = []
-        total_ratio = 0
-        for u_note in user_notes_clean:
-            best_p_note = None
-            best_ratio = 0
-            for p_note in perfume_notes:
-                ratio = fuzz.partial_ratio(u_note, p_note)
-                if ratio > best_ratio:
-                    best_ratio = ratio
-                    best_p_note = p_note
-            if best_ratio >= threshold:
-                matched_notes.append(best_p_note)
-                total_ratio += best_ratio
-        matches.append({
-            'brand': row['brand'],
-            'perfume': row['perfume'],
-            'matching_notes': ', '.join(sorted(set(matched_notes))),
-            'match_count': len(set(matched_notes)),
-            'similarity_score': total_ratio
         })
     result = pd.DataFrame(matches)
     result = result[result['match_count'] > 0]
     if result.empty:
         return pd.DataFrame([{
             'brand': 'N/A',
             'perfume': 'N/A',
-            'matching_notes': 'No matching perfumes found',
             'match_count': 0,
-            'similarity_score': 0
         }])
-    result = result.sort_values(by=['match_count', 'similarity_score'], ascending=False)
-    result = result.head(top_n).reset_index(drop=True)
     return result
 def infer(image_input):

 def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
     """
     Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
+    Args:
+        data (Union[str, dict]): The JSON string or Python dict.
+    Returns:
+        list[str]: A list of extracted note names.
     """
     if isinstance(data, str):
         try:
     if not isinstance(data, dict):
         raise TypeError("Input must be a dict or a valid JSON string")
+    olfactory_pyramid = data.get("Olfactory Pyramid")
     if not olfactory_pyramid:
+        raise KeyError("No 'Olfactory Pyramid' found in the data")
     notes = []
     for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
+        layer_data = olfactory_pyramid.get(layer)
         if not layer_data:
             continue
         for item in layer_data:
+            note = item.get("note")
             if note:
                 notes.append(note.strip())
+    if not notes:
+        raise ValueError("No notes found in the Olfactory Pyramid")
     return notes
 from rapidfuzz import fuzz
+notes': ', '.join(unique_matched_notes),
+            'match_count': match_count,
+            'similarity_score': round(total_ratio, 2),
+            'purity': round(purity, 2),
+            'adjusted_score': round(adjusted_score, 2)
         })
     result = pd.DataFrame(matches)
     result = result[result['match_count'] > 0]
     if result.empty:
         return pd.DataFrame([{
             'brand': 'N/A',
             'perfume': 'N/A',
+            'matching_notes': 'No matching notes found',
             'match_count': 0,
+            'similarity_score': 0,
+            'purity': 0,
+            'adjusted_score': 0
         }])
+    result = result.sort_values(by=['adjusted_score'], ascending=False).head(top_n).reset_index(drop=True)
     return result
 def infer(image_input):