Spaces:
Running
on
Zero
Running
on
Zero
Add safe handling in case no notes are found for brand comparison
Browse files
app.py
CHANGED
@@ -300,10 +300,7 @@ df = pd.read_excel('perfume_database_cleaned.xlsx')
|
|
300 |
def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
|
301 |
"""
|
302 |
Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
|
303 |
-
|
304 |
-
data (Union[str, dict]): The JSON string or Python dict.
|
305 |
-
Returns:
|
306 |
-
list[str]: A list of extracted note names.
|
307 |
"""
|
308 |
if isinstance(data, str):
|
309 |
try:
|
@@ -316,64 +313,83 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
|
|
316 |
|
317 |
olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
|
318 |
if not olfactory_pyramid:
|
319 |
-
|
320 |
|
321 |
notes = []
|
322 |
for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
|
323 |
layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
|
324 |
if not layer_data:
|
325 |
-
continue
|
326 |
for item in layer_data:
|
327 |
note = item.get("note") or item.get("Note")
|
328 |
if note:
|
329 |
notes.append(note.strip())
|
330 |
|
331 |
-
if not notes:
|
332 |
-
raise ValueError("No notes found in the Olfactory Pyramid")
|
333 |
-
|
334 |
return notes
|
|
|
335 |
|
336 |
from rapidfuzz import fuzz
|
337 |
|
338 |
def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
|
339 |
"""
|
340 |
Finds top N matching perfumes using fuzzy matching on notes.
|
341 |
-
|
342 |
-
data (Union[str, dict]): The input JSON or dict.
|
343 |
-
top_n (int): Number of results.
|
344 |
-
threshold (int): Minimum fuzz ratio to count as match.
|
345 |
-
Returns:
|
346 |
-
pd.DataFrame
|
347 |
"""
|
348 |
user_notes = extract_notes_for_comparison(data)
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
matches = []
|
352 |
for _, row in df.iterrows():
|
353 |
perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
|
354 |
matched_notes = []
|
|
|
355 |
|
356 |
for u_note in user_notes_clean:
|
|
|
|
|
357 |
for p_note in perfume_notes:
|
358 |
ratio = fuzz.partial_ratio(u_note, p_note)
|
359 |
-
if ratio
|
360 |
-
|
361 |
-
|
362 |
-
|
|
|
|
|
363 |
|
364 |
matches.append({
|
365 |
'brand': row['brand'],
|
366 |
'perfume': row['perfume'],
|
367 |
-
'matching_notes': ', '.join(matched_notes),
|
368 |
-
'match_count': len(matched_notes)
|
|
|
369 |
})
|
370 |
|
371 |
result = pd.DataFrame(matches)
|
372 |
result = result[result['match_count'] > 0]
|
373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
return result
|
376 |
|
|
|
377 |
|
378 |
def infer(image_input):
|
379 |
|
|
|
300 |
def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
|
301 |
"""
|
302 |
Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
|
303 |
+
Returns an empty list if nothing found.
|
|
|
|
|
|
|
304 |
"""
|
305 |
if isinstance(data, str):
|
306 |
try:
|
|
|
313 |
|
314 |
olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
|
315 |
if not olfactory_pyramid:
|
316 |
+
return [] # Safely return empty
|
317 |
|
318 |
notes = []
|
319 |
for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
|
320 |
layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
|
321 |
if not layer_data:
|
322 |
+
continue
|
323 |
for item in layer_data:
|
324 |
note = item.get("note") or item.get("Note")
|
325 |
if note:
|
326 |
notes.append(note.strip())
|
327 |
|
|
|
|
|
|
|
328 |
return notes
|
329 |
+
|
330 |
|
331 |
from rapidfuzz import fuzz
|
332 |
|
333 |
def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
    """
    Find the top N perfumes in the module-level ``df`` whose notes fuzzily match the input.

    The user's notes are taken from ``extract_notes_for_comparison(data)`` and passed
    through ``apply_note_synonyms``. For every row of ``df`` each user note is paired
    with its best-scoring perfume note (``fuzz.partial_ratio``); the pair counts as a
    match when that score is at least ``threshold``.

    Args:
        data: JSON string or dict handed to ``extract_notes_for_comparison``.
        top_n: Maximum number of rows to return.
        threshold: Minimum ``partial_ratio`` score for a note to count as matched.

    Returns:
        pd.DataFrame with columns ``brand``, ``perfume``, ``matching_notes``,
        ``match_count`` and ``similarity_score``, sorted by ``match_count`` then
        ``similarity_score`` (both descending). When the input yields no notes, or
        no perfume matches, a single placeholder row (brand/perfume ``'N/A'``,
        counts 0) carrying an explanatory message is returned instead.
    """
    user_notes = extract_notes_for_comparison(data)
    if not user_notes:
        return _placeholder_perfume_result('No notes found in input')

    user_notes_clean = [apply_note_synonyms(n) for n in user_notes]

    matches = []
    for _, row in df.iterrows():
        raw_notes = row['notes']
        # Blank spreadsheet cells are loaded as NaN (a float), which has no
        # .split(); such a perfume simply has nothing to match against.
        if not isinstance(raw_notes, str):
            continue

        # Drop empty fragments (trailing or doubled commas) so a blank can never
        # be reported as a matched note.
        perfume_notes = [
            fragment.strip().lower()
            for fragment in raw_notes.split(',')
            if fragment.strip()
        ]
        matched_notes = []
        total_ratio = 0

        for u_note in user_notes_clean:
            best_p_note = None
            best_ratio = 0
            for p_note in perfume_notes:
                ratio = fuzz.partial_ratio(u_note, p_note)
                if ratio > best_ratio:
                    best_ratio = ratio
                    best_p_note = p_note
            # best_p_note stays None when nothing scored above 0; guard it so a
            # threshold <= 0 cannot push None into the list joined below.
            if best_p_note is not None and best_ratio >= threshold:
                matched_notes.append(best_p_note)
                total_ratio += best_ratio

        # Several user notes may resolve to the same perfume note; report and
        # count each perfume note once.
        unique_matches = sorted(set(matched_notes))
        matches.append({
            'brand': row['brand'],
            'perfume': row['perfume'],
            'matching_notes': ', '.join(unique_matches),
            'match_count': len(unique_matches),
            'similarity_score': total_ratio
        })

    # Explicit columns keep the filter below valid even when no rows were collected.
    result = pd.DataFrame(
        matches,
        columns=['brand', 'perfume', 'matching_notes', 'match_count', 'similarity_score'],
    )
    result = result[result['match_count'] > 0]
    if result.empty:
        return _placeholder_perfume_result('No matching perfumes found')

    result = result.sort_values(by=['match_count', 'similarity_score'], ascending=False)
    result = result.head(top_n).reset_index(drop=True)

    return result


def _placeholder_perfume_result(message: str) -> pd.DataFrame:
    """Build the one-row 'nothing to show' frame that uses the regular result columns."""
    return pd.DataFrame([{
        'brand': 'N/A',
        'perfume': 'N/A',
        'matching_notes': message,
        'match_count': 0,
        'similarity_score': 0
    }])
|
391 |
|
392 |
+
|
393 |
|
394 |
def infer(image_input):
|
395 |
|