fffiloni committed on
Commit
24b0f60
·
verified ·
1 Parent(s): f9973c1

safety handling in case no notes are found for brand comparison

Browse files
Files changed (1) hide show
  1. app.py +39 -23
app.py CHANGED
@@ -300,10 +300,7 @@ df = pd.read_excel('perfume_database_cleaned.xlsx')
300
  def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
301
  """
302
  Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
303
- Args:
304
- data (Union[str, dict]): The JSON string or Python dict.
305
- Returns:
306
- list[str]: A list of extracted note names.
307
  """
308
  if isinstance(data, str):
309
  try:
@@ -316,64 +313,83 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
316
 
317
  olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
318
  if not olfactory_pyramid:
319
- raise KeyError("No 'Olfactory Pyramid' found in the data")
320
 
321
  notes = []
322
  for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
323
  layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
324
  if not layer_data:
325
- continue # If a layer is missing, just skip
326
  for item in layer_data:
327
  note = item.get("note") or item.get("Note")
328
  if note:
329
  notes.append(note.strip())
330
 
331
- if not notes:
332
- raise ValueError("No notes found in the Olfactory Pyramid")
333
-
334
  return notes
 
335
 
336
  from rapidfuzz import fuzz
337
 
338
  def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
339
  """
340
  Finds top N matching perfumes using fuzzy matching on notes.
341
- Args:
342
- data (Union[str, dict]): The input JSON or dict.
343
- top_n (int): Number of results.
344
- threshold (int): Minimum fuzz ratio to count as match.
345
- Returns:
346
- pd.DataFrame
347
  """
348
  user_notes = extract_notes_for_comparison(data)
349
- user_notes_clean = [n.strip().lower() for n in user_notes]
 
 
 
 
 
 
 
 
 
350
 
351
  matches = []
352
  for _, row in df.iterrows():
353
  perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
354
  matched_notes = []
 
355
 
356
  for u_note in user_notes_clean:
 
 
357
  for p_note in perfume_notes:
358
  ratio = fuzz.partial_ratio(u_note, p_note)
359
- if ratio >= threshold:
360
- matched_notes.append(p_note)
361
-
362
- matched_notes = sorted(set(matched_notes))
 
 
363
 
364
  matches.append({
365
  'brand': row['brand'],
366
  'perfume': row['perfume'],
367
- 'matching_notes': ', '.join(matched_notes),
368
- 'match_count': len(matched_notes)
 
369
  })
370
 
371
  result = pd.DataFrame(matches)
372
  result = result[result['match_count'] > 0]
373
- result = result.sort_values(by='match_count', ascending=False).head(top_n).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
374
 
375
  return result
376
 
 
377
 
378
  def infer(image_input):
379
 
 
300
  def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
301
  """
302
  Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
303
+ Returns an empty list if nothing found.
 
 
 
304
  """
305
  if isinstance(data, str):
306
  try:
 
313
 
314
  olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
315
  if not olfactory_pyramid:
316
+ return [] # Safely return empty
317
 
318
  notes = []
319
  for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
320
  layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
321
  if not layer_data:
322
+ continue
323
  for item in layer_data:
324
  note = item.get("note") or item.get("Note")
325
  if note:
326
  notes.append(note.strip())
327
 
 
 
 
328
  return notes
329
+
330
 
331
  from rapidfuzz import fuzz
332
 
333
  def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
334
  """
335
  Finds top N matching perfumes using fuzzy matching on notes.
336
+ If no notes found, returns an empty dataframe with message.
 
 
 
 
 
337
  """
338
  user_notes = extract_notes_for_comparison(data)
339
+ if not user_notes:
340
+ return pd.DataFrame([{
341
+ 'brand': 'N/A',
342
+ 'perfume': 'N/A',
343
+ 'matching_notes': 'No notes found in input',
344
+ 'match_count': 0,
345
+ 'similarity_score': 0
346
+ }])
347
+
348
+ user_notes_clean = [apply_note_synonyms(n) for n in user_notes]
349
 
350
  matches = []
351
  for _, row in df.iterrows():
352
  perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
353
  matched_notes = []
354
+ total_ratio = 0
355
 
356
  for u_note in user_notes_clean:
357
+ best_p_note = None
358
+ best_ratio = 0
359
  for p_note in perfume_notes:
360
  ratio = fuzz.partial_ratio(u_note, p_note)
361
+ if ratio > best_ratio:
362
+ best_ratio = ratio
363
+ best_p_note = p_note
364
+ if best_ratio >= threshold:
365
+ matched_notes.append(best_p_note)
366
+ total_ratio += best_ratio
367
 
368
  matches.append({
369
  'brand': row['brand'],
370
  'perfume': row['perfume'],
371
+ 'matching_notes': ', '.join(sorted(set(matched_notes))),
372
+ 'match_count': len(set(matched_notes)),
373
+ 'similarity_score': total_ratio
374
  })
375
 
376
  result = pd.DataFrame(matches)
377
  result = result[result['match_count'] > 0]
378
+ if result.empty:
379
+ return pd.DataFrame([{
380
+ 'brand': 'N/A',
381
+ 'perfume': 'N/A',
382
+ 'matching_notes': 'No matching perfumes found',
383
+ 'match_count': 0,
384
+ 'similarity_score': 0
385
+ }])
386
+
387
+ result = result.sort_values(by=['match_count', 'similarity_score'], ascending=False)
388
+ result = result.head(top_n).reset_index(drop=True)
389
 
390
  return result
391
 
392
+
393
 
394
  def infer(image_input):
395