fffiloni committed on
Commit
4ac0008
·
verified ·
1 Parent(s): 5c69bb5

add a better similarity score calculation

Browse files
Files changed (1) hide show
  1. app.py +24 -54
app.py CHANGED
@@ -300,7 +300,12 @@ df = pd.read_excel('perfume_database_cleaned.xlsx')
300
  def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
301
  """
302
  Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
303
- Returns an empty list if nothing found.
 
 
 
 
 
304
  """
305
  if isinstance(data, str):
306
  try:
@@ -311,86 +316,51 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
311
  if not isinstance(data, dict):
312
  raise TypeError("Input must be a dict or a valid JSON string")
313
 
314
- olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
315
  if not olfactory_pyramid:
316
- return [] # Safe fallback
317
 
318
  notes = []
319
  for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
320
- layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
321
  if not layer_data:
322
  continue
323
  for item in layer_data:
324
- note = item.get("note") or item.get("Note")
325
  if note:
326
  notes.append(note.strip())
327
 
 
 
 
328
  return notes
329
 
330
-
331
  from rapidfuzz import fuzz
332
 
333
- def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
334
- """
335
- Finds top N matching perfumes using fuzzy matching on notes.
336
- If no notes found or no matches, returns an informative result.
337
- """
338
- user_notes = extract_notes_for_comparison(data)
339
- if not user_notes:
340
- return pd.DataFrame([{
341
- 'brand': 'N/A',
342
- 'perfume': 'N/A',
343
- 'matching_notes': 'No notes found in input',
344
- 'match_count': 0,
345
- 'similarity_score': 0
346
- }])
347
-
348
- # Lowercase user notes
349
- user_notes_clean = [n.strip().lower() for n in user_notes]
350
-
351
- matches = []
352
- for _, row in df.iterrows():
353
- perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
354
- matched_notes = []
355
- total_ratio = 0
356
-
357
- for u_note in user_notes_clean:
358
- best_p_note = None
359
- best_ratio = 0
360
- for p_note in perfume_notes:
361
- ratio = fuzz.partial_ratio(u_note, p_note)
362
- if ratio > best_ratio:
363
- best_ratio = ratio
364
- best_p_note = p_note
365
- if best_ratio >= threshold:
366
- matched_notes.append(best_p_note)
367
- total_ratio += best_ratio
368
-
369
- matches.append({
370
- 'brand': row['brand'],
371
- 'perfume': row['perfume'],
372
- 'matching_notes': ', '.join(sorted(set(matched_notes))),
373
- 'match_count': len(set(matched_notes)),
374
- 'similarity_score': total_ratio
375
  })
376
 
377
  result = pd.DataFrame(matches)
378
  result = result[result['match_count'] > 0]
 
379
  if result.empty:
380
  return pd.DataFrame([{
381
  'brand': 'N/A',
382
  'perfume': 'N/A',
383
- 'matching_notes': 'No matching perfumes found',
384
  'match_count': 0,
385
- 'similarity_score': 0
 
 
386
  }])
387
 
388
- result = result.sort_values(by=['match_count', 'similarity_score'], ascending=False)
389
- result = result.head(top_n).reset_index(drop=True)
390
 
391
  return result
392
-
393
-
394
 
395
  def infer(image_input):
396
 
 
300
  def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
301
  """
302
  Extracts all notes from the Olfactory Pyramid section of a JSON string or dict.
303
+
304
+ Args:
305
+ data (Union[str, dict]): The JSON string or Python dict.
306
+
307
+ Returns:
308
+ list[str]: A list of extracted note names.
309
  """
310
  if isinstance(data, str):
311
  try:
 
316
  if not isinstance(data, dict):
317
  raise TypeError("Input must be a dict or a valid JSON string")
318
 
319
+ olfactory_pyramid = data.get("Olfactory Pyramid")
320
  if not olfactory_pyramid:
321
+ raise KeyError("No 'Olfactory Pyramid' found in the data")
322
 
323
  notes = []
324
  for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
325
+ layer_data = olfactory_pyramid.get(layer)
326
  if not layer_data:
327
  continue
328
  for item in layer_data:
329
+ note = item.get("note")
330
  if note:
331
  notes.append(note.strip())
332
 
333
+ if not notes:
334
+ raise ValueError("No notes found in the Olfactory Pyramid")
335
+
336
  return notes
337
 
 
338
  from rapidfuzz import fuzz
339
 
340
+ 'matching_notes': ', '.join(unique_matched_notes),
341
+ 'match_count': match_count,
342
+ 'similarity_score': round(total_ratio, 2),
343
+ 'purity': round(purity, 2),
344
+ 'adjusted_score': round(adjusted_score, 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  })
346
 
347
  result = pd.DataFrame(matches)
348
  result = result[result['match_count'] > 0]
349
+
350
  if result.empty:
351
  return pd.DataFrame([{
352
  'brand': 'N/A',
353
  'perfume': 'N/A',
354
+ 'matching_notes': 'No matching notes found',
355
  'match_count': 0,
356
+ 'similarity_score': 0,
357
+ 'purity': 0,
358
+ 'adjusted_score': 0
359
  }])
360
 
361
+ result = result.sort_values(by=['adjusted_score'], ascending=False).head(top_n).reset_index(drop=True)
 
362
 
363
  return result
 
 
364
 
365
  def infer(image_input):
366