fffiloni committed on
Commit
fb4337f
·
verified ·
1 Parent(s): 4ac0008
Files changed (1) hide show
  1. app.py +58 -1
app.py CHANGED
@@ -337,7 +337,64 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
337
 
338
  from rapidfuzz import fuzz
339
 
340
- notes': ', '.join(unique_matched_notes),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  'match_count': match_count,
342
  'similarity_score': round(total_ratio, 2),
343
  'purity': round(purity, 2),
 
337
 
338
  from rapidfuzz import fuzz
339
 
340
+ def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80) -> pd.DataFrame:
341
+ """
342
+ Finds top N matching perfumes using fuzzy matching on notes, with purity factor.
343
+
344
+ Args:
345
+ data (Union[str, dict]): The input JSON or dict.
346
+ top_n (int): Number of results to return.
347
+ threshold (int): Minimum fuzz ratio for note match.
348
+
349
+ Returns:
350
+ pd.DataFrame: Matching perfumes.
351
+ """
352
+ try:
353
+ user_notes = extract_notes_for_comparison(data)
354
+ except (KeyError, ValueError) as e:
355
+ return pd.DataFrame([{
356
+ 'brand': 'N/A',
357
+ 'perfume': 'N/A',
358
+ 'matching_notes': str(e),
359
+ 'match_count': 0,
360
+ 'similarity_score': 0,
361
+ 'purity': 0,
362
+ 'adjusted_score': 0
363
+ }])
364
+
365
+ user_notes_clean = [n.strip().lower() for n in user_notes]
366
+
367
+ matches = []
368
+
369
+ for _, row in df.iterrows():
370
+ perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
371
+ total_perfume_notes = len(perfume_notes)
372
+
373
+ matched_notes = []
374
+ total_ratio = 0
375
+
376
+ for u_note in user_notes_clean:
377
+ best_ratio = 0
378
+ best_p_note = None
379
+ for p_note in perfume_notes:
380
+ ratio = fuzz.partial_ratio(u_note, p_note)
381
+ if ratio > best_ratio:
382
+ best_ratio = ratio
383
+ best_p_note = p_note
384
+ if best_ratio >= threshold and best_p_note:
385
+ matched_notes.append(best_p_note)
386
+ total_ratio += best_ratio
387
+
388
+ unique_matched_notes = sorted(set(matched_notes))
389
+ match_count = len(unique_matched_notes)
390
+
391
+ purity = match_count / total_perfume_notes if total_perfume_notes > 0 else 0
392
+ adjusted_score = purity * total_ratio
393
+
394
+ matches.append({
395
+ 'brand': row['brand'],
396
+ 'perfume': row['perfume'],
397
+ 'matching_notes': ', '.join(unique_matched_notes),
398
  'match_count': match_count,
399
  'similarity_score': round(total_ratio, 2),
400
  'purity': round(purity, 2),