fffiloni committed on
Commit
d83da9c
·
verified ·
1 Parent(s): 9f95f2e

update perfume comparison

Browse files
Files changed (1) hide show
  1. app.py +41 -56
app.py CHANGED
@@ -301,7 +301,7 @@ df = pd.read_excel('perfume_database_cleaned.xlsx')
301
 
302
  def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
303
  """
304
- Extracts all notes from the Olfactory Pyramid section.
305
  """
306
  if isinstance(data, str):
307
  try:
@@ -310,98 +310,83 @@ def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
310
  raise ValueError("Invalid JSON string provided")
311
 
312
  if not isinstance(data, dict):
313
- raise TypeError("Input must be a dict or a valid JSON string")
314
 
315
- olfactory_pyramid = data.get("Olfactory Pyramid")
316
  if not olfactory_pyramid:
317
- raise KeyError("No 'Olfactory Pyramid' found in the data")
318
 
319
  notes = []
320
  for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
321
- layer_data = olfactory_pyramid.get(layer)
322
  if not layer_data:
323
  continue
324
  for item in layer_data:
325
- note = item.get("note")
326
  if note:
327
  notes.append(note.strip())
328
 
329
- if not notes:
330
- raise ValueError("No notes found in the Olfactory Pyramid")
331
-
332
  return notes
333
 
334
- def find_best_perfumes_from_json(
335
- data: Union[str, dict],
336
- top_n: int = 5,
337
- threshold: int = 80
338
- ) -> pd.DataFrame:
339
  """
340
- Finds top N matching perfumes with purity-based similarity.
 
341
  """
342
- try:
343
- user_notes = extract_notes_for_comparison(data)
344
- except Exception as e:
345
- # Return fallback if extraction fails
346
- return pd.DataFrame([{
347
- 'brand': 'N/A',
348
- 'perfume': 'N/A',
349
- 'matching_notes': f'Error: {str(e)}',
350
- 'match_count': 0,
351
- 'purity': 0,
352
- 'adjusted_score': 0
353
- }])
354
-
355
- user_notes_clean = [n.strip().lower() for n in user_notes if n.strip()]
356
 
357
  matches = []
 
358
  for _, row in df.iterrows():
359
- perfume_notes = [
360
- n.strip().lower()
361
- for n in row['notes'].split(',')
362
- if n.strip()
363
- ]
364
 
365
- matched = []
366
  for u_note in user_notes_clean:
367
  for p_note in perfume_notes:
368
- ratio = fuzz.partial_ratio(u_note, p_note)
 
 
 
 
 
 
 
369
  if ratio >= threshold:
370
- matched.append(p_note)
371
 
372
- unique_matched_notes = sorted(set(matched))
373
- unique_matched_notes = [n for n in unique_matched_notes if n]
 
 
 
374
 
375
  total_notes = len(perfume_notes)
376
- match_count = len(unique_matched_notes)
377
- purity = match_count / total_notes if total_notes else 0
378
  adjusted_score = match_count * purity
379
 
380
- if match_count > 0:
381
- matches.append({
382
- 'brand': row['brand'],
383
- 'perfume': row['perfume'],
384
- 'matching_notes': ', '.join(unique_matched_notes),
385
- 'match_count': match_count,
386
- 'purity': round(purity, 2),
387
- 'adjusted_score': round(adjusted_score, 2)
388
- })
389
 
390
  if not matches:
 
391
  return pd.DataFrame([{
392
  'brand': 'N/A',
393
- 'perfume': 'N/A',
394
- 'matching_notes': 'No matches found',
395
  'match_count': 0,
396
  'purity': 0,
397
  'adjusted_score': 0
398
  }])
399
 
400
  result = pd.DataFrame(matches)
401
- result = result.sort_values(
402
- by=['adjusted_score', 'match_count'],
403
- ascending=[False, False]
404
- ).head(top_n).reset_index(drop=True)
405
 
406
  return result
407
 
 
301
 
302
  def extract_notes_for_comparison(data: Union[str, dict]) -> list[str]:
303
  """
304
+ Extracts notes from Olfactory Pyramid in a JSON string or dict.
305
  """
306
  if isinstance(data, str):
307
  try:
 
310
  raise ValueError("Invalid JSON string provided")
311
 
312
  if not isinstance(data, dict):
313
+ raise TypeError("Input must be a dict or valid JSON string")
314
 
315
+ olfactory_pyramid = data.get("Olfactory Pyramid") or data.get("olfactory pyramid")
316
  if not olfactory_pyramid:
317
+ return [] # No pyramid found, fail gracefully
318
 
319
  notes = []
320
  for layer in ["Top Notes", "Heart Notes", "Base Notes"]:
321
+ layer_data = olfactory_pyramid.get(layer) or olfactory_pyramid.get(layer.lower())
322
  if not layer_data:
323
  continue
324
  for item in layer_data:
325
+ note = item.get("note") or item.get("Note")
326
  if note:
327
  notes.append(note.strip())
328
 
 
 
 
329
  return notes
330
 
331
+ def find_best_perfumes_from_json(data: Union[str, dict], top_n: int = 5, threshold: int = 80):
 
 
 
 
332
  """
333
+ Fuzzy-match user notes against database notes.
334
+ Uses token_set_ratio + partial_ratio + short-word safeguard.
335
  """
336
+ user_notes = extract_notes_for_comparison(data)
337
+ user_notes_clean = [n.strip().lower() for n in user_notes]
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
  matches = []
340
+
341
  for _, row in df.iterrows():
342
+ perfume_notes = [n.strip().lower() for n in row['notes'].split(',')]
343
+ matched_notes = []
 
 
 
344
 
 
345
  for u_note in user_notes_clean:
346
  for p_note in perfume_notes:
347
+ if len(u_note) < 4:
348
+ # Very short? Require exact match
349
+ ratio = 100 if u_note == p_note else 0
350
+ else:
351
+ ratio_token = fuzz.token_set_ratio(u_note, p_note)
352
+ ratio_partial = fuzz.partial_ratio(u_note, p_note)
353
+ ratio = max(ratio_token, ratio_partial)
354
+
355
  if ratio >= threshold:
356
+ matched_notes.append(p_note)
357
 
358
+ unique_matched_notes = sorted(set(matched_notes))
359
+ match_count = len(unique_matched_notes)
360
+
361
+ if match_count == 0:
362
+ continue # Skip if no match at all
363
 
364
  total_notes = len(perfume_notes)
365
+ purity = match_count / total_notes if total_notes > 0 else 0
 
366
  adjusted_score = match_count * purity
367
 
368
+ matches.append({
369
+ 'brand': row['brand'],
370
+ 'perfume': row['perfume'],
371
+ 'matching_notes': ', '.join(unique_matched_notes).strip(', '),
372
+ 'match_count': match_count,
373
+ 'purity': round(purity, 2),
374
+ 'adjusted_score': round(adjusted_score, 2)
375
+ })
 
376
 
377
  if not matches:
378
+ # Nothing matched at all
379
  return pd.DataFrame([{
380
  'brand': 'N/A',
381
+ 'perfume': 'No match found',
382
+ 'matching_notes': '',
383
  'match_count': 0,
384
  'purity': 0,
385
  'adjusted_score': 0
386
  }])
387
 
388
  result = pd.DataFrame(matches)
389
+ result = result.sort_values(by='adjusted_score', ascending=False).head(top_n).reset_index(drop=True)
 
 
 
390
 
391
  return result
392