fffiloni committed on
Commit
9065d18
·
verified ·
1 Parent(s): 866755a

Handle the case where the LLM does not provide a note description

Browse files
Files changed (1) hide show
  1. app.py +32 -8
app.py CHANGED
@@ -66,27 +66,51 @@ Here is the scene description to analyze:
66
  return cleaned_text
67
 
68
  def parse_perfume_description(text: str) -> dict:
69
- # Extract main fields
70
  perfume_name = re.search(r'Perfume Name:\s*(.+)', text).group(1).strip()
71
- tagline = re.search(r'Tagline:\s*"(.*?)"', text, re.DOTALL).group(1).strip()
72
- poetic_desc = re.search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL).group(1).strip()
73
- general_atmosphere = re.search(r'General Atmosphere:\s*(.+)', text, re.DOTALL).group(1).strip()
74
 
75
- # More flexible notes extractor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def extract_notes(section_name):
77
  pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
78
  match = re.search(pattern, text, re.MULTILINE)
79
  if not match:
80
  return []
81
  notes_text = match.group(1)
82
- notes = re.findall(r'\*\s*(.*?):\s*(.*)', notes_text)
83
- return [{'note': note.strip(), 'description': desc.strip()} for note, desc in notes]
 
 
 
 
 
 
 
 
84
 
85
  top_notes = extract_notes('Top Notes')
86
  heart_notes = extract_notes('Heart Notes')
87
  base_notes = extract_notes('Base Notes')
88
 
89
- # Build final JSON structure
90
  result = {
91
  'Perfume Name': perfume_name,
92
  'Tagline': tagline,
 
66
  return cleaned_text
67
 
68
  def parse_perfume_description(text: str) -> dict:
69
+ # Perfume Name
70
  perfume_name = re.search(r'Perfume Name:\s*(.+)', text).group(1).strip()
 
 
 
71
 
72
+ # Tagline (still expects quotes)
73
+ tagline = re.search(r'Tagline:\s*"(.*?)"', text, re.DOTALL)
74
+ tagline = tagline.group(1).strip() if tagline else ""
75
+
76
+ # Poetic Olfactory Description: allow quoted or plain
77
+ poetic_desc_match = re.search(
78
+ r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL)
79
+ if poetic_desc_match:
80
+ poetic_desc = poetic_desc_match.group(1).strip()
81
+ else:
82
+ # fallback: get everything until next section
83
+ poetic_desc_match = re.search(
84
+ r'Poetic Olfactory Description:\s*(.*?)\s*Olfactory Pyramid:', text, re.DOTALL)
85
+ poetic_desc = poetic_desc_match.group(1).strip() if poetic_desc_match else ""
86
+
87
+ # General Atmosphere: same idea
88
+ general_atmosphere_match = re.search(
89
+ r'General Atmosphere:\s*(.+)', text, re.DOTALL)
90
+ general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
91
+
92
+ # Flexible note extractor for bullet points
93
  def extract_notes(section_name):
94
  pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
95
  match = re.search(pattern, text, re.MULTILINE)
96
  if not match:
97
  return []
98
  notes_text = match.group(1)
99
+ notes = []
100
+ for line in notes_text.strip().splitlines():
101
+ bullet = line.strip().lstrip('*').strip()
102
+ # If it has ":", split note & description
103
+ if ':' in bullet:
104
+ note, desc = bullet.split(':', 1)
105
+ notes.append({'note': note.strip(), 'description': desc.strip()})
106
+ else:
107
+ notes.append({'note': bullet, 'description': ''})
108
+ return notes
109
 
110
  top_notes = extract_notes('Top Notes')
111
  heart_notes = extract_notes('Heart Notes')
112
  base_notes = extract_notes('Base Notes')
113
 
 
114
  result = {
115
  'Perfume Name': perfume_name,
116
  'Tagline': tagline,