Spaces:
Running
on
Zero
Running
on
Zero
Safe parsing of LLM output
Browse files
app.py
CHANGED
|
@@ -216,47 +216,44 @@ def extract_notes(text, section_name):
|
|
| 216 |
return []
|
| 217 |
|
| 218 |
def parse_perfume_description(text: str) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
# Perfume Name
|
| 220 |
-
perfume_name =
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
-
# Tagline
|
| 223 |
-
tagline =
|
| 224 |
-
tagline = tagline.group(1).strip() if tagline else ""
|
| 225 |
|
| 226 |
# Poetic Olfactory Description
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
# General Atmosphere: stop at Image Description if present
|
| 238 |
-
general_atmosphere_match = re.search(
|
| 239 |
-
r'General Atmosphere:\s*(.*?)(?:\s*Image Description:|$)', text, re.DOTALL)
|
| 240 |
-
general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
|
| 241 |
|
| 242 |
# Image Description
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
image_desc_match = re.search(
|
| 249 |
-
r'Image Description:\s*(.*?)$', text, re.DOTALL)
|
| 250 |
-
image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
|
| 251 |
-
|
| 252 |
-
# 🗂️ Smart bullet extractor
|
| 253 |
-
|
| 254 |
top_notes = extract_notes(text, 'Top Notes')
|
| 255 |
heart_notes = extract_notes(text, 'Heart Notes')
|
| 256 |
base_notes = extract_notes(text, 'Base Notes')
|
| 257 |
|
| 258 |
result = {
|
| 259 |
'Perfume Name': perfume_name,
|
|
|
|
| 260 |
'Tagline': tagline,
|
| 261 |
'Poetic Olfactory Description': poetic_desc,
|
| 262 |
'Image Description': image_desc,
|
|
|
|
| 216 |
return []
|
| 217 |
|
| 218 |
def parse_perfume_description(text: str) -> dict:
|
| 219 |
+
def safe_search(pattern, text, flags=0):
|
| 220 |
+
match = re.search(pattern, text, flags)
|
| 221 |
+
return match.group(1).strip() if match else None
|
| 222 |
+
|
| 223 |
# Perfume Name
|
| 224 |
+
perfume_name = safe_search(r'Perfume Name:\s*(.+)', text) or ""
|
| 225 |
+
|
| 226 |
+
# Concentration Type (same style: expects `Concentration Type:`)
|
| 227 |
+
concentration_type = safe_search(r'Concentration Type:\s*(.+)', text) or ""
|
| 228 |
|
| 229 |
+
# Tagline
|
| 230 |
+
tagline = safe_search(r'Tagline:\s*"(.*?)"', text, re.DOTALL) or ""
|
|
|
|
| 231 |
|
| 232 |
# Poetic Olfactory Description
|
| 233 |
+
poetic_desc = safe_search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL)
|
| 234 |
+
if poetic_desc is None:
|
| 235 |
+
poetic_desc = safe_search(
|
| 236 |
+
r'Poetic Olfactory Description:\s*(.*?)\s*(Olfactory Pyramid:|Image Description:|General Atmosphere:|Concentration Type:)',
|
| 237 |
+
text, re.DOTALL) or ""
|
| 238 |
+
|
| 239 |
+
# General Atmosphere
|
| 240 |
+
general_atmosphere = safe_search(
|
| 241 |
+
r'General Atmosphere:\s*(.*?)(?:\s*Image Description:|$)',
|
| 242 |
+
text, re.DOTALL) or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
# Image Description
|
| 245 |
+
image_desc = safe_search(r'Image Description:\s*"(.*?)"', text, re.DOTALL)
|
| 246 |
+
if image_desc is None:
|
| 247 |
+
image_desc = safe_search(r'Image Description:\s*(.*?)$', text, re.DOTALL) or ""
|
| 248 |
+
|
| 249 |
+
# Notes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
top_notes = extract_notes(text, 'Top Notes')
|
| 251 |
heart_notes = extract_notes(text, 'Heart Notes')
|
| 252 |
base_notes = extract_notes(text, 'Base Notes')
|
| 253 |
|
| 254 |
result = {
|
| 255 |
'Perfume Name': perfume_name,
|
| 256 |
+
'Concentration Type': concentration_type,
|
| 257 |
'Tagline': tagline,
|
| 258 |
'Poetic Olfactory Description': poetic_desc,
|
| 259 |
'Image Description': image_desc,
|