fffiloni committed on
Commit
a38394c
·
verified ·
1 Parent(s): 6da2f84

Safely parse LLM output

Browse files
Files changed (1) hide show
  1. app.py +26 -29
app.py CHANGED
@@ -216,47 +216,44 @@ def extract_notes(text, section_name):
216
  return []
217
 
218
  def parse_perfume_description(text: str) -> dict:
 
 
 
 
219
  # Perfume Name
220
- perfume_name = re.search(r'Perfume Name:\s*(.+)', text).group(1).strip()
 
 
 
221
 
222
- # Tagline (quoted)
223
- tagline = re.search(r'Tagline:\s*"(.*?)"', text, re.DOTALL)
224
- tagline = tagline.group(1).strip() if tagline else ""
225
 
226
  # Poetic Olfactory Description
227
- poetic_desc_match = re.search(
228
- r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL)
229
- if poetic_desc_match:
230
- poetic_desc = poetic_desc_match.group(1).strip()
231
- else:
232
- poetic_desc_match = re.search(
233
- r'Poetic Olfactory Description:\s*(.*?)\s*(Olfactory Pyramid:|Image Description:|General Atmosphere:)',
234
- text, re.DOTALL)
235
- poetic_desc = poetic_desc_match.group(1).strip() if poetic_desc_match else ""
236
-
237
- # General Atmosphere: stop at Image Description if present
238
- general_atmosphere_match = re.search(
239
- r'General Atmosphere:\s*(.*?)(?:\s*Image Description:|$)', text, re.DOTALL)
240
- general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
241
 
242
  # Image Description
243
- image_desc_match = re.search(
244
- r'Image Description:\s*"(.*?)"', text, re.DOTALL)
245
- if image_desc_match:
246
- image_desc = image_desc_match.group(1).strip()
247
- else:
248
- image_desc_match = re.search(
249
- r'Image Description:\s*(.*?)$', text, re.DOTALL)
250
- image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
251
-
252
- # 🗂️ Smart bullet extractor
253
-
254
  top_notes = extract_notes(text, 'Top Notes')
255
  heart_notes = extract_notes(text, 'Heart Notes')
256
  base_notes = extract_notes(text, 'Base Notes')
257
 
258
  result = {
259
  'Perfume Name': perfume_name,
 
260
  'Tagline': tagline,
261
  'Poetic Olfactory Description': poetic_desc,
262
  'Image Description': image_desc,
 
216
  return []
217
 
218
  def parse_perfume_description(text: str) -> dict:
219
+ def safe_search(pattern, text, flags=0):
220
+ match = re.search(pattern, text, flags)
221
+ return match.group(1).strip() if match else None
222
+
223
  # Perfume Name
224
+ perfume_name = safe_search(r'Perfume Name:\s*(.+)', text) or ""
225
+
226
+ # Concentration Type (same style: expects `Concentration Type:`)
227
+ concentration_type = safe_search(r'Concentration Type:\s*(.+)', text) or ""
228
 
229
+ # Tagline
230
+ tagline = safe_search(r'Tagline:\s*"(.*?)"', text, re.DOTALL) or ""
 
231
 
232
  # Poetic Olfactory Description
233
+ poetic_desc = safe_search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL)
234
+ if poetic_desc is None:
235
+ poetic_desc = safe_search(
236
+ r'Poetic Olfactory Description:\s*(.*?)\s*(Olfactory Pyramid:|Image Description:|General Atmosphere:|Concentration Type:)',
237
+ text, re.DOTALL) or ""
238
+
239
+ # General Atmosphere
240
+ general_atmosphere = safe_search(
241
+ r'General Atmosphere:\s*(.*?)(?:\s*Image Description:|$)',
242
+ text, re.DOTALL) or ""
 
 
 
 
243
 
244
  # Image Description
245
+ image_desc = safe_search(r'Image Description:\s*"(.*?)"', text, re.DOTALL)
246
+ if image_desc is None:
247
+ image_desc = safe_search(r'Image Description:\s*(.*?)$', text, re.DOTALL) or ""
248
+
249
+ # Notes
 
 
 
 
 
 
250
  top_notes = extract_notes(text, 'Top Notes')
251
  heart_notes = extract_notes(text, 'Heart Notes')
252
  base_notes = extract_notes(text, 'Base Notes')
253
 
254
  result = {
255
  'Perfume Name': perfume_name,
256
+ 'Concentration Type': concentration_type,
257
  'Tagline': tagline,
258
  'Poetic Olfactory Description': poetic_desc,
259
  'Image Description': image_desc,