fffiloni committed on
Commit
866755a
·
verified ·
1 Parent(s): 061f3b8

regex grab properly indented notes

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -72,13 +72,13 @@ def parse_perfume_description(text: str) -> dict:
72
  poetic_desc = re.search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL).group(1).strip()
73
  general_atmosphere = re.search(r'General Atmosphere:\s*(.+)', text, re.DOTALL).group(1).strip()
74
 
75
- # Helper for pyramid sections
76
  def extract_notes(section_name):
77
- pattern = rf'{section_name}:\s*((?:\*\s.*\n?)+)'
78
- match = re.search(pattern, text)
79
  if not match:
80
  return []
81
- notes_text = match.group(1).strip()
82
  notes = re.findall(r'\*\s*(.*?):\s*(.*)', notes_text)
83
  return [{'note': note.strip(), 'description': desc.strip()} for note, desc in notes]
84
 
 
72
  poetic_desc = re.search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL).group(1).strip()
73
  general_atmosphere = re.search(r'General Atmosphere:\s*(.+)', text, re.DOTALL).group(1).strip()
74
 
75
+ # More flexible notes extractor
76
  def extract_notes(section_name):
77
+ pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
78
+ match = re.search(pattern, text, re.MULTILINE)
79
  if not match:
80
  return []
81
+ notes_text = match.group(1)
82
  notes = re.findall(r'\*\s*(.*?):\s*(.*)', notes_text)
83
  return [{'note': note.strip(), 'description': desc.strip()} for note, desc in notes]
84