Spaces:
Running
on
Zero
Running
on
Zero
regex grab properly indented notes
Browse files
app.py
CHANGED
@@ -72,13 +72,13 @@ def parse_perfume_description(text: str) -> dict:
|
|
72 |
poetic_desc = re.search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL).group(1).strip()
|
73 |
general_atmosphere = re.search(r'General Atmosphere:\s*(.+)', text, re.DOTALL).group(1).strip()
|
74 |
|
75 |
-
#
|
76 |
def extract_notes(section_name):
|
77 |
-
pattern = rf'{section_name}:\s
|
78 |
-
match = re.search(pattern, text)
|
79 |
if not match:
|
80 |
return []
|
81 |
-
notes_text = match.group(1)
|
82 |
notes = re.findall(r'\*\s*(.*?):\s*(.*)', notes_text)
|
83 |
return [{'note': note.strip(), 'description': desc.strip()} for note, desc in notes]
|
84 |
|
|
|
72 |
poetic_desc = re.search(r'Poetic Olfactory Description:\s*"(.*?)"', text, re.DOTALL).group(1).strip()
|
73 |
general_atmosphere = re.search(r'General Atmosphere:\s*(.+)', text, re.DOTALL).group(1).strip()
|
74 |
|
75 |
+
# More flexible notes extractor
|
76 |
def extract_notes(section_name, source=None):
    """Return the bulleted notes listed under a section header.

    Searches for a line ending in ``section_name:`` that is followed by one
    or more consecutive bullet lines of the form ``* Note: description``
    and parses each bullet into a dict.

    Args:
        section_name: Header text without the trailing colon. It is treated
            as literal text (regex metacharacters are escaped).
        source: Text to search. When omitted, the ``text`` variable of the
            enclosing scope is used, which keeps existing one-argument
            calls working unchanged.

    Returns:
        A list of ``{'note': ..., 'description': ...}`` dicts in document
        order. Empty when the section is absent or has no bullet lines
        directly beneath its header.
    """
    haystack = text if source is None else source

    # re.escape keeps characters such as "(" or "+" in a header from being
    # interpreted as regex syntax. The optional [ \t]* before each "*"
    # lets indented bullet lists match as well as flush-left ones.
    pattern = rf'{re.escape(section_name)}:\s*\n((?:[ \t]*\*.*(?:\n|$))+)'
    match = re.search(pattern, haystack, re.MULTILINE)
    if not match:
        return []

    # Only horizontal whitespace is skipped around the note name and the
    # description, so a bullet with an empty description cannot absorb the
    # next bullet line.
    notes = re.findall(r'\*[ \t]*(.*?):[ \t]*(.*)', match.group(1))
    return [{'note': note.strip(), 'description': desc.strip()} for note, desc in notes]
|
84 |
|