Spaces:
Running
on
Zero
Running
on
Zero
correct image description parse grab
Browse files
app.py
CHANGED
|
@@ -97,6 +97,11 @@ def parse_perfume_description(text: str) -> dict:
|
|
| 97 |
text, re.DOTALL)
|
| 98 |
poetic_desc = poetic_desc_match.group(1).strip() if poetic_desc_match else ""
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
# Image Description: quoted or plain
|
| 101 |
image_desc_match = re.search(
|
| 102 |
r'Image Description:\s*"(.*?)"', text, re.DOTALL)
|
|
@@ -104,15 +109,9 @@ def parse_perfume_description(text: str) -> dict:
|
|
| 104 |
image_desc = image_desc_match.group(1).strip()
|
| 105 |
else:
|
| 106 |
image_desc_match = re.search(
|
| 107 |
-
r'Image Description:\s*(.*?)
|
| 108 |
-
text, re.DOTALL)
|
| 109 |
image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
|
| 110 |
|
| 111 |
-
# General Atmosphere
|
| 112 |
-
general_atmosphere_match = re.search(
|
| 113 |
-
r'General Atmosphere:\s*(.+)', text, re.DOTALL)
|
| 114 |
-
general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
|
| 115 |
-
|
| 116 |
# Flexible bullet extractor
|
| 117 |
def extract_notes(section_name):
|
| 118 |
pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
|
|
|
|
| 97 |
text, re.DOTALL)
|
| 98 |
poetic_desc = poetic_desc_match.group(1).strip() if poetic_desc_match else ""
|
| 99 |
|
| 100 |
+
# General Atmosphere: stop at Image Description if present
|
| 101 |
+
general_atmosphere_match = re.search(
|
| 102 |
+
r'General Atmosphere:\s*(.*?)(?:\s*Image Description:|$)', text, re.DOTALL)
|
| 103 |
+
general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
|
| 104 |
+
|
| 105 |
# Image Description: quoted or plain
|
| 106 |
image_desc_match = re.search(
|
| 107 |
r'Image Description:\s*"(.*?)"', text, re.DOTALL)
|
|
|
|
| 109 |
image_desc = image_desc_match.group(1).strip()
|
| 110 |
else:
|
| 111 |
image_desc_match = re.search(
|
| 112 |
+
r'Image Description:\s*(.*?)$', text, re.DOTALL)
|
|
|
|
| 113 |
image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
# Flexible bullet extractor
|
| 116 |
def extract_notes(section_name):
|
| 117 |
pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
|