fffiloni committed on
Commit
5b9cfc0
·
verified ·
1 Parent(s): dce42cf

Correct how the image description is parsed and extracted

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -97,6 +97,11 @@ def parse_perfume_description(text: str) -> dict:
97
  text, re.DOTALL)
98
  poetic_desc = poetic_desc_match.group(1).strip() if poetic_desc_match else ""
99
 
 
 
 
 
 
100
  # Image Description: quoted or plain
101
  image_desc_match = re.search(
102
  r'Image Description:\s*"(.*?)"', text, re.DOTALL)
@@ -104,15 +109,9 @@ def parse_perfume_description(text: str) -> dict:
104
  image_desc = image_desc_match.group(1).strip()
105
  else:
106
  image_desc_match = re.search(
107
- r'Image Description:\s*(.*?)\s*(Olfactory Pyramid:|Poetic Olfactory Description:|General Atmosphere:)',
108
- text, re.DOTALL)
109
  image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
110
 
111
- # General Atmosphere
112
- general_atmosphere_match = re.search(
113
- r'General Atmosphere:\s*(.+)', text, re.DOTALL)
114
- general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
115
-
116
  # Flexible bullet extractor
117
  def extract_notes(section_name):
118
  pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'
 
97
  text, re.DOTALL)
98
  poetic_desc = poetic_desc_match.group(1).strip() if poetic_desc_match else ""
99
 
100
+ # General Atmosphere: stop at Image Description if present
101
+ general_atmosphere_match = re.search(
102
+ r'General Atmosphere:\s*(.*?)(?:\s*Image Description:|$)', text, re.DOTALL)
103
+ general_atmosphere = general_atmosphere_match.group(1).strip() if general_atmosphere_match else ""
104
+
105
  # Image Description: quoted or plain
106
  image_desc_match = re.search(
107
  r'Image Description:\s*"(.*?)"', text, re.DOTALL)
 
109
  image_desc = image_desc_match.group(1).strip()
110
  else:
111
  image_desc_match = re.search(
112
+ r'Image Description:\s*(.*?)$', text, re.DOTALL)
 
113
  image_desc = image_desc_match.group(1).strip() if image_desc_match else ""
114
 
 
 
 
 
 
115
  # Flexible bullet extractor
116
  def extract_notes(section_name):
117
  pattern = rf'{section_name}:\s*\n((?:\*.*(?:\n|$))+)'