thanhnt-cf committed
Commit b652f9c · 1 Parent(s): 4e55bb0

fix percentages

app/core/prompts.py CHANGED
@@ -19,21 +19,21 @@ FOLLOW_SCHEMA_HUMAN = """Convert following attributes to structured schema. Keep
 
 {json_info}"""
 
-GET_PERCENTAGE_SYSTEM = "You have to assign a percentage of certainty from, don't output just 0 and 1"
+GET_PERCENTAGE_SYSTEM = "You are a fashion assistant. You have to assign percentages of certainty to each attribute of a product based on the image and product data. You will be given an image or a set of images of a product and a set of attributes and should output the percentages of certainty into the given structure."
 
-GET_PERCENTAGE_HUMAN = """For each allowed value in each attribute, assign a percentage of certainty (in scale of 100) that the product fits that value.
-If an attribute can have multiple values, evaluate each value independently. If an attribute can have only one value, the percentages of certainty should sum up to 100.
+GET_PERCENTAGE_HUMAN = """For each allowed value in each attribute, assign a percentage of certainty (from 0 to 100) that the product fits that value.
+For attributes of type list[string], multiple values may apply, so more than one value can receive a percentage of 100.
 You should use the following product data to assist you, if available:
 {product_data}
 If an attribute appears in both the image and the product data, use the value from the product data.
 """
 
-REEVALUATE_SYSTEM = "You are an expert in structured data extraction. You will be given an image or a set of images of a product and set of attributes and should reevaluate certainity of the attributes into the given structure."
+REEVALUATE_SYSTEM = "You are a fashion assistant. You have to reevaluate the attributes of a product based on the image and product data. You will be given an image or a set of images of a product and a set of attributes and should output the reevaluated attributes into the given structure."
 
 REEVALUATE_HUMAN = """Reevaluate the following attributes of the main product (or {product_taxonomy}) shown in the images. Here are the attributes to reevaluate:
 {product_data}
 
-If an attribute can have multiple values, do not need to reevaluate the values, just the attribute itself. If an attribute can have only one value, reevaluate the top three values.
+If an attribute has type string, you do not need to reevaluate the values, just the attribute itself. If an attribute has type list[string], reevaluate the top three values.
 """
 
 class Prompts(BaseSettings):
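For illustration, a minimal sketch (not from the repo) of how the reworked GET_PERCENTAGE_HUMAN template reads once product data is filled in; the template string is inlined and the product data is made up:

# Minimal sketch: render the new GET_PERCENTAGE_HUMAN template with
# made-up product data. In the app this happens via the Prompts settings;
# here the template is inlined for illustration only.
GET_PERCENTAGE_HUMAN = (
    "For each allowed value in each attribute, assign a percentage of certainty "
    "(from 0 to 100) that the product fits that value.\n"
    "For attributes of type list[string], multiple values may apply, so more than "
    "one value can receive a percentage of 100.\n"
    "You should use the following product data to assist you, if available:\n"
    "{product_data}\n"
    "If an attribute appears in both the image and the product data, use the value "
    "from the product data.\n"
)

sample_product_data = "color: red, white\nmaterial: cotton"  # hypothetical values
print(GET_PERCENTAGE_HUMAN.format(product_data=sample_product_data))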
app/services/base.py CHANGED
@@ -27,7 +27,7 @@ def cf_style_to_pydantic_percentage_shema(
         else:
             multiple = False
         class_name = "Class_" + attribute.capitalize()
-        multiple_desc = "Can have multiple values" if multiple else "Single value only"
+        multiple_desc = "multi-label classification" if multiple else "classification"
         attribute_desc = attribute_info.description
         attribute_line = f'{attribute}: {class_name} = Field("", description="{multiple_desc}, {attribute_desc}")'
 
@@ -52,6 +52,12 @@ class Product(BaseModel):
     exec(pydantic_code, globals())
     return Product
 
+def build_attributes_types_prompt(attributes):
+    list_of_types_prompt = "\n List of attributes types:\n"
+    for key, value in attributes.items():
+        list_of_types_prompt += f"- {key}: {value.data_type}\n"
+    return list_of_types_prompt
+
 
 class BaseAttributionService(ABC):
     @abstractmethod
@@ -62,6 +68,7 @@ class BaseAttributionService(ABC):
         img_urls: List[str],
         product_taxonomy: str,
         pil_images: List[Any] = None,
+        appended_prompt: str = "",
     ) -> Dict[str, Any]:
         pass
 
@@ -73,6 +80,7 @@
         img_urls: List[str],
         product_taxonomy: str,
         pil_images: List[Any] = None,
+        appended_prompt: str = "",
     ) -> Dict[str, Any]:
         pass
 
@@ -91,6 +99,7 @@
         product_data: Dict[str, Union[str, List[str]]],
         pil_images: List[Any] = None,
         img_paths: List[str] = None,
+        appended_prompt: str = "",
    ) -> Dict[str, Any]:
        # validate_json_schema(schema)
 
@@ -105,6 +114,8 @@
        for key, value in attributes.items():
            transformed_attributes[forward_mapping[key]] = value
 
+       attributes_types_prompt = build_attributes_types_prompt(attributes)
+
        # attributes_model = convert_attribute_to_model(transformed_attributes)
        attributes_percentage_model = cf_style_to_pydantic_percentage_shema(transformed_attributes)
        schema = attributes_percentage_model.model_json_schema()
@@ -116,6 +127,7 @@
            product_data,
            # pil_images=pil_images, # temporarily removed to save cost
            img_paths=img_paths,
+           appended_prompt=attributes_types_prompt,
        )
        validate_json_data(data, schema)
 
@@ -128,6 +140,7 @@
            str_data,
            # pil_images=pil_images, # temporarily removed to save cost
            img_paths=img_paths,
+           appended_prompt=attributes_types_prompt,
        )
 
        init_reevaluate_data = {}
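For illustration, a minimal, self-contained sketch of the new build_attributes_types_prompt helper. The only assumption, taken from the diff, is that each attribute value exposes a .data_type field; AttributeInfo and the attribute names below are hypothetical stand-ins:

# Minimal sketch of build_attributes_types_prompt from this diff.
from dataclasses import dataclass

@dataclass
class AttributeInfo:  # hypothetical stand-in for the real attribute metadata
    data_type: str

def build_attributes_types_prompt(attributes):
    list_of_types_prompt = "\n List of attributes types:\n"
    for key, value in attributes.items():
        list_of_types_prompt += f"- {key}: {value.data_type}\n"
    return list_of_types_prompt

attributes = {"color": AttributeInfo("list[string]"), "fit": AttributeInfo("string")}
print(build_attributes_types_prompt(attributes))
# Prints:
#  List of attributes types:
# - color: list[string]
# - fit: string

This is the string that the base service now passes to the OpenAI service as appended_prompt, so the model is told the expected data type of every attribute.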
app/services/service_openai.py CHANGED
@@ -68,10 +68,11 @@ class OpenAIService(BaseAttributionService):
        product_data: Dict[str, Union[str, List[str]]],
        pil_images: List[Any] = None, # do not remove, this is for weave
        img_paths: List[str] = None,
+       appended_prompt: str = "",
    ) -> Dict[str, Any]:
 
        print("Prompt: ")
-       print(prompts.GET_PERCENTAGE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data_to_str(product_data)))
+       print(prompts.GET_PERCENTAGE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data_to_str(product_data)) + appended_prompt)
 
        text_content = [
            {
@@ -79,7 +80,7 @@
                "text": prompts.EXTRACT_INFO_HUMAN_MESSAGE.format(
                    product_taxonomy=product_taxonomy,
                    product_data=product_data_to_str(product_data),
-               ),
+               ) + appended_prompt,
            },
        ]
        if img_urls is not None:
@@ -157,10 +158,11 @@
        product_data: str,
        pil_images: List[Any] = None, # do not remove, this is for weave
        img_paths: List[str] = None,
+       appended_prompt: str = "",
    ) -> Dict[str, Any]:
 
        print("Prompt: ")
-       print(prompts.REEVALUATE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data))
+       print(prompts.REEVALUATE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data) + appended_prompt)
 
        text_content = [
            {
@@ -168,7 +170,7 @@
                "text": prompts.REEVALUATE_HUMAN_MESSAGE.format(
                    product_taxonomy=product_taxonomy,
                    product_data=product_data,
-               ),
+               ) + appended_prompt,
            },
        ]
        if img_urls is not None:
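For illustration, a minimal sketch (not the service code) of how the new appended_prompt is concatenated onto the formatted human message before it is sent; the template and values below are simplified stand-ins, not the real REEVALUATE prompt:

# Minimal sketch: the attribute-types prompt built in base.py arrives here as
# appended_prompt and is simply concatenated after the formatted message.
REEVALUATE_HUMAN_MESSAGE = "Reevaluate the attributes of the {product_taxonomy}:\n{product_data}\n"
appended_prompt = "\n List of attributes types:\n- color: list[string]\n- fit: string\n"

text = REEVALUATE_HUMAN_MESSAGE.format(
    product_taxonomy="t-shirt",   # hypothetical taxonomy
    product_data="color: red",    # hypothetical product data
) + appended_prompt
print(text)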