thanhnt-cf committed
Commit b652f9c · 1 Parent(s): 4e55bb0

fix percentages

app/core/prompts.py CHANGED
@@ -19,21 +19,21 @@ FOLLOW_SCHEMA_HUMAN = """Convert following attributes to structured schema. Keep
 
 {json_info}"""
 
-GET_PERCENTAGE_SYSTEM = "You have to assign a percentage of certainty from, don't output just 0 and 1"
+GET_PERCENTAGE_SYSTEM = "You are a fashion assistant. You have to assign percentages of certainty to each attribute of a product based on the image and product data. You will be given an image or a set of images of a product and a set of attributes and should output the percentages of certainty into the given structure."
 
-GET_PERCENTAGE_HUMAN = """For each allowed value in each attribute, assign a percentage of certainty (in scale of 100) that the product fits that value.
-If an attribute can have multiple values, evaluate each value independently. If an attribute can have only one value, the percentages of certainty should sum up to 100.
+GET_PERCENTAGE_HUMAN = """For each allowed value in each attribute, assign a percentage of certainty (from 0 to 100) that the product fits that value.
+For attributes of type list[string], multiple values may apply, so more than one value can receive a percentage of 100.
 You should use the following product data to assist you, if available:
 {product_data}
 If an attribute appears in both the image and the product data, use the value from the product data.
 """
 
-REEVALUATE_SYSTEM = "You are an expert in structured data extraction. You will be given an image or a set of images of a product and set of attributes and should reevaluate certainity of the attributes into the given structure."
+REEVALUATE_SYSTEM = "You are a fashion assistant. You have to reevaluate the attributes of a product based on the image and product data. You will be given an image or a set of images of a product and a set of attributes and should output the reevaluated attributes into the given structure."
 
 REEVALUATE_HUMAN = """Reevaluate the following attributes of the main product (or {product_taxonomy}) shown in the images. Here are the attributes to reevaluate:
 {product_data}
 
-If an attribute can have multiple values, do not need to reevaluate the values, just the attribute itself. If an attribute can have only one value, reevaluate the top three values.
+If an attribute has type string, you do not need to reevaluate the values, just the attribute itself. If an attribute has type list[string], reevaluate the top three values.
 """
 
 class Prompts(BaseSettings):
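For illustration, a minimal sketch (not from the repo) of how the reworked GET_PERCENTAGE_HUMAN template reads once product data is filled in; the template string is inlined and the product data is made up:

# Minimal sketch: render the new GET_PERCENTAGE_HUMAN template with
# made-up product data. In the app this happens via the Prompts settings;
# here the template is inlined for illustration only.
GET_PERCENTAGE_HUMAN = (
    "For each allowed value in each attribute, assign a percentage of certainty "
    "(from 0 to 100) that the product fits that value.\n"
    "For attributes of type list[string], multiple values may apply, so more than "
    "one value can receive a percentage of 100.\n"
    "You should use the following product data to assist you, if available:\n"
    "{product_data}\n"
    "If an attribute appears in both the image and the product data, use the value "
    "from the product data.\n"
)

sample_product_data = "color: red, white\nmaterial: cotton"  # hypothetical values
print(GET_PERCENTAGE_HUMAN.format(product_data=sample_product_data))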
app/services/base.py CHANGED
@@ -27,7 +27,7 @@ def cf_style_to_pydantic_percentage_shema(
         else:
             multiple = False
         class_name = "Class_" + attribute.capitalize()
-        multiple_desc = "Can have multiple values" if multiple else "Single value only"
+        multiple_desc = "multi-label classification" if multiple else "classification"
         attribute_desc = attribute_info.description
         attribute_line = f'{attribute}: {class_name} = Field("", description="{multiple_desc}, {attribute_desc}")'
 
@@ -52,6 +52,12 @@ class Product(BaseModel):
     exec(pydantic_code, globals())
     return Product
 
+def build_attributes_types_prompt(attributes):
+    list_of_types_prompt = "\n List of attributes types:\n"
+    for key, value in attributes.items():
+        list_of_types_prompt += f"- {key}: {value.data_type}\n"
+    return list_of_types_prompt
+
 
 class BaseAttributionService(ABC):
     @abstractmethod
@@ -62,6 +68,7 @@ class BaseAttributionService(ABC):
         img_urls: List[str],
         product_taxonomy: str,
         pil_images: List[Any] = None,
+        appended_prompt: str = "",
     ) -> Dict[str, Any]:
         pass
 
@@ -73,6 +80,7 @@
         img_urls: List[str],
         product_taxonomy: str,
         pil_images: List[Any] = None,
+        appended_prompt: str = "",
     ) -> Dict[str, Any]:
         pass
 
@@ -91,6 +99,7 @@
         product_data: Dict[str, Union[str, List[str]]],
         pil_images: List[Any] = None,
         img_paths: List[str] = None,
+        appended_prompt: str = "",
    ) -> Dict[str, Any]:
        # validate_json_schema(schema)
 
@@ -105,6 +114,8 @@
        for key, value in attributes.items():
            transformed_attributes[forward_mapping[key]] = value
 
+       attributes_types_prompt = build_attributes_types_prompt(attributes)
+
        # attributes_model = convert_attribute_to_model(transformed_attributes)
        attributes_percentage_model = cf_style_to_pydantic_percentage_shema(transformed_attributes)
        schema = attributes_percentage_model.model_json_schema()
@@ -116,6 +127,7 @@
            product_data,
            # pil_images=pil_images, # temporarily removed to save cost
            img_paths=img_paths,
+           appended_prompt=attributes_types_prompt,
        )
        validate_json_data(data, schema)
 
@@ -128,6 +140,7 @@
            str_data,
            # pil_images=pil_images, # temporarily removed to save cost
            img_paths=img_paths,
+           appended_prompt=attributes_types_prompt,
        )
 
        init_reevaluate_data = {}
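For illustration, a minimal, self-contained sketch of the new build_attributes_types_prompt helper. The only assumption, taken from the diff, is that each attribute value exposes a .data_type field; AttributeInfo and the attribute names below are hypothetical stand-ins:

# Minimal sketch of build_attributes_types_prompt from this diff.
from dataclasses import dataclass

@dataclass
class AttributeInfo:  # hypothetical stand-in for the real attribute metadata
    data_type: str

def build_attributes_types_prompt(attributes):
    list_of_types_prompt = "\n List of attributes types:\n"
    for key, value in attributes.items():
        list_of_types_prompt += f"- {key}: {value.data_type}\n"
    return list_of_types_prompt

attributes = {"color": AttributeInfo("list[string]"), "fit": AttributeInfo("string")}
print(build_attributes_types_prompt(attributes))
# Prints:
#  List of attributes types:
# - color: list[string]
# - fit: string

This is the string that the base service now passes to the OpenAI service as appended_prompt, so the model is told the expected data type of every attribute.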
app/services/service_openai.py CHANGED
@@ -68,10 +68,11 @@ class OpenAIService(BaseAttributionService):
        product_data: Dict[str, Union[str, List[str]]],
        pil_images: List[Any] = None, # do not remove, this is for weave
        img_paths: List[str] = None,
+       appended_prompt: str = "",
    ) -> Dict[str, Any]:
 
        print("Prompt: ")
-       print(prompts.GET_PERCENTAGE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data_to_str(product_data)))
+       print(prompts.GET_PERCENTAGE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data_to_str(product_data)) + appended_prompt)
 
        text_content = [
            {
@@ -79,7 +80,7 @@
                "text": prompts.EXTRACT_INFO_HUMAN_MESSAGE.format(
                    product_taxonomy=product_taxonomy,
                    product_data=product_data_to_str(product_data),
-               ),
+               ) + appended_prompt,
            },
        ]
        if img_urls is not None:
@@ -157,10 +158,11 @@
        product_data: str,
        pil_images: List[Any] = None, # do not remove, this is for weave
        img_paths: List[str] = None,
+       appended_prompt: str = "",
    ) -> Dict[str, Any]:
 
        print("Prompt: ")
-       print(prompts.REEVALUATE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data))
+       print(prompts.REEVALUATE_HUMAN_MESSAGE.format(product_taxonomy=product_taxonomy, product_data=product_data) + appended_prompt)
 
        text_content = [
            {
@@ -168,7 +170,7 @@
                "text": prompts.REEVALUATE_HUMAN_MESSAGE.format(
                    product_taxonomy=product_taxonomy,
                    product_data=product_data,
-               ),
+               ) + appended_prompt,
            },
        ]
        if img_urls is not None:
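For illustration, a minimal sketch (not the service code) of how the new appended_prompt is concatenated onto the formatted human message before it is sent; the template and values below are simplified stand-ins, not the real REEVALUATE prompt:

# Minimal sketch: the attribute-types prompt built in base.py arrives here as
# appended_prompt and is simply concatenated after the formatted message.
REEVALUATE_HUMAN_MESSAGE = "Reevaluate the attributes of the {product_taxonomy}:\n{product_data}\n"
appended_prompt = "\n List of attributes types:\n- color: list[string]\n- fit: string\n"

text = REEVALUATE_HUMAN_MESSAGE.format(
    product_taxonomy="t-shirt",   # hypothetical taxonomy
    product_data="color: red",    # hypothetical product data
) + appended_prompt
print(text)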