matteomarjanovic committed on
Commit
6df2860
·
1 Parent(s): 42b1e2e

finish Google Gemini implementation

Browse files
Files changed (1) hide show
  1. app.py +60 -37
app.py CHANGED
@@ -78,6 +78,20 @@ def encode_image(pil_image):
78
 
79
  # return image
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  @spaces.GPU #[uncomment to use ZeroGPU]
82
  def generate_description_fn(
83
  image,
@@ -85,44 +99,53 @@ def generate_description_fn(
85
  ):
86
  base64_image = encode_image(image)
87
 
88
- client = Groq(
89
- api_key=os.environ.get("GROQ_API_KEY"),
 
 
90
  )
91
-
92
- chat_completion = client.chat.completions.create(
93
- messages=[
94
- {
95
- "role": "user",
96
- "content": [
97
- {
98
- "type": "text",
99
- "text": """
100
- I want you to imagine how the technical flat sketch of the garment you see in the picture would look like, and describe it in rich details, in one paragraph.
101
- Don't add any additional comment.
102
- Specify that the flat sketch is black and white (even if the original garment has a color) and that it doesn't include the person that wear the garment.
103
-
104
-
105
- The style of the result should look somewhat like the following example:
106
- The technical flat sketch of the dress depicts a midi-length, off-the-shoulder design with a smocked bodice and short puff sleeves that have elasticized cuffs.
107
- The elastic neckline sits straight across the chest and back, ensuring a secure fit.
108
- The bodice transitions into a flowy, tiered skirt with three evenly spaced gathered panels, creating soft volume.
109
- Elasticized areas are marked with textured lines, while the gathers and drape is indicated through subtle curved strokes, ensuring clarity in construction details.
110
- The flat sketch does NOT include any person and it's only the in black and white, being a technical drawing.
111
- """
112
- },
113
- {
114
- "type": "image_url",
115
- "image_url": {
116
- "url": f"data:image/jpeg;base64,{base64_image}",
117
- },
118
- },
119
- ],
120
- }
121
- ],
122
- model="llama-3.2-11b-vision-preview",
123
- )
124
-
125
- prompt = chat_completion.choices[0].message.content + " In the style of FLTSKC"
 
 
 
 
 
 
 
126
  control_image = processor(
127
  image,
128
  low_threshold=50,
 
78
 
79
  # return image
80
 
81
+ description_prompt = """
82
+ I want you to imagine how the technical flat sketch of the garment you see in the picture would look like, and describe it in rich details, in one paragraph.
83
+ Don't add any additional comment.
84
+ Specify that the flat sketch is black and white (even if the original garment has a color) and that it doesn't include the person that wear the garment.
85
+
86
+
87
+ The style of the result should look somewhat like the following example:
88
+ The technical flat sketch of the dress depicts a midi-length, off-the-shoulder design with a smocked bodice and short puff sleeves that have elasticized cuffs.
89
+ The elastic neckline sits straight across the chest and back, ensuring a secure fit.
90
+ The bodice transitions into a flowy, tiered skirt with three evenly spaced gathered panels, creating soft volume.
91
+ Elasticized areas are marked with textured lines, while the gathers and drape is indicated through subtle curved strokes, ensuring clarity in construction details.
92
+ The flat sketch does NOT include any person and it's only the in black and white, being a technical drawing.
93
+ """
94
+
95
  @spaces.GPU #[uncomment to use ZeroGPU]
96
  def generate_description_fn(
97
  image,
 
99
  ):
100
  base64_image = encode_image(image)
101
 
102
+ client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
103
+ response = client.models.generate_content(
104
+ model="gemini-2.0-flash",
105
+ contents=[description_prompt, image]
106
  )
107
+ prompt = response.text + " In the style of FLTSKC"
108
+
109
+
110
+ # client = Groq(
111
+ # api_key=os.environ.get("GROQ_API_KEY"),
112
+ # )
113
+
114
+ # chat_completion = client.chat.completions.create(
115
+ # messages=[
116
+ # {
117
+ # "role": "user",
118
+ # "content": [
119
+ # {
120
+ # "type": "text",
121
+ # "text": """
122
+ # I want you to imagine how the technical flat sketch of the garment you see in the picture would look like, and describe it in rich details, in one paragraph.
123
+ # Don't add any additional comment.
124
+ # Specify that the flat sketch is black and white (even if the original garment has a color) and that it doesn't include the person that wear the garment.
125
+
126
+
127
+ # The style of the result should look somewhat like the following example:
128
+ # The technical flat sketch of the dress depicts a midi-length, off-the-shoulder design with a smocked bodice and short puff sleeves that have elasticized cuffs.
129
+ # The elastic neckline sits straight across the chest and back, ensuring a secure fit.
130
+ # The bodice transitions into a flowy, tiered skirt with three evenly spaced gathered panels, creating soft volume.
131
+ # Elasticized areas are marked with textured lines, while the gathers and drape is indicated through subtle curved strokes, ensuring clarity in construction details.
132
+ # The flat sketch does NOT include any person and it's only the in black and white, being a technical drawing.
133
+ # """
134
+ # },
135
+ # {
136
+ # "type": "image_url",
137
+ # "image_url": {
138
+ # "url": f"data:image/jpeg;base64,{base64_image}",
139
+ # },
140
+ # },
141
+ # ],
142
+ # }
143
+ # ],
144
+ # model="llama-3.2-11b-vision-preview",
145
+ # )
146
+
147
+ # prompt = chat_completion.choices[0].message.content + " In the style of FLTSKC"
148
+
149
  control_image = processor(
150
  image,
151
  low_threshold=50,