snowkylin committed
Commit d5ee2a4 · 1 Parent(s): 7a3c79c

allow additional reference images, resize image before use

Files changed (2)
  1. app.py +45 -19
  2. readme.md +2 -2
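
The "resize image before use" part of this commit is the new encode_img helper added to app.py (full diff below): every reference image is downscaled with Pillow, re-encoded as JPEG, and embedded as a base64 data URL, so the same message history works for both the local model and the OpenAI-compatible API path. As a self-contained sketch of that approach, mirroring the helper in the diff (the 896-pixel bound and JPEG quality 60 are the values the commit uses):

import base64
import io

from PIL import Image

def encode_img(filepath, thumbnail=(896, 896)):
    # Downscale in place; thumbnail() keeps the aspect ratio and never upsizes.
    img = Image.open(filepath).convert("RGB")   # JPEG has no alpha channel
    img.thumbnail(thumbnail)
    # Re-encode as JPEG and wrap the bytes in a base64 data URL.
    buffer = io.BytesIO()
    img.save(buffer, "JPEG", quality=60)
    data = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return "data:image/jpeg;base64," + data

get_init_prompt then builds the opening user message from the main reference sheet plus any additional images, all passed through this helper.
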
app.py CHANGED
@@ -5,13 +5,15 @@ import torch
 from threading import Thread
 import requests
 import json
+import io
+from PIL import Image
 import os
 import base64
 from openai import OpenAI
 
 default_img = None
 default_base_url = "https://openrouter.ai/api/v1"
-default_api_model = "google/gemma-3-27b-it:free"
+default_api_model = "google/gemma-3-27b-it"
 
 model_id = "google/gemma-3-4b-it"
 
@@ -32,10 +34,13 @@ lang_store = {
         "confirm": "Confirm",
         "default_description": "",
         "additional_description": "Character description (optional)",
-        "title": "<h1>Chat with a character via reference sheet!</h1>",
+        "more_imgs": "More reference images of the character (optional)",
+        "title": "<h1>Chat with a character via reference sheet!</h1>>",
+        "powered_by_gemma": "<p>Powered by <a href='https://blog.google/technology/developers/gemma-3/'>Gemma 3</a></p",
         "upload": "Upload the reference sheet of your character here",
-        "prompt": "You are the character in the image. Start without confirmation.",
+        "prompt": "You are the character in the image. Do not include list in response unless requested. Do not mention the reference images. Start without confirmation.",
         "additional_info_prompt": "Additional info: ",
+        "additional_reference_images_prompt": "Additional reference images of the character:",
         "description": "Description",
         "more_options": "More Options",
         "method": "Method",
@@ -49,10 +54,13 @@ lang_store = {
         "confirm": "确认",
         "default_description": "",
         "additional_description": "角色描述(可选)",
+        "more_imgs": "更多角色参考图(可选,可上传多张)",
         "title": "<h1>与设定图中的角色聊天!</h1>",
+        "powered_by_gemma": "<p>由 <a href='https://blog.google/technology/developers/gemma-3/'>Gemma 3</a> 驱动</p>",
         "upload": "在这里上传角色设定图",
-        "prompt": "你的身份是图中的角色,使用中文。无需确认。",
+        "prompt": "你的身份是图中的角色,使用中文。除非对方要求,否则不在回复中使用列表。不在回复中提及参考图。无需确认。",
         "additional_info_prompt": "补充信息:",
+        "additional_reference_images_prompt": "该角色的更多参考图:",
         "description": "角色描述",
         "more_options": "更多选项",
         "method": "方法",
@@ -64,17 +72,31 @@ lang_store = {
     },
 }
 
-def get_init_prompt(img, description):
+def encode_img(filepath, thumbnail=(896, 896)):
+    more_img = Image.open(filepath)
+    more_img = more_img.convert('RGB')
+    more_img.thumbnail(thumbnail)
+    buffer = io.BytesIO()
+    more_img.save(buffer, "JPEG", quality=60)
+    encoded_img = "data:image/jpeg;base64," + base64.b64encode(buffer.getvalue()).decode("utf-8")
+    return encoded_img
+
+def get_init_prompt(img, description, more_imgs):
     prompt = _("prompt")
     if description != "":
-        prompt += _("additional_info_prompt") + description
+        prompt += "\n" + _("additional_info_prompt") + description
+    if more_imgs is None:
+        more_imgs = []
+    if len(more_imgs) > 0:
+        prompt += "\n" + _("additional_reference_images_prompt")
+    content = [
+        {"type": "image", "url": encode_img(img)},
+        {"type": "text", "text": prompt}
+    ] + [{"type": "image", "url": encode_img(filepath)} for filepath in more_imgs]
     return [
         {
             "role": "user",
-            "content": [
-                {"type": "image", "url": img},
-                {"type": "text", "text": prompt}
-            ]
+            "content": content
         }
     ]
 
@@ -101,9 +123,7 @@ def generate(history, engine, base_url, api_model, api_key):
             for item_i in item['content']:
                 if item_i['type'] == 'image':
                     item_i['type'] = 'image_url'
-                    with open(item_i['url'], "rb") as image_file:
-                        data = base64.b64encode(image_file.read()).decode("utf-8")
-                    item_i['image_url'] = {'url': 'data:image/jpeg;base64,' + data}
+                    item_i['image_url'] = {'url': item_i['url']}
                     del item_i['url']
         if base_url == default_base_url and api_model == default_api_model and api_key == "":
            api_key = os.environ['OPENROUTER_TOKEN']
@@ -122,8 +142,8 @@ def generate(history, engine, base_url, api_model, api_key):
             yield collected_text
 
 
-def prefill_chatbot(img, description, engine, base_url, api_model, api_key):
-    history = get_init_prompt(img, description)
+def prefill_chatbot(img, description, more_imgs, engine, base_url, api_model, api_key):
+    history = get_init_prompt(img, description, more_imgs)
 
     ret = [{'role': 'assistant', 'content': ""}]
     for generated_text in generate(history, engine, base_url, api_model, api_key):
@@ -131,9 +151,9 @@ def prefill_chatbot(img, description, engine, base_url, api_model, api_key):
         yield ret
 
 
-def response(message, history: list, img, description, engine, base_url, api_model, api_key):
+def response(message, history: list, img, description, more_imgs, engine, base_url, api_model, api_key):
     history = [{"role": item["role"], "content": [{"type": "text", "text": item["content"]}]} for item in history]
-    history = get_init_prompt(img, description) + history
+    history = get_init_prompt(img, description, more_imgs) + history
     history.append(
         {"role": "user", "content": [{"type": "text", "text": message}]}
     )
@@ -146,6 +166,11 @@ with gr.Blocks(title="Chat with a character via reference sheet!") as demo:
     gr.HTML(_("title"))
     img = gr.Image(type="filepath", value=default_img, label=_("upload"), render=False)
     description = gr.TextArea(value=_("default_description"), label=_("additional_description"), render=False)
+    more_imgs = gr.Files(
+        label=_("more_imgs"),
+        file_types=["image"],
+        render=False
+    )
    confirm_btn = gr.Button(_("confirm"), render=False)
    chatbot = gr.Chatbot(height=600, type='messages', label=_("chatbox"), render=False)
    engine = gr.Radio([(_('local'), 'local'), ('API', 'api')],
@@ -158,6 +183,7 @@ with gr.Blocks(title="Chat with a character via reference sheet!") as demo:
             img.render()
         with gr.Tab(_("description")):
             description.render()
+            more_imgs.render()
         with gr.Tab(_("more_options")):
             engine.render()
             base_url.render()
@@ -169,9 +195,9 @@ with gr.Blocks(title="Chat with a character via reference sheet!") as demo:
         response,
         chatbot=chatbot,
         type="messages",
-        additional_inputs=[img, description, engine, base_url, api_model, api_key],
+        additional_inputs=[img, description, more_imgs, engine, base_url, api_model, api_key],
     )
-    confirm_btn.click(prefill_chatbot, [img, description, engine, base_url, api_model, api_key], chat.chatbot)\
+    confirm_btn.click(prefill_chatbot, [img, description, more_imgs, engine, base_url, api_model, api_key], chat.chatbot)\
        .then(lambda x: x, chat.chatbot, chat.chatbot_value)
 
 
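
Because get_init_prompt now stores every image as a data URL, the API branch of generate() only has to rename the content type from image to image_url; the earlier open-and-encode step is gone. A rough usage sketch of the resulting OpenAI-compatible call (the file name and API key are placeholders; the model and base URL are the defaults from app.py, and encode_img is the helper added in this commit):

from openai import OpenAI

client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key="YOUR_OPENROUTER_KEY")  # placeholder key
messages = [{
    "role": "user",
    "content": [
        # The data URL produced by encode_img() is passed straight through as image_url.
        {"type": "image_url", "image_url": {"url": encode_img("refsheet.png")}},  # placeholder file
        {"type": "text", "text": "You are the character in the image. Start without confirmation."},
    ],
}]
stream = client.chat.completions.create(model="google/gemma-3-27b-it", messages=messages, stream=True)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")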
 
readme.md CHANGED
@@ -1,6 +1,6 @@
 ---
 title: Refsheet Chat
-emoji: 📉
+emoji: 💬
 colorFrom: gray
 colorTo: green
 sdk: gradio
@@ -11,7 +11,7 @@ license: mit
 short_description: Chat with a character via reference sheet!
 ---
 
-# Chat with Reference Sheet
+# Chat via Reference Sheet
 
 A demo of [Gemma 3](https://blog.google/technology/developers/gemma-3/), demonstrating its excellent vision and multilingual capability.