Spaces:
Running
Running
snowkylin
committed on
Commit
·
133562c
1
Parent(s):
724ea7e
init commit
Browse files
app.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gradio_i18n import Translate, gettext as _
|
3 |
+
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
|
4 |
+
import torch
|
5 |
+
from threading import Thread
|
6 |
+
import requests
|
7 |
+
import json
|
8 |
+
import base64
|
9 |
+
from openai import OpenAI
|
10 |
+
|
11 |
+
# Initial values for the UI: no preloaded image, plus the OpenAI-compatible
# endpoint and model name that pre-fill the API option fields.
default_img = None
default_base_url = "https://openrouter.ai/api/v1"
default_api_model = "google/gemma-3-27b-it:free"

# Checkpoint used by the local engine.
model_id = "google/gemma-3-4b-it"

# Load the weights once at import time and put the model in evaluation mode.
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, device_map="auto")
model.eval()

processor = AutoProcessor.from_pretrained(model_id)

# Sampling settings passed to model.generate(); the API engine reads only
# 'temperature' from this dict.
generate_kwargs = dict(
    max_new_tokens=1000,
    do_sample=True,
    temperature=1.0,
)
|
28 |
+
|
29 |
+
# UI strings for the "und" language entry (English text).
_UND_STRINGS = {
    "confirm": "Confirm",
    "default_description": "",
    "additional_description": "Character description (optional)",
    "title": "<h1>Chat with a character via reference sheet!</h1>",
    "upload": "Upload the reference sheet of your character here",
    "prompt": "You are the character in the image. Start without confirmation.",
    "additional_info_prompt": "Additional info: ",
    "description": "Description",
    "more_options": "More Options",
    "method": "Method",
    "base_url": "Base URL",
    "api_model": "API Model",
    "api_key": "API Key",
    "local": "Local",
    "chatbox": "Chat Box",
}

# UI strings for the "zh" language entry (Chinese text); same keys as above.
_ZH_STRINGS = {
    "confirm": "确认",
    "default_description": "",
    "additional_description": "角色描述(可选)",
    "title": "<h1>与设定图中的角色聊天!</h1>",
    "upload": "在这里上传角色设定图",
    "prompt": "你的身份是图中的角色,使用中文。无需确认。",
    "additional_info_prompt": "补充信息:",
    "description": "角色描述",
    "more_options": "更多选项",
    "method": "方法",
    "base_url": "API 地址",
    "api_model": "API 模型",
    "api_key": "API Key",
    "local": "本地",
    "chatbox": "聊天窗口",
}

# Translation table handed to Translate(): language code -> {key: text}.
lang_store = {
    "und": _UND_STRINGS,
    "zh": _ZH_STRINGS,
}
|
65 |
+
|
66 |
+
def get_init_prompt(img, description):
    """Build the opening user message that introduces the character.

    Args:
        img: Value stored under the image part's "url" key (the UI passes
            the uploaded image's file path).
        description: Optional extra character description; an empty string
            means "none" and leaves the base prompt unchanged.

    Returns:
        A one-element list holding a "user" message whose content is the
        image part followed by the text prompt.
    """
    text = _("prompt")
    has_extra_info = description != ""
    if has_extra_info:
        text += _("additional_info_prompt") + description

    image_part = {"type": "image", "url": img}
    text_part = {"type": "text", "text": text}
    opening_turn = {"role": "user", "content": [image_part, text_part]}
    return [opening_turn]
|
79 |
+
|
80 |
+
|
81 |
+
def _to_openai_messages(history):
    """Return a copy of *history* converted to the OpenAI chat format.

    Image parts (``{"type": "image", "url": <path>}``) become ``image_url``
    parts carrying the file as a base64 data URL; all other parts are copied
    unchanged. The input list and its dicts are not modified.
    """
    import mimetypes  # function-scope import keeps the helper self-contained

    messages = []
    for message in history:
        parts = []
        for part in message['content']:
            if part['type'] != 'image':
                parts.append(dict(part))
                continue

            path = part['url']
            with open(path, "rb") as image_file:
                data = base64.b64encode(image_file.read()).decode("utf-8")

            # Label the payload with the file's actual type (PNG, WebP, ...)
            # and fall back to JPEG when it cannot be guessed.
            mime_type = mimetypes.guess_type(path)[0]
            if not mime_type or not mime_type.startswith('image/'):
                mime_type = 'image/jpeg'

            converted = {k: v for k, v in part.items() if k not in ('type', 'url')}
            converted['type'] = 'image_url'
            converted['image_url'] = {'url': 'data:' + mime_type + ';base64,' + data}
            parts.append(converted)
        messages.append({**message, 'content': parts})
    return messages


def generate(history, engine, base_url, api_model, api_key):
    """Stream the assistant's reply to *history*.

    This is a generator: each yielded value is the full reply text
    accumulated so far, not just the newest fragment.

    Args:
        history: List of chat messages; each message's "content" is a list
            of typed parts ("image" parts carry a file path under "url").
        engine: 'local' to run the module-level model, 'api' to call an
            OpenAI-compatible endpoint. Any other value yields nothing.
        base_url: Endpoint base URL (used by the 'api' engine only).
        api_model: Model name sent to the endpoint ('api' engine only).
        api_key: API key for the endpoint ('api' engine only).

    Yields:
        str: The reply text generated so far.
    """
    if engine == 'local':
        inputs = processor.apply_chat_template(
            history, add_generation_prompt=True, tokenize=True,
            return_dict=True, return_tensors="pt"
        ).to(model.device, dtype=torch.bfloat16)

        # skip_special_tokens is forwarded to decode() so control tokens
        # (e.g. the end-of-turn marker) stay out of the text that is shown
        # to the user and later fed back in as history.
        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)

        with torch.inference_mode():
            # Generation runs in a worker thread; this thread consumes the
            # streamer so partial text can be yielded as it arrives.
            thread = Thread(target=model.generate, kwargs=dict(**inputs, **generate_kwargs, streamer=streamer))
            thread.start()

        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            yield generated_text

        # The streamer is exhausted, so the worker is finishing; reap it.
        thread.join()
    elif engine == 'api':
        client = OpenAI(base_url=base_url, api_key=api_key)
        stream = client.chat.completions.create(
            model=api_model,
            # Convert a copy so the caller's history keeps its original form.
            messages=_to_openai_messages(history),
            stream=True,
            temperature=generate_kwargs['temperature']
        )
        collected_text = ""
        for chunk in stream:
            # A chunk may arrive without any choices (e.g. a usage-only
            # chunk); skip it instead of failing on choices[0].
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if delta.content:
                collected_text += delta.content
                yield collected_text
|
120 |
+
|
121 |
+
|
122 |
+
def prefill_chatbot(img, description, engine, base_url, api_model, api_key):
    """Stream the character's opening message into the chatbot.

    Builds the initial prompt from the image and description, runs it
    through ``generate`` and, for every partial text received, yields a
    one-message chat history containing the assistant reply so far. The
    same list object is updated in place and yielded each time.
    """
    opening_prompt = get_init_prompt(img, description)

    reply = {'role': 'assistant', 'content': ""}
    messages = [reply]
    for partial_text in generate(opening_prompt, engine, base_url, api_model, api_key):
        reply['content'] = partial_text
        yield messages
|
129 |
+
|
130 |
+
|
131 |
+
def response(message, history: list, img, description, engine, base_url, api_model, api_key):
    """Chat handler: stream the reply to *message* given the prior turns.

    The conversation sent to ``generate`` is the initial image prompt,
    followed by every previous turn re-wrapped as a single text part,
    followed by the new user message. Yields the reply text accumulated
    so far, exactly as ``generate`` produces it.
    """
    # Re-wrap the flat {"role", "content"} turns as typed text parts.
    past_turns = []
    for turn in history:
        past_turns.append(
            {"role": turn["role"], "content": [{"type": "text", "text": turn["content"]}]}
        )

    new_turn = {"role": "user", "content": [{"type": "text", "text": message}]}
    conversation = [*get_init_prompt(img, description), *past_turns, new_turn]

    yield from generate(conversation, engine, base_url, api_model, api_key)
|
139 |
+
|
140 |
+
|
141 |
+
# Gradio UI: the image upload and options sit in the left column, the chat
# in the right column.
with gr.Blocks(title="Chat with a character via reference sheet!") as demo:
    with Translate(lang_store) as lang:
        gr.HTML(_("title"))

        # Components are created with render=False and placed into the
        # layout further down via .render().
        img = gr.Image(type="filepath", value=default_img, label=_("upload"), render=False)
        description = gr.TextArea(value=_("default_description"), label=_("additional_description"), render=False)
        confirm_btn = gr.Button(_("confirm"), render=False)
        chatbot = gr.Chatbot(height=600, type='messages', label=_("chatbox"), render=False)
        engine = gr.Radio([(_('local'), 'local'), ('API', 'api')],
                          value='local', label=_("method"), render=False, interactive=True)
        base_url = gr.Textbox(label=_("base_url"), render=False, value=default_base_url)
        api_model = gr.Textbox(label=_("api_model"), render=False, value=default_api_model)
        # The API key is a secret: mask it instead of showing it in clear text.
        api_key = gr.Textbox(label=_("api_key"), type="password", render=False)

        with gr.Row():
            with gr.Column(scale=4):
                img.render()
                with gr.Tab(_("description")):
                    description.render()
                with gr.Tab(_("more_options")):
                    engine.render()
                    base_url.render()
                    api_model.render()
                    api_key.render()
                confirm_btn.render()
            with gr.Column(scale=6):
                chat = gr.ChatInterface(
                    response,
                    chatbot=chatbot,
                    type="messages",
                    additional_inputs=[img, description, engine, base_url, api_model, api_key],
                )

        # Confirm streams the character's opening message into the chatbot,
        # then copies the chatbot content into chat.chatbot_value
        # (presumably so the chat interface keeps it as history - confirm).
        confirm_btn.click(prefill_chatbot, [img, description, engine, base_url, api_model, api_key], chat.chatbot)\
            .then(lambda x: x, chat.chatbot, chat.chatbot_value)


if __name__ == "__main__":
    demo.launch()
|
main.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
import torch
|
3 |
+
|
4 |
+
# Use the GPU when one is available and fall back to the CPU otherwise, so
# the script also runs on a CPU-only PyTorch install.
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    "image-text-to-text",
    model="google/gemma-3-4b-it",
    device=device,
    torch_dtype=torch.bfloat16,
)

# Opening turn: the reference image plus the instruction to role-play it.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "snowkylin.jpg"},
            {"type": "text", "text": "You are the character in the image. Start without confirmation."}
            # Chinese-language variant of the prompt above:
            # {"type": "text", "text": "你的身份是图中的角色,使用中文。无需确认。"}
        ]
    }
]

generate_kwargs = {
    'max_new_tokens': 1000,
    'do_sample': True,
    'temperature': 1.0
}

# Simple terminal chat loop: generate, print the newest message, read the
# next user line, repeat.
while True:
    response = pipe(text=messages, generate_kwargs=generate_kwargs)

    # The pipeline output is used as the updated conversation; its last
    # entry is the message that was just generated.
    messages = response[0]['generated_text']
    print(messages[-1]["content"])

    try:
        content = input(">> ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the chat cleanly instead of with a traceback.
        print()
        break

    messages.append(
        {"role": "user", "content": content}
    )
|
readme.md
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Chat with Reference Sheet
|
2 |
+
|
3 |
+
A demo of [Gemma 3](https://blog.google/technology/developers/gemma-3/), demonstrating its excellent vision and multilingual capabilities.
|
4 |
+
|
5 |
+
## Environment Configuration
|
6 |
+
|
7 |
+
Register an account on [HuggingFace](https://huggingface.co)
|
8 |
+
|
9 |
+
Submit a Gemma Access Request from <https://huggingface.co/google/gemma-3-4b-it>. Access should be granted immediately, with an email notification. After that, the model page will show:
|
10 |
+
|
11 |
+
> Gated model: You have been granted access to this model
|
12 |
+
|
13 |
+
Create a conda environment with pip and Python 3.12:
|
14 |
+
```bash
|
15 |
+
conda create -n transformers_gemma pip python=3.12
|
16 |
+
conda activate transformers_gemma
|
17 |
+
```
|
18 |
+
|
19 |
+
Install [HuggingFace Transformers for Gemma 3](https://github.com/huggingface/transformers/releases/tag/v4.49.0-Gemma-3):
|
20 |
+
```bash
|
21 |
+
pip install git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
|
22 |
+
```
|
23 |
+
|
24 |
+
Install [PyTorch](https://pytorch.org/get-started/locally/)
|
25 |
+
|
26 |
+
On Nvidia GPU (with CUDA 12.6):
|
27 |
+
```bash
|
28 |
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
|
29 |
+
```
|
30 |
+
|
31 |
+
On CPU:
|
32 |
+
```bash
|
33 |
+
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
34 |
+
```
|
35 |
+
|
36 |
+
Create a User Access Token from <https://huggingface.co/docs/hub/security-tokens>, then log in to your HuggingFace account with `huggingface-cli`:
|
37 |
+
|
38 |
+
```bash
|
39 |
+
huggingface-cli login
|
40 |
+
```
|
41 |
+
|
42 |
+
Copy-paste your access token and press enter.
|
43 |
+
|
44 |
+
|
45 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
gradio_i18n
|
3 |
+
git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
|
4 |
+
torch
|
5 |
+
openai
|