snowkylin committed
Commit 133562c · 1 Parent(s): 724ea7e

init commit

Files changed (4)
  1. app.py +176 -0
  2. main.py +38 -0
  3. readme.md +45 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,176 @@
+ import gradio as gr
+ from gradio_i18n import Translate, gettext as _
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
+ import torch
+ from threading import Thread
+ import requests
+ import json
+ import base64
+ from openai import OpenAI
+
+ default_img = None
+ default_base_url = "https://openrouter.ai/api/v1"
+ default_api_model = "google/gemma-3-27b-it:free"
+
+ model_id = "google/gemma-3-4b-it"
+
+ model = Gemma3ForConditionalGeneration.from_pretrained(
+     model_id, device_map="auto"
+ ).eval()
+
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ generate_kwargs = {
+     'max_new_tokens': 1000,
+     'do_sample': True,
+     'temperature': 1.0
+ }
+
+ lang_store = {
+     "und": {
+         "confirm": "Confirm",
+         "default_description": "",
+         "additional_description": "Character description (optional)",
+         "title": "<h1>Chat with a character via reference sheet!</h1>",
+         "upload": "Upload the reference sheet of your character here",
+         "prompt": "You are the character in the image. Start without confirmation.",
+         "additional_info_prompt": "Additional info: ",
+         "description": "Description",
+         "more_options": "More Options",
+         "method": "Method",
+         "base_url": "Base URL",
+         "api_model": "API Model",
+         "api_key": "API Key",
+         "local": "Local",
+         "chatbox": "Chat Box"
+     },
+     "zh": {
+         "confirm": "确认",
+         "default_description": "",
+         "additional_description": "角色描述(可选)",
+         "title": "<h1>与设定图中的角色聊天!</h1>",
+         "upload": "在这里上传角色设定图",
+         "prompt": "你的身份是图中的角色,使用中文。无需确认。",
+         "additional_info_prompt": "补充信息:",
+         "description": "角色描述",
+         "more_options": "更多选项",
+         "method": "方法",
+         "base_url": "API 地址",
+         "api_model": "API 模型",
+         "api_key": "API Key",
+         "local": "本地",
+         "chatbox": "聊天窗口"
+     },
+ }
+
+ def get_init_prompt(img, description):
+     prompt = _("prompt")
+     if description != "":
+         prompt += _("additional_info_prompt") + description
+     return [
+         {
+             "role": "user",
+             "content": [
+                 {"type": "image", "url": img},
+                 {"type": "text", "text": prompt}
+             ]
+         }
+     ]
+
+
+ def generate(history, engine, base_url, api_model, api_key):
+     if engine == 'local':
+         inputs = processor.apply_chat_template(
+             history, add_generation_prompt=True, tokenize=True,
+             return_dict=True, return_tensors="pt"
+         ).to(model.device, dtype=torch.bfloat16)
+
+         streamer = TextIteratorStreamer(processor, skip_prompt=True)
+
+         with torch.inference_mode():
+             thread = Thread(target=model.generate, kwargs=dict(**inputs, **generate_kwargs, streamer=streamer))
+             thread.start()
+
+         generated_text = ""
+         for new_text in streamer:
+             generated_text += new_text
+             yield generated_text
+     elif engine == 'api':
+         for item in history:
+             for item_i in item['content']:
+                 if item_i['type'] == 'image':
+                     item_i['type'] = 'image_url'
+                     with open(item_i['url'], "rb") as image_file:
+                         data = base64.b64encode(image_file.read()).decode("utf-8")
+                     item_i['image_url'] = {'url': 'data:image/jpeg;base64,' + data}
+                     del item_i['url']
+         client = OpenAI(base_url=base_url, api_key=api_key)
+         stream = client.chat.completions.create(
+             model=api_model,
+             messages=history,
+             stream=True,
+             temperature=generate_kwargs['temperature']
+         )
+         collected_text = ""
+         for chunk in stream:
+             delta = chunk.choices[0].delta
+             if delta.content:
+                 collected_text += delta.content
+                 yield collected_text
+
+
+ def prefill_chatbot(img, description, engine, base_url, api_model, api_key):
+     history = get_init_prompt(img, description)
+
+     ret = [{'role': 'assistant', 'content': ""}]
+     for generated_text in generate(history, engine, base_url, api_model, api_key):
+         ret[0]['content'] = generated_text
+         yield ret
+
+
+ def response(message, history: list, img, description, engine, base_url, api_model, api_key):
+     history = [{"role": item["role"], "content": [{"type": "text", "text": item["content"]}]} for item in history]
+     history = get_init_prompt(img, description) + history
+     history.append(
+         {"role": "user", "content": [{"type": "text", "text": message}]}
+     )
+     for generated_text in generate(history, engine, base_url, api_model, api_key):
+         yield generated_text
+
+
+ with gr.Blocks(title="Chat with a character via reference sheet!") as demo:
+     with Translate(lang_store) as lang:
+         gr.HTML(_("title"))
+         img = gr.Image(type="filepath", value=default_img, label=_("upload"), render=False)
+         description = gr.TextArea(value=_("default_description"), label=_("additional_description"), render=False)
+         confirm_btn = gr.Button(_("confirm"), render=False)
+         chatbot = gr.Chatbot(height=600, type='messages', label=_("chatbox"), render=False)
+         engine = gr.Radio([(_('local'), 'local'), ('API', 'api')],
+                           value='local', label=_("method"), render=False, interactive=True)
+         base_url = gr.Textbox(label=_("base_url"), render=False, value=default_base_url)
+         api_model = gr.Textbox(label=_("api_model"), render=False, value=default_api_model)
+         api_key = gr.Textbox(label=_("api_key"), render=False)
+         with gr.Row():
+             with gr.Column(scale=4):
+                 img.render()
+                 with gr.Tab(_("description")):
+                     description.render()
+                 with gr.Tab(_("more_options")):
+                     engine.render()
+                     base_url.render()
+                     api_model.render()
+                     api_key.render()
+                 confirm_btn.render()
+             with gr.Column(scale=6):
+                 chat = gr.ChatInterface(
+                     response,
+                     chatbot=chatbot,
+                     type="messages",
+                     additional_inputs=[img, description, engine, base_url, api_model, api_key],
+                 )
+         confirm_btn.click(prefill_chatbot, [img, description, engine, base_url, api_model, api_key], chat.chatbot)\
+             .then(lambda x: x, chat.chatbot, chat.chatbot_value)
+
+
+ if __name__ == "__main__":
+     demo.launch()
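
For reference, the `api` branch of `generate()` above rewrites each `image` entry into the OpenAI-style `image_url` form (a base64 data URI) and streams the completion from an OpenAI-compatible endpoint. A standalone sketch of the same request, assuming an OpenRouter key in a hypothetical `OPENROUTER_API_KEY` environment variable and a local image file `ref_sheet.jpg`:

```python
import base64
import os

from openai import OpenAI

# Same defaults as app.py: OpenRouter's OpenAI-compatible endpoint and the free Gemma 3 27B model.
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.environ["OPENROUTER_API_KEY"])

with open("ref_sheet.jpg", "rb") as f:  # hypothetical reference-sheet image
    data = base64.b64encode(f.read()).decode("utf-8")

messages = [{
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + data}},
        {"type": "text", "text": "You are the character in the image. Start without confirmation."},
    ],
}]

stream = client.chat.completions.create(
    model="google/gemma-3-27b-it:free",
    messages=messages,
    stream=True,
    temperature=1.0,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
```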
main.py ADDED
@@ -0,0 +1,38 @@
+ from transformers import pipeline
+ import torch
+
+ pipe = pipeline(
+     "image-text-to-text",
+     model="google/gemma-3-4b-it",
+     device="cuda",
+     torch_dtype=torch.bfloat16,
+ )
+
+ messages = [
+     {
+         "role": "user",
+         "content": [
+             {"type": "image", "url": "snowkylin.jpg"},
+             {"type": "text", "text": "You are the character in the image. Start without confirmation."}
+             # {"type": "text", "text": "你的身份是图中的角色,使用中文。无需确认。"}
+         ]
+     }
+ ]
+
+ generate_kwargs = {
+     'max_new_tokens': 1000,
+     'do_sample': True,
+     'temperature': 1.0
+ }
+
+ while True:
+     response = pipe(text=messages, generate_kwargs=generate_kwargs)
+
+     messages = response[0]['generated_text']
+     print(messages[-1]["content"])
+
+     content = input(">> ")
+
+     messages.append(
+         {"role": "user", "content": content}
+     )
readme.md ADDED
@@ -0,0 +1,45 @@
+ # Chat with Reference Sheet
+
+ A demo of [Gemma 3](https://blog.google/technology/developers/gemma-3/), showcasing its excellent vision and multilingual capabilities.
+
+ ## Environment Configuration
+
+ Register an account on [HuggingFace](https://huggingface.co).
+
+ Submit a Gemma Access Request at <https://huggingface.co/google/gemma-3-4b-it>. Access should be granted immediately, with an email notification. After that, the model page will show
+
+ > Gated model: You have been granted access to this model
+
+ Create a conda environment with pip and Python 3.12:
+ ```bash
+ conda create -n transformers_gemma pip python=3.12
+ conda activate transformers_gemma
+ ```
+
+ Install [HuggingFace Transformers for Gemma 3](https://github.com/huggingface/transformers/releases/tag/v4.49.0-Gemma-3):
+ ```bash
+ pip install git+https://github.com/huggingface/[email protected]
+ ```
+
+ Install [PyTorch](https://pytorch.org/get-started/locally/):
+
+ On an Nvidia GPU (with CUDA 12.6):
+ ```bash
+ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
+ ```
+
+ On CPU:
+ ```bash
+ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+ ```
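+
+ Optionally, verify the setup; a minimal sketch that only checks the imports used by this demo and reports whether CUDA is visible:
+ ```python
+ import torch
+ from transformers import Gemma3ForConditionalGeneration  # available in the Gemma 3 release of transformers
+
+ print("CUDA available:", torch.cuda.is_available())
+ ```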
+
+ Create a User Access Token (see <https://huggingface.co/docs/hub/security-tokens>), then log in to your HuggingFace account with `huggingface-cli`:
+
+ ```bash
+ huggingface-cli login
+ ```
+
+ Copy-paste your access token and press Enter.
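+
+ Alternatively, you can log in from Python; a minimal sketch using `huggingface_hub` (installed as a dependency of `transformers`), with a placeholder token:
+ ```python
+ from huggingface_hub import login
+
+ login(token="hf_xxx")  # hypothetical placeholder; use your own User Access Token
+ ```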
+
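+ ## Running the Demo
+
+ With the environment configured, `python app.py` launches the Gradio web UI (it downloads `google/gemma-3-4b-it` on first use), and `python main.py` starts the command-line chat loop, which expects a reference-sheet image named `snowkylin.jpg` in the working directory.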
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio
+ gradio_i18n
+ git+https://github.com/huggingface/[email protected]
+ torch
+ openai