Spaces:

Pectics
/

Softie

Sleeping

App Files Files Community

Softie / app.py

Pectics

First trial

03d2f46 about 2 months ago

raw

history blame

2.28 kB

	import gradio as gr
	import spaces

	from threading import Thread
	from torch import bfloat16
	from transformers import Qwen2VLForConditionalGeneration, Qwen2VLProcessor, TextIteratorStreamer, AutoProcessor
	from qwen_vl_utils import process_vision_info

	# Hub repository id used for both the model weights and the processor.
	model_path = "Pectics/Softie-VL-7B-250123"

	# Lower/upper pixel budgets forwarded to the processor below.
	# NOTE(review): presumably expressed as multiples of a 28x28 patch, as in
	# the Qwen2-VL examples -- confirm against the model card.
	min_pixels = 256 * 28 * 28
	max_pixels = 1280 * 28 * 28

	# Load the weights in bfloat16 and let `device_map="auto"` decide placement.
	model = Qwen2VLForConditionalGeneration.from_pretrained(
	    model_path,
	    device_map="auto",
	    torch_dtype=bfloat16,
	    # Optional kwarg, currently disabled:
	    # attn_implementation="flash_attention_2",
	)

	# The processor is loaded from the same repository with the pixel budgets.
	processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(
	    model_path,
	    min_pixels=min_pixels,
	    max_pixels=max_pixels,
	)

	@spaces.GPU
	def respond(
	    message,
	    history,
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream the model's reply to ``message`` as a growing string.

	    Args:
	        message: The latest user input.
	        history: Earlier turns; each item is read as a mapping with
	            ``"role"`` and ``"content"`` keys.
	        system_message: Content of the leading system turn.
	        max_tokens: Passed to ``generate`` as ``max_new_tokens``.
	        temperature: Sampling temperature passed to ``generate``.
	        top_p: Nucleus-sampling threshold passed to ``generate``.

	    Yields:
	        The response text accumulated so far, once per streamed chunk.

	    Raises:
	        Exception: Whatever ``model.generate`` raised in the worker thread,
	            re-raised here after the stream has been drained.
	    """
	    # Only role/content are forwarded; any extra keys on history items are
	    # dropped before the chat template sees them.
	    messages = [
	        {"role": "system", "content": system_message},
	        *({"role": m["role"], "content": m["content"]} for m in history),
	        {"role": "user", "content": message},
	    ]

	    text_inputs = processor.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )
	    image_inputs, video_inputs = process_vision_info(messages)
	    inputs = processor(
	        text=[text_inputs],
	        images=image_inputs,
	        videos=video_inputs,
	        padding=True,
	        return_tensors="pt",
	    ).to("cuda")

	    streamer = TextIteratorStreamer(
	        processor, skip_prompt=True, skip_special_tokens=True
	    )
	    generation_kwargs = dict(
	        **inputs,
	        streamer=streamer,
	        max_new_tokens=max_tokens,
	        temperature=temperature,
	        top_p=top_p,
	    )

	    # Exceptions raised inside the worker are collected here so they can be
	    # surfaced to the caller instead of being lost with the thread.
	    failures = []

	    def run_generation():
	        try:
	            model.generate(**generation_kwargs)
	        except Exception as exc:
	            failures.append(exc)
	            # Without the end-of-stream signal the consumer loop below would
	            # block forever, because the streamer has no timeout.
	            streamer.end()

	    worker = Thread(target=run_generation)
	    worker.start()

	    response = ""
	    for token in streamer:
	        response += token
	        yield response

	    # The stream is exhausted, so generation has finished (or failed);
	    # reap the thread before returning.
	    worker.join()
	    if failures:
	        raise failures[0]

	# Extra controls for the chat UI. Their order matches the trailing
	# parameters of respond(): system_message, max_tokens, temperature, top_p.
	_extra_controls = [
	    gr.Textbox(label="系统设定", value="You are Softie, a helpful assistant."),
	    gr.Slider(label="最大生成长度", minimum=1, maximum=2048, step=1, value=512),
	    gr.Slider(
	        label="温度系数（Temperature）",
	        minimum=0.01,
	        maximum=4.0,
	        step=0.01,
	        value=0.75,
	    ),
	    gr.Slider(
	        label="核取样系数（Top-p）",
	        minimum=0.01,
	        maximum=1.0,
	        step=0.01,
	        value=0.5,
	    ),
	]

	# Chat front-end wired to respond(); history uses the "messages" format.
	app = gr.ChatInterface(
	    respond,
	    additional_inputs=_extra_controls,
	    type="messages",
	)

	# Start the UI only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()