# paligamma/app.py
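
# Runs as a Hugging Face Space or locally. A minimal dependency sketch
# (versions unpinned, untested):
#
#     pip install gradio transformers torch pillow
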
import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM
from transformers import AutoProcessor
from transformers import TextIteratorStreamer
from threading import Thread
import torch

model_id = "microsoft/Phi-3-vision-128k-instruct"
# Load Phi-3-vision on CPU in full precision; "eager" attention avoids the
# flash-attention requirement on machines without a compatible GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    trust_remote_code=True,
    torch_dtype=torch.float32,
    _attn_implementation="eager",
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
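
# On a CUDA machine one would typically load in half precision instead; a
# minimal sketch (assumes a GPU runtime and, for flash attention, that the
# flash-attn package is installed):
#
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id,
#         device_map="cuda",
#         trust_remote_code=True,
#         torch_dtype=torch.float16,
#         _attn_implementation="flash_attention_2",
#     )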


def bot_streaming(message, history):
    """Stream a Phi-3-vision reply for a Gradio multimodal chat message."""
    try:
        # Gradio may deliver uploads as dicts with a "path" key or as bare paths.
        files = message["files"]
        last_file = files[-1] if files else None
        image = last_file["path"] if isinstance(last_file, dict) else last_file
        if not image:
            raise ValueError("No image uploaded")
        # Rebuild earlier turns in the role/content format the chat template expects.
        conversation = []
        for user, assistant in history:
            conversation.extend([
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant or ""},
            ])
        # Phi-3-vision marks where the image goes with an <|image_1|> placeholder.
        conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
        prompt = processor.tokenizer.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=True
        )
        image = Image.open(image).convert("RGB")
        inputs = processor(text=prompt, images=image, return_tensors="pt")
        # The streamer needs the tokenizer, not the full multimodal processor.
        streamer = TextIteratorStreamer(processor.tokenizer, skip_special_tokens=True, skip_prompt=True)
        generation_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=256,
            do_sample=False,  # greedy decoding; a temperature would be ignored here
            eos_token_id=processor.tokenizer.eos_token_id,
        )
        # generate() blocks, so run it on a worker thread and consume the streamer.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
buffer = ""
for new_text in streamer:
buffer += new_text
yield buffer
except Exception as e:
yield f"Error: {str(e)}"

demo = gr.Blocks()
with demo:
    gr.ChatInterface(
        fn=bot_streaming,
        title="Phi3 Vision 128K",
        description="Multimodal AI Vision Model",
        multimodal=True,  # the chat textbox must accept image uploads
        examples=[
            {"text": "Describe this image", "files": ["./example.jpg"]},
        ],
    )

demo.queue()  # queuing is required for generator-based (streaming) handlers
demo.launch(debug=True)