import gradio as gr
from PIL import Image 
from transformers import AutoModelForCausalLM 
from transformers import AutoProcessor 
from transformers import TextIteratorStreamer
from threading import Thread
import torch

# Load Phi-3-vision on CPU; eager attention avoids the flash-attn dependency.
model_id = "microsoft/Phi-3-vision-128k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    trust_remote_code=True,
    torch_dtype=torch.float32,
    _attn_implementation="eager"
)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
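
# A GPU variant would be a straightforward swap (a sketch, not tested here;
# assumes a CUDA build of torch): device_map="cuda" and
# torch_dtype=torch.bfloat16. On ZeroGPU Spaces, bot_streaming below would
# additionally be decorated with @spaces.GPU from the `spaces` package.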

def bot_streaming(message, history):
    try:
        # Newer Gradio versions pass uploaded files as dicts with a "path" key,
        # older ones pass plain paths; accept both and use the latest upload.
        files = message["files"]
        if not files:
            raise ValueError("No image uploaded")
        image = files[-1]["path"] if isinstance(files[-1], dict) else files[-1]

        # Rebuild the chat history in the format apply_chat_template expects;
        # file-only turns arrive from Gradio as tuples, so skip those.
        conversation = []
        for user, assistant in history:
            if not isinstance(user, str):
                continue
            conversation.extend([
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant or ""},
            ])
        
        # Phi-3-vision expects an <|image_1|> placeholder marking where the image goes.
        conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
        
        prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
        image = Image.open(image)
        inputs = processor(prompt, image, return_tensors="pt")

        # Stream decoded tokens as they are produced; skip the echoed prompt.
        streamer = TextIteratorStreamer(processor.tokenizer, skip_special_tokens=True, skip_prompt=True)
        generation_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=256,
            do_sample=False,  # greedy decoding; a temperature would be ignored here
            eos_token_id=processor.tokenizer.eos_token_id
        )

        # Run generation on a background thread so partial output can be yielded.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        buffer = ""
        for new_text in streamer:
            buffer += new_text
            yield buffer
        thread.join()

    except Exception as e:
        yield f"Error: {str(e)}"

with gr.Blocks() as demo:
    gr.ChatInterface(
        fn=bot_streaming,
        multimodal=True,  # required so `message` arrives as {"text": ..., "files": [...]}
        title="Phi3 Vision 128K",
        description="Multimodal AI Vision Model",
        examples=[
            {"text": "Describe this image", "files": ["./example.jpg"]},
        ]
    )

demo.queue()
demo.launch(debug=True)
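
# Rough dependency list (an assumption; this script pins no versions):
#   pip install gradio transformers torch pillow accelerate
# accelerate backs the device_map argument, and no flash-attn build is needed
# because attention is set to "eager".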