|
import torch |
|
import gradio as gr |
|
from transformers import TextIteratorStreamer, AutoProcessor, LlavaForConditionalGeneration |
|
from PIL import Image |
|
import threading |
|
import spaces |
|
import accelerate |
|
import time |
|
|
|
# HTML header rendered at the top of the page via gr.Markdown.
# NOTE(review): the "π" after "Krypton" looks like a mis-decoded emoji (a
# 4-byte UTF-8 sequence read with a Greek single-byte codec) -- the original
# character cannot be recovered from this file; confirm and restore it.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Krypton π</h1>
<p>This uses an Open Source model from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
</div>
'''
|
|
|
# Hugging Face checkpoint used for both the model weights and the processor.
model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"

# Load the weights in half precision and move the model to the GPU at import
# time, so every request reuses the same instance.
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
).to('cuda')

processor = AutoProcessor.from_pretrained(model_id)

# NOTE(review): 128009 is presumably the id of the "<|eot_id|>" token that the
# chat prompt uses as its end-of-turn marker -- confirm against the tokenizer.
model.generation_config.eos_token_id = 128009
|
|
|
@spaces.GPU(duration=120)
def krypton(input,
            history,
            max_new_tokens,
            temperature,
            num_beams,
            do_sample: bool = True):
    """Answer a question about an image, yielding the reply as it grows.

    The image is taken from the current message when one is attached,
    otherwise from the most recent earlier turn that carried one. The
    question is wrapped in the chat template, run through the processor
    together with the image, and passed to the model.

    Args:
        input: Multimodal chat message, read as a mapping with a ``"text"``
            entry and a ``"files"`` list whose items are either path strings
            or dicts with a ``"path"`` key. (The name shadows the builtin but
            is kept so existing callers are unaffected.)
        history: Earlier chat turns. A turn whose first element is a tuple is
            treated as an image upload and its first item as the image path.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; ``0.0`` turns sampling off.
        num_beams: Beam count. Values above 1 cannot be streamed, so the
            reply is then produced in one piece.
        do_sample: Whether to sample; forced to ``False`` at temperature 0.

    Yields:
        The response text accumulated so far (the complete text in a single
        step when beam search is used).

    Raises:
        gr.Error: If neither the message nor the history contains an image.
    """
    # Prefer an image attached to the current message.
    image = None
    if input["files"]:
        latest = input["files"][-1]
        image = latest["path"] if isinstance(latest, dict) else latest
    else:
        # No upload this turn: reuse the last image found in earlier turns.
        for turn in history:
            if isinstance(turn[0], tuple):
                image = turn[0][0]

    # The error has to be raised (not merely constructed) to stop the request
    # and be shown in the UI.
    if image is None:
        raise gr.Error("Upload an image for Krypton to work")

    # f-string so the user's question is actually interpolated into the prompt.
    prompt = (
        f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{input['text']}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    image = Image.open(image)
    # Keyword arguments keep the call independent of the processor's
    # positional parameter order.
    inputs = processor(text=prompt, images=image, return_tensors='pt').to(0, torch.float16)

    if temperature == 0.0:
        do_sample = False

    max_new_tokens = int(max_new_tokens)
    num_beams = int(num_beams)

    # The processor output is unpacked so generate() receives input_ids,
    # attention_mask and pixel_values as separate arguments.
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=num_beams,
        do_sample=do_sample,
    )
    if do_sample:
        # Temperature only has an effect when sampling.
        generation_kwargs["temperature"] = temperature

    if num_beams > 1:
        # generate() rejects a streamer combined with beam search, which would
        # leave the streaming loop below waiting forever; produce the answer
        # in one piece instead.
        output_ids = model.generate(**generation_kwargs)
        prompt_length = inputs["input_ids"].shape[1]
        yield processor.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
        return

    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
    generation_kwargs["streamer"] = streamer

    # Generation runs in a background thread while this generator drains the
    # streamer and forwards partial text to the UI.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    time.sleep(0.5)  # start-up delay kept from the original implementation
    for new_text in streamer:
        # Special tokens are not skipped by the streamer, so cut the chunk at
        # the end-of-turn marker if it shows up.
        buffer += new_text.partition("<|eot_id|>")[0]
        time.sleep(0.06)  # per-chunk pacing kept from the original implementation
        yield buffer

    thread.join()
|
|
|
# Chat components are created here and passed to gr.ChatInterface below.
chatbot = gr.Chatbot(height=600, label="Krypt AI")
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["image"],
    placeholder="Enter your question or upload an image.",
    show_label=False,
)

with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=krypton,
        chatbot=chatbot,
        fill_height=True,
        # "\u2699\ufe0f" is the gear emoji; the label had been mis-decoded
        # into Greek letters. Escapes keep it safe from re-encoding.
        additional_inputs_accordion=gr.Accordion(
            label="\u2699\ufe0f Parameters", open=False, render=False
        ),
        # Order matches krypton's extra parameters:
        # max_new_tokens, temperature, num_beams.
        additional_inputs=[
            gr.Slider(minimum=20,
                      maximum=80,
                      step=1,
                      value=50,
                      label="Max New Tokens",
                      render=False),
            gr.Slider(minimum=0.0,
                      maximum=1.0,
                      step=0.1,
                      value=0.7,
                      label="Temperature",
                      render=False),
            gr.Slider(minimum=1,
                      maximum=12,
                      step=1,
                      value=5,
                      label="Number of Beams",
                      render=False),
        ],
        multimodal=True,
        textbox=chat_input,
    )

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|