File size: 1,703 Bytes
ee668ff
ca317b2
 
ee668ff
ca317b2
ee668ff
03d6908
ca317b2
 
 
 
 
 
 
 
03d6908
ca317b2
 
 
 
 
 
 
ee668ff
 
03d6908
386e329
 
ca317b2
 
 
 
 
 
 
 
 
03d6908
 
ee668ff
03d6908
 
 
 
 
 
505f42a
386e329
 
ca317b2
03d6908
 
 
 
 
ee668ff
 
03d6908
ee668ff
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from llava_llama3.serve.cli import chat_llava
from llava_llama3.model.builder import load_pretrained_model
from PIL import Image
import torch

# Model configuration
model_path = "TheFinAI/FinLLaVA"  # Hugging Face Hub repo id of the FinLLaVA checkpoint
device = "cuda"  # assumes a CUDA GPU is available — TODO confirm for CPU-only hosts
conv_mode = "llama_3"  # conversation template name — NOTE(review): defined but never passed to chat_llava below; presumably its default matches
temperature = 0  # greedy decoding — NOTE(review): defined but never passed to chat_llava below
max_new_tokens = 512  # generation length cap — NOTE(review): defined but never passed to chat_llava below
load_8bit = False  # quantized loading disabled (full precision)
load_4bit = False

# Load the pretrained model ONCE at import time; these four globals are
# reused by every bot_streaming() call below.
tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
    model_path, 
    None, 
    'llava_llama3', 
    load_8bit, 
    load_4bit, 
    device=device
)

# Define the prediction function.
# BUG FIX: the original applied `@spaces.GPU` without ever importing `spaces`,
# which raises NameError at definition time. `spaces` only exists in the
# Hugging Face Spaces (ZeroGPU) runtime, so import it defensively and fall
# back to a no-op decorator elsewhere.
try:
    import spaces
    _gpu_decorator = spaces.GPU
except ImportError:  # not running on HF Spaces — decorate with identity
    def _gpu_decorator(fn):
        return fn


@_gpu_decorator
def bot_streaming(image, text, history):
    """Run one chat turn against the loaded FinLLaVA model.

    Args:
        image: path to the uploaded image file (gr.Image type="filepath").
        history: list of (user_text, model_reply) tuples from the chatbot.

    Returns:
        (updated history, gr.update clearing the textbox) — matches the
        [chatbot, text_input] outputs wired up in the UI below.
    """
    output = chat_llava(
        args=None,
        image_file=image,
        text=text,
        tokenizer=tokenizer,
        model=llava_model,
        image_processor=image_processor,
        context_len=context_len
    )
    # Append this turn so the Chatbot component re-renders the full dialog.
    history.append((text, output))
    return history, gr.update(value="")

# ---- Gradio front-end ----------------------------------------------------
with gr.Blocks() as demo:
    chat_window = gr.Chatbot(label="FinLLaVA Chatbot")
    picture = gr.Image(type="filepath", label="Upload Image")
    message_box = gr.Textbox(label="Enter your message")
    send_button = gr.Button("Submit")

    # One click = one inference turn; the textbox is cleared by the second
    # output returned from bot_streaming.
    send_button.click(
        fn=bot_streaming,
        inputs=[picture, message_box, chat_window],
        outputs=[chat_window, message_box],
    )

    # Clickable sample prompts shown under the inputs.
    gr.Examples(
        examples=[
            ["./bee.jpg", "What is on the flower?"],
            ["./baklava.png", "How to make this pastry?"],
        ],
        inputs=[picture, message_box],
    )

# Serve the app with request queueing; keep the programmatic API surface off.
demo.queue(api_open=False)
demo.launch(show_api=False, share=False)