import gradio as gr
from ofasys import OFATokenizer, OFAModel
from PIL import Image
import torch

# The tokenizer and the model are both restored from the same pretrained
# checkpoint; this happens once, when the module is first executed.
_CHECKPOINT = "OFA-Sys/ofa-base"
tokenizer = OFATokenizer.from_pretrained(_CHECKPOINT)
model = OFAModel.from_pretrained(_CHECKPOINT, use_cache=True)

def image_captioning(image):
    """Generate a caption for the image stored at ``image``.

    Args:
        image: Filesystem path of the image to caption, or ``None`` when
            no image was supplied.

    Returns:
        The decoded caption string, or an empty string when ``image`` is
        ``None``.
    """
    # Nothing was uploaded, so there is nothing to caption; return an empty
    # caption instead of failing inside Image.open(None).
    if image is None:
        return ""

    # Open inside a context manager so the underlying file handle is released
    # as soon as the pixels are read; convert() hands back a separate image
    # object that stays usable after the file is closed.
    with Image.open(image) as source:
        img = source.convert("RGB")

    # NOTE(review): assumes the tokenizer accepts a list of PIL images and
    # returns a mapping usable as keyword arguments to generate() -- confirm
    # against the OFA API in use.
    inputs = tokenizer([img], return_tensors="pt")
    with torch.no_grad():  # inference only: no gradient tracking needed
        outputs = model.generate(**inputs)

    # Decode the first generated sequence, dropping special tokens.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Describe the two UI components up front: an image upload that hands the
# callback a file path, and a text box that displays the returned caption.
_image_input = gr.Image(label="Upload an Image", type="filepath")
_caption_output = gr.Textbox(label="Generated Caption")

# Wire the components to the captioning function.
interface = gr.Interface(
    fn=image_captioning,
    inputs=_image_input,
    outputs=_caption_output,
    title="OFA Image Captioning",
    description="Upload an image to generate a caption using the OFA model.",
)

# Start serving the web UI.
interface.launch()