"""Gradio demo that captions an uploaded image with a pretrained OFA model.

Importing or running this module loads the tokenizer and model, builds the
Gradio interface, and launches it.
"""

from typing import Optional

import gradio as gr
import torch
from ofasys import OFATokenizer, OFAModel
from PIL import Image

# Load the OFA tokenizer and model once at start-up so every request reuses them.
# NOTE(review): confirm that `ofasys` actually exports OFATokenizer / OFAModel
# in the installed version; this import is kept exactly as the original had it.
tokenizer = OFATokenizer.from_pretrained("OFA-Sys/ofa-base")
model = OFAModel.from_pretrained("OFA-Sys/ofa-base", use_cache=True)


def image_captioning(image: Optional[str]) -> str:
    """Generate a caption for the image stored at *image*.

    Args:
        image: Filesystem path of the uploaded image, as supplied by the
            ``gr.Image(type="filepath")`` input. ``None`` when the user
            submits without uploading anything.

    Returns:
        The decoded caption text.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard Image.open(None) fails with an opaque error.
        raise gr.Error("Please upload an image before requesting a caption.")

    # Close the underlying file as soon as the pixels have been copied;
    # convert() returns a new, fully loaded image that outlives the handle.
    with Image.open(image) as source:
        img = source.convert("RGB")

    # NOTE(review): the PIL image is handed to the tokenizer directly, as in
    # the original code; verify the tokenizer accepts image inputs.
    inputs = tokenizer([img], return_tensors="pt")

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model.generate(**inputs)

    # Decode the first generated sequence into text.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Create a Gradio interface
interface = gr.Interface(
    fn=image_captioning,
    inputs=gr.Image(label="Upload an Image", type="filepath"),
    outputs=gr.Textbox(label="Generated Caption"),
    title="OFA Image Captioning",
    description="Upload an image to generate a caption using the OFA model.",
)

# Launch the interface
interface.launch()