import gradio as gr
import torch

from lavis.models import load_model_and_preprocess

# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the BLIP captioning model (large checkpoint fine-tuned on COCO)
# along with its matching image preprocessors.
model, vis_processors, _ = load_model_and_preprocess(
    name="blip_caption", model_type="large_coco", is_eval=True, device=device
)
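
# Note (an assumption from the LAVIS model zoo, not part of this app): a
# smaller "base_coco" checkpoint also exists; model_type="base_coco" trades
# some caption quality for a lower memory footprint.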

def predict(image):
    # Preprocess the PIL image into a batched tensor on the target device.
    pre_processed_image = vis_processors["eval"](image).unsqueeze(0).to(device)
    # Sample three candidate captions with nucleus sampling.
    captions = model.generate(
        {"image": pre_processed_image}, use_nucleus_sampling=True, num_captions=3
    )
    return image, "\n".join(captions)
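
# A minimal alternative sketch (hypothetical helper, not wired into the UI):
# the same generate() call also supports beam search, which yields a single,
# more deterministic caption than nucleus sampling. num_beams / max_length /
# min_length are standard LAVIS generate() keyword arguments.
def predict_beam_search(image):
    pre_processed_image = vis_processors["eval"](image).unsqueeze(0).to(device)
    caption = model.generate(
        {"image": pre_processed_image},
        use_nucleus_sampling=False,  # beam search instead of sampling
        num_beams=3,
        max_length=30,
        min_length=10,
    )
    return image, caption[0]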

demo = gr.Interface(
    title="Image Captioning - BLIP",
    fn=predict,
    inputs=gr.Image(type="pil", label="Original Image"),
    outputs=[gr.Image(type="pil", label="Image"), gr.Textbox(label="Generated Captions")],
    examples=["example_1.jpg", "example_2.jpg", "example_3.jpg"],
)

# Launch the Gradio server (serves locally on port 7860 by default).
demo.launch()
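
# Usage note: to expose the demo beyond localhost, Gradio can create a
# temporary public link with demo.launch(share=True); kept local here.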