Spaces:
Sleeping
Sleeping
# Visual question answering from
# https://learn.deeplearning.ai/courses/open-source-models-hugging-face/lesson/13/multimodal-visual-question-answering
#
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, BlipForQuestionAnswering

# Both the model weights and the matching processor are loaded from the
# same checkpoint, so the identifier is spelled out only once.
_CHECKPOINT = "Salesforce/blip-vqa-base"

# Loaded once at import time and shared by every request.
model = BlipForQuestionAnswering.from_pretrained(_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
def answering(image, question):
    """Answer a free-text question about a picture with the BLIP VQA model.

    Args:
        image: The picture to ask about. The Gradio interface in this file
            requests PIL images (``type="pil"``); the value may be ``None``
            when no picture has been supplied.
        question: The question to ask about the picture.

    Returns:
        The decoded answer text, or a short prompt asking for the missing
        input when no picture or no question was given.
    """
    # Guard clauses: do not hand missing input to the processor.
    # NOTE(review): presumably the processor raises on a ``None`` image,
    # which the UI would show as a generic error -- a clear message is
    # returned instead.
    if image is None:
        return "Please upload a picture first."
    if not question or not question.strip():
        return "Please type a question about the picture."

    # Encode the picture/question pair as PyTorch tensors ("pt").
    inputs = processor(image, question, return_tensors="pt")
    out = model.generate(**inputs)
    # Only one picture/question pair was submitted, so the first generated
    # sequence is the answer; special tokens are dropped from the text.
    return processor.decode(out[0], skip_special_tokens=True)
# Shut down any Gradio interfaces that are still running before starting
# a new one.
gr.close_all()

# Web UI: one picture plus one question in, one text answer out.
app = gr.Interface(
    fn=answering,
    inputs=[
        gr.Image(label="Picture here", type="pil"),
        gr.Textbox(label="Question about picture here"),
    ],
    outputs=[gr.Textbox(label="Answer")],
    # Stray trailing apostrophes and the "picuture" typo were removed from
    # the two user-facing strings below.
    title="Harza's application for answering questions about picture",
    description=(
        "Harza's miracle application that can answer questions about "
        "given picture!"
    ),
    # NOTE(review): newer Gradio releases appear to rename this option to
    # `flagging_mode` -- confirm against the installed Gradio version.
    allow_flagging="never",
)

app.launch()
# Runs once launch() returns.
# NOTE(review): in a plain script launch() is expected to block until the
# server stops, making this shutdown cleanup; in a notebook it would close
# the app right away -- confirm the intended environment.
gr.close_all()