import gradio as gr
from transformers import pipeline

# Image-captioning pipeline: generates a text description of an input image.
img_text_pipe = pipeline("image-to-text",
                         model="Salesforce/blip-image-captioning-base")

# Text-to-speech pipeline: narrates the generated caption.
narrator = pipeline("text-to-speech",
                    model="kakao-enterprise/vits-ljs")


def describe_image(image):
    # Caption the image, then convert the caption to speech.
    caption_output = img_text_pipe(image)
    description_text = caption_output[0]["generated_text"]
    narration = narrator(description_text)
    # gr.Audio expects a (sample_rate, waveform) tuple.
    return narration["sampling_rate"], narration["audio"][0]
iface = gr.Interface(fn=describe_image,
                     inputs=gr.Image(label="Input image", type="pil"),
                     outputs="audio")

iface.launch()