medelharchaoui's picture
Create app.py
a7ab7f4 verified
raw
history blame
720 Bytes
import gradio as gr
from transformers import pipeline
# Both models are loaded once at import time from local directories so that
# every request reuses the same pipeline objects.

# Image captioning: turns an input image into a short text description.
img_text_pipe = pipeline(
    task="image-to-text",
    model="./models/Salesforce/blip-image-captioning-base",
)

# Speech synthesis: turns the generated description into an audio waveform.
narrator = pipeline(
    task="text-to-speech",
    model="./models/kakao-enterprise/vits-ljs",
)
def describe_image(file_path):
    """Caption an image and return the caption as synthesized speech.

    Args:
        file_path: The image to describe. Despite the name, the Gradio
            interface in this file is configured with ``type="pil"``, so the
            value handed in is an image object rather than a path. It is
            ``None`` when the user submits without providing an image.

    Returns:
        A ``(sampling_rate, waveform)`` tuple suitable for a Gradio audio
        output, or ``None`` when no image was supplied.
    """
    # Nothing to describe: return None so the audio output stays empty
    # instead of failing inside the captioning pipeline.
    if file_path is None:
        return None

    captions = img_text_pipe(file_path)
    description_text = captions[0]["generated_text"]

    narrated_text = narrator(description_text)

    # The audio component needs the sampling rate alongside the samples; a
    # bare waveform array cannot be rendered. ``[0]`` selects the first
    # waveform of the pipeline's "audio" output, as the original code did.
    return narrated_text["sampling_rate"], narrated_text["audio"][0]
# Web UI: a single image upload wired to describe_image, with the result
# played back through an audio component.
iface = gr.Interface(
    fn=describe_image,
    # type="pil" makes Gradio hand the callback a PIL image object.
    inputs=gr.Image(type="pil", label="Input image"),
    outputs="audio",
)

# Start the Gradio server for this app.
iface.launch()