import gradio as gr
from gradio_client import Client, handle_file
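
# Text-to-speech step: calls the public WhisperSpeech Space via gradio_client,
# optionally cloning the voice from the uploaded speaker audio.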
def get_speech(text, voice):
    client = Client("collabora/WhisperSpeech")
    result = client.predict(
        multilingual_text=text,
        speaker_audio=voice,
        speaker_url="",
        cps=14,
        api_name="/whisper_speech_demo"
    )
    print(result)
    return result
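
# Talking-head step: sends the portrait image and the generated speech
# to the DreamTalk Space, which returns the animated video.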
def get_dreamtalk(image_in, speech):
    client = Client("fffiloni/dreamtalk")
    result = client.predict(
        audio_input=handle_file(speech),
        image_path=handle_file(image_in),
        emotional_style="M030_front_neutral_level1_001.mat",
        api_name="/infer"
    )
    print(result)
    return result['video']
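
# Full pipeline wired to the Submit button: text -> speech -> talking-head video.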
def pipe(text, voice, image_in):
    speech = get_speech(text, voice)
    try:
        video = get_dreamtalk(image_in, speech)
    except Exception:
        raise gr.Error('An error occurred while loading DreamTalk: the image may not contain any face')
    return video
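
# UI: portrait image and optional speaker audio in, lip-synced video out.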
with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
        <h2 style="text-align: center;">
        Whisper Speech X Dreamtalk
        </h2>
        <p style="text-align: center;"></p>
        """)
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(label="Portrait IN", type="filepath", value="./einstein.jpg")
            with gr.Column():
                voice = gr.Audio(type="filepath", label="Upload or Record Speaker audio (Optional voice cloning)")
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            with gr.Column():
                video_o = gr.Video(label="Video result")
    submit_btn.click(
        fn=pipe,
        inputs=[text, voice, image_in],
        outputs=[video_o],
        concurrency_limit=3
    )

demo.queue(max_size=10).launch(show_error=True, show_api=False)