elevenlabs_mods

Running

File size: 2,813 Bytes

fd7c55d
 
 
 
 
d7a8fbe
c313ae5
fd7c55d
 
c313ae5
fd7c55d
 
 
 
c313ae5
fd7c55d
 
c313ae5
fd7c55d
 
 
 
 
3b1f0f3
fd7c55d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1f0f3
fd7c55d
 
c313ae5
fd7c55d
 
 
 
 
 
 
 
 
 
 
 
c313ae5
fd7c55d
 
 
c313ae5
 
fd7c55d
 
 
 
2663d66
fd7c55d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c313ae5
fd7c55d



import gradio as gr
import requests
import json
import os

# ElevenLabs API key, read once at import time from the process environment.
# The value is None when the XI_API_KEY variable is not set.
XI_API_KEY = os.environ.get("XI_API_KEY")

# Define the function to perform the Speech-to-Speech transformation
def sts_conversion(voice_id, audio_file):
    """Convert a recording to another voice with the ElevenLabs STS API.

    Args:
        voice_id: ElevenLabs voice identifier; it is inserted into the
            request URL path.
        audio_file: Path to the input audio file (what ``gr.Audio`` yields
            with ``type="filepath"``), or an already-open binary file object.

    Returns:
        ``"output.mp3"`` (the path the converted audio was written to) on
        success; otherwise a string describing what went wrong. Failures are
        reported through the return value rather than raised, so the caller
        can show them in the UI.
    """
    from urllib.parse import quote

    CHUNK_SIZE = 1024
    OUTPUT_PATH = "output.mp3"
    # (connect, read) seconds; without a timeout a stalled connection would
    # block the worker forever.
    REQUEST_TIMEOUT = (10, 300)

    # Validate the inputs before touching the network.
    voice_id = str(voice_id or "").strip()
    if not voice_id:
        return "A Voice ID is required."
    if not audio_file:
        return "No input audio file was provided."
    if not XI_API_KEY:
        return "The XI_API_KEY environment variable is not set."

    # Construct the URL for the Speech-to-Speech API request. The voice ID is
    # user input, so escape it to keep it from altering the URL structure.
    sts_url = (
        "https://api.elevenlabs.io/v1/speech-to-speech/"
        f"{quote(voice_id, safe='')}/stream"
    )

    # Set up headers for the API request, including the API key for authentication
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY,
    }

    # Set up the data payload for the API request, including model ID and voice settings
    data = {
        "model_id": "eleven_english_sts_v2",
        "voice_settings": json.dumps({
            "stability": 0.5,
            "similarity_boost": 0.8,
            "style": 0.0,
            "use_speaker_boost": True,
        }),
    }

    # requests sends a plain string in ``files`` as the file's *content*, so a
    # path must be opened here to upload the actual audio bytes.
    opened_here = isinstance(audio_file, (str, os.PathLike))
    try:
        audio_stream = open(audio_file, "rb") if opened_here else audio_file
    except OSError as exc:
        return f"Could not read the input audio file ({exc})."

    try:
        # Make the POST request to the STS API, streaming the response body
        response = requests.post(
            sts_url,
            headers=headers,
            data=data,
            files={"audio": audio_stream},
            stream=True,
            timeout=REQUEST_TIMEOUT,
        )
        # The context manager releases the connection even on early return.
        with response:
            if not response.ok:
                # Return the API's error message, or the status if it sent none
                return response.text or (
                    f"Request failed with HTTP status {response.status_code}."
                )
            # Read the response in chunks and write them to the output file
            with open(OUTPUT_PATH, "wb") as f:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        # Return the output audio file for download
        return OUTPUT_PATH
    except requests.RequestException as exc:
        return f"Request to the ElevenLabs API failed ({exc})."
    finally:
        # Only close the handle this function opened itself.
        if opened_here:
            audio_stream.close()

# Create the Gradio Blocks UI
with gr.Blocks(theme="Hev832/Applio") as demo:
    gr.Markdown("# <center> ElevenLabs Speech-to-Speech Conversion </center>")


    gr.Markdown("Upload an audio file and enter the Voice ID to convert it using the Eleven Labs Speech-to-Speech API.")

    with gr.Row():
        voice_id = gr.Textbox(label="Voice ID")
        audio_file = gr.Audio(source="upload", type="filepath", label="Input Audio")

    with gr.Row():
        output_audio = gr.File(label="Output Audio")
        # Hidden until a conversion fails; process_audio toggles its visibility.
        error_message = gr.Textbox(label="Error Message", visible=False)

    def process_audio(voice_id, audio_file):
        """Run the conversion and route its result to the two output widgets.

        Args:
            voice_id: Text entered in the "Voice ID" box.
            audio_file: Path of the uploaded audio file.

        Returns:
            A pair ``(file, error_update)`` for ``output_audio`` and
            ``error_message``: the MP3 path with the error box cleared and
            hidden on success, or ``None`` with the error box filled in and
            made visible on failure.
        """
        result = sts_conversion(voice_id, audio_file)
        # sts_conversion returns either an output path or an error string;
        # requiring an existing .mp3 file keeps error text that merely ends
        # in ".mp3" from being offered as a download.
        succeeded = (
            isinstance(result, str)
            and result.endswith('.mp3')
            and os.path.isfile(result)
        )
        if succeeded:
            return result, gr.update(value="", visible=False)
        message = result or "Conversion failed without an error message."
        # The textbox starts hidden, so it must be revealed explicitly or the
        # user never sees why the conversion failed.
        return None, gr.update(value=str(message), visible=True)

    submit_btn = gr.Button("Convert")

    submit_btn.click(
        process_audio,
        inputs=[voice_id, audio_file],
        outputs=[output_audio, error_message]
    )

# Launch the Blocks interface
demo.launch()