File size: 2,813 Bytes
fd7c55d
 
 
 
 
d7a8fbe
c313ae5
fd7c55d
 
c313ae5
fd7c55d
 
 
 
c313ae5
fd7c55d
 
c313ae5
fd7c55d
 
 
 
 
3b1f0f3
fd7c55d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1f0f3
fd7c55d
 
c313ae5
fd7c55d
 
 
 
 
 
 
 
 
 
 
 
c313ae5
fd7c55d
 
 
c313ae5
 
fd7c55d
 
 
 
2663d66
fd7c55d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c313ae5
fd7c55d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89


import gradio as gr
import requests
import json
import os

# Load the API key from environment variables
XI_API_KEY = os.getenv("XI_API_KEY")

# Define the function to perform the Speech-to-Speech transformation
def sts_conversion(voice_id, audio_file):
    """Convert speech to another voice via the ElevenLabs Speech-to-Speech API.

    Args:
        voice_id: ElevenLabs voice identifier; interpolated into the request
            URL after surrounding whitespace is stripped.
        audio_file: Path to the input audio file (``str`` or ``os.PathLike``),
            or an already-open binary file-like object / raw ``bytes`` that is
            handed to ``requests`` unchanged.

    Returns:
        ``"output.mp3"`` (the file the streamed response is written to) on
        success. Otherwise an error string: the API's response body when the
        request was answered with an error status, or a short message when
        the inputs were invalid or the request could not be completed.
    """
    CHUNK_SIZE = 1024
    OUTPUT_PATH = "output.mp3"
    # (connect, read) seconds -- keeps a stalled connection from hanging forever.
    REQUEST_TIMEOUT = (10, 300)

    # Validate inputs before doing any I/O.
    voice_id = str(voice_id).strip() if voice_id is not None else ""
    if not voice_id:
        return "Voice ID is required."
    if not audio_file:
        return "No input audio file was provided."

    # requests treats a plain string in `files` as the file *content*, so a
    # path must be opened here; otherwise the path text itself gets uploaded.
    is_path = isinstance(audio_file, (str, os.PathLike))
    try:
        audio_stream = open(audio_file, "rb") if is_path else audio_file
    except OSError as exc:
        return f"Could not open input audio file ({exc})"

    try:
        # Construct the URL for the Speech-to-Speech API request
        sts_url = f"https://api.elevenlabs.io/v1/speech-to-speech/{voice_id}/stream"

        # Headers for the API request, including the API key for authentication
        headers = {
            "Accept": "application/json",
            "xi-api-key": XI_API_KEY
        }

        # Form fields: model ID and JSON-encoded voice settings
        data = {
            "model_id": "eleven_english_sts_v2",
            "voice_settings": json.dumps({
                "stability": 0.5,
                "similarity_boost": 0.8,
                "style": 0.0,
                "use_speaker_boost": True
            })
        }

        files = {
            "audio": audio_stream
        }

        # The context manager releases the streamed connection on every path.
        with requests.post(
            sts_url,
            headers=headers,
            data=data,
            files=files,
            stream=True,
            timeout=REQUEST_TIMEOUT,
        ) as response:
            if not response.ok:
                # Return the API's error body so the caller can display it
                return response.text

            # Write the streamed audio to disk chunk by chunk
            with open(OUTPUT_PATH, "wb") as f:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
            return OUTPUT_PATH
    except requests.RequestException as exc:
        return f"Request to the ElevenLabs API failed ({exc})"
    finally:
        # Only close handles this function opened; caller-owned objects stay open.
        if is_path:
            audio_stream.close()

# Create the Gradio Blocks UI
with gr.Blocks(theme="Hev832/Applio") as demo:
    gr.Markdown("# <center> ElevenLabs Speech-to-Speech Conversion </center>")


    gr.Markdown("Upload an audio file and enter the Voice ID to convert it using the Eleven Labs Speech-to-Speech API.")

    with gr.Row():
        voice_id = gr.Textbox(label="Voice ID")
        audio_file = gr.Audio(source="upload", type="filepath", label="Input Audio")

    with gr.Row():
        output_audio = gr.File(label="Output Audio")
        # Starts hidden; process_audio reveals it only when there is an error.
        error_message = gr.Textbox(label="Error Message", visible=False)

    def process_audio(voice_id, audio_file):
        """Run the conversion and map its result onto the two output widgets.

        Returns a ``(file, error_box_update)`` pair: on success the path of
        the converted audio plus an update that clears and hides the error
        box; on failure ``None`` plus an update that shows the error text.
        """
        # Catch missing inputs up front so the user gets a clear message.
        if not voice_id or not audio_file:
            return None, gr.update(
                value="Please enter a Voice ID and upload an audio file.",
                visible=True,
            )

        result = sts_conversion(voice_id, audio_file)
        if result.endswith('.mp3'):
            # Success: hide any error left over from a previous attempt.
            return result, gr.update(value="", visible=False)
        # Failure: the textbox is hidden by default, so it must be made
        # visible explicitly or the message never reaches the user.
        return None, gr.update(value=result, visible=True)

    submit_btn = gr.Button("Convert")

    submit_btn.click(
        process_audio,
        inputs=[voice_id, audio_file],
        outputs=[output_audio, error_message]
    )

# Launch the Blocks interface
demo.launch()