elevenlabs_mods

Running

App Files Files Community

Hev832 committed on Aug 26, 2024

Commit

fd7c55d

verified ·

1 Parent(s): d7a8fbe

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -75

app.py CHANGED Viewed

@@ -1,87 +1,88 @@
-import random
-import gradio as gr
-import numpy as np
-import time
-from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
 import os
-def pad_buffer(audio):
-    # Pad buffer to multiple of 2 bytes
-    buffer_size = len(audio)
-    element_size = np.dtype(np.int16).itemsize
-    if buffer_size % element_size != 0:
-        audio = audio + b'\0' * (element_size - (buffer_size % element_size))
-    return audio
-def generate_voice(text, voice_name):
-    api_key = os.environ.get(eleven_api_key)
-    set_api_key(api_key) #set API key
-    try:
-        audio = generate(
-            text[:4000], # Limit to 4000 characters
-            voice=voice_name,
-            model="eleven_multilingual_v2"
-        )
-        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
-    except UnauthenticatedRateLimitError as e:
-        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.")
-    except Exception as e:
-        raise gr.Error(e)
-    # description = """
-    # Eleven Multilingual V2 is the world's best Text-to-Speech model. Features 38 voices and supports 28 languages. Sign up on [ElevenLabs](https://elevenlabs.io/?from=partnerpierce7156) to get an API Key.
-    # """
-with gr.Blocks(theme='Hev832/Applio') as block:
-    gr.Markdown('[ ![ElevenLabs](https://user-images.githubusercontent.com/12028621/262629275-4f85c9cf-85b6-435e-ab50-5b8c7c4e9dd2.png) ](https://elevenlabs.io)')
-    gr.Markdown("# <center> ElevenLabs </center>")
-    #gr.Markdown(description)
-    with gr.Row(variant='panel'):
-        all_voices = voices()
-        input_voice = gr.Dropdown(
-            [ voice.name for voice in all_voices ],
-            value="Rachel",
-            label="Voice",
-            elem_id="input_voice"
-        )
-    input_text = gr.Textbox(
-        label="Input Text (4000 characters max)",
-        lines=1,
-        value="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! Γειά σας! Здравей! வணக்கம்!",
-        elem_id="input_text"
-    )
-    run_button = gr.Button(
-        text="Generate Voice",
-        type="button",
-        variant="primary"
-    )
-    out_audio = gr.Audio(
-        label="Speech Output",
-        type="numpy",
-        elem_id="out_audio",
-        format="mp3"
-    )
-    inputs = [input_text, input_voice]
-    outputs = [out_audio]
-    run_button.click(
-        fn=generate_voice,
-        inputs=inputs,
-        outputs=outputs,
-        queue=True
-    )
-block.queue(concurrency_count=5).launch(debug=True)

+import gradio as gr
+import requests
+import json
 import os
+# Load the API key from environment variables
+XI_API_KEY = os.getenv("XI_API_KEY")
+# Define the function to perform the Speech-to-Speech transformation
+def sts_conversion(voice_id, audio_file):
+    """Convert an audio clip to another voice via the ElevenLabs Speech-to-Speech API.
+
+    Args:
+        voice_id: ID of the target voice; it is interpolated into the request URL.
+        audio_file: Source audio, given either as a filesystem path or as an
+            already-open binary file-like object.
+
+    Returns:
+        The path of the written MP3 file on success, otherwise a human-readable
+        error message (either the API's response body or a local description).
+    """
+    CHUNK_SIZE = 1024
+    # NOTE(review): this fixed path is shared by every call, so overlapping
+    # conversions would overwrite each other's output -- confirm this is acceptable.
+    OUTPUT_PATH = "output.mp3"
+    # Seconds to wait on the API; without a timeout a stalled connection would
+    # block the calling worker indefinitely
+    REQUEST_TIMEOUT = 120
+    # Reject missing inputs up front instead of sending a request that cannot succeed
+    voice_id = str(voice_id).strip() if voice_id is not None else ""
+    if not voice_id:
+        return "Error: a Voice ID is required."
+    if audio_file is None:
+        return "Error: an input audio file is required."
+    if not XI_API_KEY:
+        return "Error: the XI_API_KEY environment variable is not set."
+    # Construct the URL for the Speech-to-Speech API request
+    sts_url = f"https://api.elevenlabs.io/v1/speech-to-speech/{voice_id}/stream"
+    # Set up headers for the API request, including the API key for authentication
+    headers = {
+        "Accept": "application/json",
+        "xi-api-key": XI_API_KEY
+    }
+    # Set up the data payload for the API request, including model ID and voice settings
+    data = {
+        "model_id": "eleven_english_sts_v2",
+        "voice_settings": json.dumps({
+            "stability": 0.5,
+            "similarity_boost": 0.8,
+            "style": 0.0,
+            "use_speaker_boost": True
+        })
+    }
+    # requests uploads a plain string as the literal file *content*, so a path
+    # must be opened first; file-like objects are passed through unchanged
+    opened_here = isinstance(audio_file, (str, os.PathLike))
+    try:
+        audio_stream = open(audio_file, "rb") if opened_here else audio_file
+    except OSError as exc:
+        return f"Error: could not read the input audio: {exc}"
+    try:
+        # The context manager releases the streamed connection on every exit path
+        with requests.post(
+            sts_url,
+            headers=headers,
+            data=data,
+            files={"audio": audio_stream},
+            stream=True,
+            timeout=REQUEST_TIMEOUT,
+        ) as response:
+            # Return the error message if the request was not successful
+            if not response.ok:
+                return response.text
+            # Read the response in chunks and write them to the output file
+            with open(OUTPUT_PATH, "wb") as f:
+                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+                    if chunk:
+                        f.write(chunk)
+        # Return the output audio file for download
+        return OUTPUT_PATH
+    except requests.RequestException as exc:
+        # Report transport failures the same way as API failures: as text
+        return f"Error: request to ElevenLabs failed: {exc}"
+    finally:
+        # Only close what this function opened; caller-owned handles stay open
+        if opened_here:
+            audio_stream.close()
+# Create the Gradio Blocks UI
+with gr.Blocks(theme="Hev832/Applio") as demo:
+    gr.Markdown("# <center> ElevenLabs Speech-to-Speech Conversion </center>")
+    gr.Markdown("Upload an audio file and enter the Voice ID to convert it using the Eleven Labs Speech-to-Speech API.")
+    with gr.Row():
+        voice_id = gr.Textbox(label="Voice ID")
+        audio_file = gr.Audio(source="upload", type="file", label="Input Audio")
+    with gr.Row():
+        output_audio = gr.File(label="Output Audio")
+        # Starts hidden; process_audio reveals it only when a conversion fails
+        error_message = gr.Textbox(label="Error Message", visible=False)
+    def process_audio(voice_id, audio_file):
+        """Run the conversion and route its result to the file or error output.
+
+        Returns a pair for (output_audio, error_message): the MP3 path with the
+        error box cleared and hidden on success, or no file with the error box
+        made visible and filled with the failure text.
+        """
+        result = sts_conversion(voice_id, audio_file)
+        # sts_conversion signals success by returning the path of the MP3 it wrote
+        if isinstance(result, str) and result.endswith('.mp3'):
+            return result, gr.update(value="", visible=False)
+        # The textbox is created hidden, so it must be shown explicitly or the
+        # user would never see why the conversion failed
+        if isinstance(result, str) and result:
+            message = result
+        else:
+            message = "Conversion failed with no error details."
+        return None, gr.update(value=message, visible=True)
+    submit_btn = gr.Button("Convert")
+    submit_btn.click(
+        process_audio,
+        inputs=[voice_id, audio_file],
+        outputs=[output_audio, error_message]
+    )
+# Launch the Blocks interface
+demo.launch()