Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,87 +1,88 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
import
|
4 |
-
import
|
5 |
-
|
6 |
import os
|
7 |
|
|
|
|
|
8 |
|
|
|
|
|
|
|
|
|
9 |
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
audio = audio + b'\0' * (element_size - (buffer_size % element_size))
|
17 |
-
return audio
|
18 |
-
|
19 |
-
def generate_voice(text, voice_name):
    """Synthesize speech for ``text`` using the ElevenLabs voice ``voice_name``.

    Only the first 4000 characters of ``text`` are sent to the API.

    Args:
        text: Text to convert to speech.
        voice_name: Name of the ElevenLabs voice passed to ``generate``.

    Returns:
        A ``(44100, samples)`` tuple where ``samples`` is the padded audio
        buffer viewed as a NumPy ``int16`` array.

    Raises:
        gr.Error: If the unauthenticated rate limit is hit, or if any other
            error occurs while generating the audio.
    """
    # NOTE(review): `eleven_api_key` is used as a *variable* holding the
    # environment-variable name; it is not defined in the visible code.
    # If the intent was the literal name, this should be a quoted string — confirm.
    api_key = os.environ.get(eleven_api_key)
    set_api_key(api_key)  # set API key
    try:
        audio = generate(
            text[:4000],  # Limit to 4000 characters
            voice=voice_name,
            model="eleven_multilingual_v2",
        )
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error expects a message string, not an exception object;
        # chain the cause so the original traceback stays in the logs.
        raise gr.Error(str(e)) from e
|
33 |
-
|
34 |
-
# description = """
|
35 |
-
# Eleven Multilingual V2 is the world's best Text-to-Speech model. Features 38 voices and supports 28 languages. Sign up on [ElevenLabs](https://elevenlabs.io/?from=partnerpierce7156) to get an API Key.
|
36 |
-
# """
|
37 |
-
|
38 |
-
with gr.Blocks(theme='Hev832/Applio') as block:
|
39 |
-
gr.Markdown('[  ](https://elevenlabs.io)')
|
40 |
-
gr.Markdown("# <center> ElevenLabs </center>")
|
41 |
-
#gr.Markdown(description)
|
42 |
-
|
43 |
-
with gr.Row(variant='panel'):
|
44 |
-
all_voices = voices()
|
45 |
-
input_voice = gr.Dropdown(
|
46 |
-
[ voice.name for voice in all_voices ],
|
47 |
-
value="Rachel",
|
48 |
-
label="Voice",
|
49 |
-
elem_id="input_voice"
|
50 |
-
)
|
51 |
-
|
52 |
-
input_text = gr.Textbox(
|
53 |
-
label="Input Text (4000 characters max)",
|
54 |
-
lines=1,
|
55 |
-
value="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! Γειά σας! Здравей! வணக்கம்!",
|
56 |
-
elem_id="input_text"
|
57 |
-
)
|
58 |
-
|
59 |
-
run_button = gr.Button(
|
60 |
-
text="Generate Voice",
|
61 |
-
type="button",
|
62 |
-
variant="primary"
|
63 |
-
)
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
)
|
81 |
|
|
|
|
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
|
|
|
|
|
|
84 |
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
-
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import requests
|
5 |
+
import json
|
6 |
import os
|
7 |
|
8 |
+
# Load the API key from environment variables
|
9 |
+
# ElevenLabs API key read once at import time; None when the variable is unset.
XI_API_KEY = os.environ.get("XI_API_KEY")
|
10 |
|
11 |
+
# Define the function to perform the Speech-to-Speech transformation
|
12 |
+
def sts_conversion(voice_id, audio_file):
    """Convert an audio clip to another voice via the ElevenLabs Speech-to-Speech API.

    Args:
        voice_id: ID of the target ElevenLabs voice (inserted into the URL path).
        audio_file: Input audio, either a filesystem path (``str`` /
            ``os.PathLike``) or an already-open binary file object.

    Returns:
        On success, the path of a newly written ``.mp3`` file containing the
        converted audio. On failure, a human-readable error message (the API's
        response body, or a validation message for missing input).

    Raises:
        requests.RequestException: On connection problems or timeouts.
        OSError: If the input path cannot be opened or the output cannot be written.
    """
    # Local imports keep this function self-contained with respect to the
    # module's import block.
    import tempfile
    from urllib.parse import quote

    CHUNK_SIZE = 1024
    # (connect, read) timeouts in seconds so a stalled connection cannot hang
    # the worker forever.
    REQUEST_TIMEOUT = (10, 300)

    voice_id = (voice_id or "").strip()
    if not voice_id:
        return "Voice ID is required."
    if audio_file is None:
        return "No input audio provided."

    # Construct the URL for the Speech-to-Speech API request. The voice ID is
    # user input, so escape it to keep it from altering the request path.
    safe_voice_id = quote(voice_id, safe="")
    sts_url = f"https://api.elevenlabs.io/v1/speech-to-speech/{safe_voice_id}/stream"

    # Set up headers for the API request, including the API key for authentication
    headers = {
        "Accept": "application/json",
        "xi-api-key": XI_API_KEY,
    }

    # Set up the data payload for the API request, including model ID and voice settings
    data = {
        "model_id": "eleven_english_sts_v2",
        "voice_settings": json.dumps({
            "stability": 0.5,
            "similarity_boost": 0.8,
            "style": 0.0,
            "use_speaker_boost": True,
        }),
    }

    # A path must be opened here: handing a plain string to `files=` would
    # upload the path text itself instead of the audio content.
    opened_here = isinstance(audio_file, (str, os.PathLike))
    audio_stream = open(audio_file, "rb") if opened_here else audio_file
    try:
        # The context manager releases the streamed connection on every path.
        with requests.post(
            sts_url,
            headers=headers,
            data=data,
            files={"audio": audio_stream},
            stream=True,
            timeout=REQUEST_TIMEOUT,
        ) as response:
            if not response.ok:
                # Return the error message if the request was not successful
                return response.text

            # A unique file per call avoids concurrent requests overwriting
            # each other's output; the ".mp3" suffix is kept for callers.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
                return f.name
    finally:
        # Only close what this function opened; caller-owned objects stay open.
        if opened_here:
            audio_stream.close()
|
56 |
|
57 |
+
# Create the Gradio Blocks UI
|
58 |
+
with gr.Blocks(theme="Hev832/Applio") as demo:
    gr.Markdown("# <center> ElevenLabs Speech-to-Speech Conversion </center>")

    gr.Markdown("Upload an audio file and enter the Voice ID to convert it using the Eleven Labs Speech-to-Speech API.")

    with gr.Row():
        voice_id = gr.Textbox(label="Voice ID")
        # "filepath" is the documented Audio type for receiving the upload as
        # a path on disk; "file" is not a documented option.
        audio_file = gr.Audio(source="upload", type="filepath", label="Input Audio")

    with gr.Row():
        output_audio = gr.File(label="Output Audio")
        # Hidden until there is an error to show (see process_audio).
        error_message = gr.Textbox(label="Error Message", visible=False)

    def process_audio(voice_id, audio_file):
        """Run the conversion and map its result onto the two output widgets.

        Args:
            voice_id: Text entered in the "Voice ID" box.
            audio_file: Path of the uploaded audio, or ``None`` if nothing
                was uploaded.

        Returns:
            A ``(file, error_update)`` pair: the output file path (or ``None``)
            and an update for the error textbox that reveals it only when
            there is a message to display.
        """
        if not audio_file:
            return None, gr.update(value="Please upload an input audio file.", visible=True)

        # Pass an open binary stream so the audio bytes (not the path text)
        # are uploaded by sts_conversion.
        with open(audio_file, "rb") as audio_stream:
            result = sts_conversion(voice_id, audio_stream)

        # Success is a path to an existing .mp3; anything else is error text.
        if result.endswith(".mp3") and os.path.isfile(result):
            return result, gr.update(value="", visible=False)
        return None, gr.update(value=result, visible=True)

    submit_btn = gr.Button("Convert")

    submit_btn.click(
        process_audio,
        inputs=[voice_id, audio_file],
        outputs=[output_audio, error_message],
    )

# Launch the Blocks interface
demo.launch()
|