Hev832 committed on
Commit
fd7c55d
·
verified ·
1 Parent(s): d7a8fbe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -75
app.py CHANGED
@@ -1,87 +1,88 @@
1
- import random
2
- import gradio as gr
3
- import numpy as np
4
- import time
5
- from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
6
  import os
7
 
 
 
8
 
 
 
 
 
9
 
 
 
10
 
11
- def pad_buffer(audio):
12
- # Pad buffer to multiple of 2 bytes
13
- buffer_size = len(audio)
14
- element_size = np.dtype(np.int16).itemsize
15
- if buffer_size % element_size != 0:
16
- audio = audio + b'\0' * (element_size - (buffer_size % element_size))
17
- return audio
18
-
19
- def generate_voice(text, voice_name):
20
- api_key = os.environ.get(eleven_api_key)
21
- set_api_key(api_key) #set API key
22
- try:
23
- audio = generate(
24
- text[:4000], # Limit to 4000 characters
25
- voice=voice_name,
26
- model="eleven_multilingual_v2"
27
- )
28
- return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
29
- except UnauthenticatedRateLimitError as e:
30
- raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.")
31
- except Exception as e:
32
- raise gr.Error(e)
33
-
34
- # description = """
35
- # Eleven Multilingual V2 is the world's best Text-to-Speech model. Features 38 voices and supports 28 languages. Sign up on [ElevenLabs](https://elevenlabs.io/?from=partnerpierce7156) to get an API Key.
36
- # """
37
-
38
- with gr.Blocks(theme='Hev832/Applio') as block:
39
- gr.Markdown('[ ![ElevenLabs](https://user-images.githubusercontent.com/12028621/262629275-4f85c9cf-85b6-435e-ab50-5b8c7c4e9dd2.png) ](https://elevenlabs.io)')
40
- gr.Markdown("# <center> ElevenLabs </center>")
41
- #gr.Markdown(description)
42
-
43
- with gr.Row(variant='panel'):
44
- all_voices = voices()
45
- input_voice = gr.Dropdown(
46
- [ voice.name for voice in all_voices ],
47
- value="Rachel",
48
- label="Voice",
49
- elem_id="input_voice"
50
- )
51
-
52
- input_text = gr.Textbox(
53
- label="Input Text (4000 characters max)",
54
- lines=1,
55
- value="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! Γειά σας! Здравей! வணக்கம்!",
56
- elem_id="input_text"
57
- )
58
-
59
- run_button = gr.Button(
60
- text="Generate Voice",
61
- type="button",
62
- variant="primary"
63
- )
64
 
65
- out_audio = gr.Audio(
66
- label="Speech Output",
67
- type="numpy",
68
- elem_id="out_audio",
69
- format="mp3"
70
- )
71
-
72
- inputs = [input_text, input_voice]
73
- outputs = [out_audio]
74
-
75
- run_button.click(
76
- fn=generate_voice,
77
- inputs=inputs,
78
- outputs=outputs,
79
- queue=True
80
- )
81
 
 
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
 
 
 
84
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- block.queue(concurrency_count=5).launch(debug=True)
 
 
1
+
2
+
3
+ import gradio as gr
4
+ import requests
5
+ import json
6
  import os
7
 
8
+ # Load the API key from environment variables
9
+ XI_API_KEY = os.getenv("XI_API_KEY")
10
 
11
+ # Define the function to perform the Speech-to-Speech transformation
12
+ def sts_conversion(voice_id, audio_file):
13
+ CHUNK_SIZE = 1024
14
+ OUTPUT_PATH = "output.mp3"
15
 
16
+ # Construct the URL for the Speech-to-Speech API request
17
+ sts_url = f"https://api.elevenlabs.io/v1/speech-to-speech/{voice_id}/stream"
18
 
19
+ # Set up headers for the API request, including the API key for authentication
20
+ headers = {
21
+ "Accept": "application/json",
22
+ "xi-api-key": XI_API_KEY
23
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ # Set up the data payload for the API request, including model ID and voice settings
26
+ data = {
27
+ "model_id": "eleven_english_sts_v2",
28
+ "voice_settings": json.dumps({
29
+ "stability": 0.5,
30
+ "similarity_boost": 0.8,
31
+ "style": 0.0,
32
+ "use_speaker_boost": True
33
+ })
34
+ }
35
+
36
+ # Set up the files to send with the request, including the input audio file
37
+ files = {
38
+ "audio": audio_file
39
+ }
 
40
 
41
+ # Make the POST request to the STS API with headers, data, and files, enabling streaming response
42
+ response = requests.post(sts_url, headers=headers, data=data, files=files, stream=True)
43
 
44
+ # Check if the request was successful
45
+ if response.ok:
46
+ # Open the output file in write-binary mode
47
+ with open(OUTPUT_PATH, "wb") as f:
48
+ # Read the response in chunks and write to the file
49
+ for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
50
+ f.write(chunk)
51
+ # Return the output audio file for download
52
+ return OUTPUT_PATH
53
+ else:
54
+ # Return the error message if the request was not successful
55
+ return response.text
56
 
57
+ # Create the Gradio Blocks UI
58
+ with gr.Blocks(theme="Hev832/Applio") as demo:
59
+ gr.Markdown("# <center> ElevenLabs Speech-to-Speech Conversion </center>")
60
 
61
 
62
+ gr.Markdown("Upload an audio file and enter the Voice ID to convert it using the Eleven Labs Speech-to-Speech API.")
63
+
64
+ with gr.Row():
65
+ voice_id = gr.Textbox(label="Voice ID")
66
+ audio_file = gr.Audio(source="upload", type="file", label="Input Audio")
67
+
68
+ with gr.Row():
69
+ output_audio = gr.File(label="Output Audio")
70
+ error_message = gr.Textbox(label="Error Message", visible=False)
71
+
72
+ def process_audio(voice_id, audio_file):
73
+ result = sts_conversion(voice_id, audio_file)
74
+ if result.endswith('.mp3'):
75
+ return result, ""
76
+ else:
77
+ return None, result
78
+
79
+ submit_btn = gr.Button("Convert")
80
+
81
+ submit_btn.click(
82
+ process_audio,
83
+ inputs=[voice_id, audio_file],
84
+ outputs=[output_audio, error_message]
85
+ )
86
 
87
+ # Launch the Blocks interface
88
+ demo.launch()