# Spaces: Sleeping  (hosting-page status banner captured with this file; not part of the program)
import os
import time
import traceback

import gradio as gr
import numpy as np
from scipy.io import wavfile

# Explicitly import Bark components
from bark import generate_audio, SAMPLE_RATE
from bark.generation import preload_models
class VoiceCloningApp:
    """Backend for the voice app: stores a reference recording and runs Bark TTS.

    NOTE(review): the reference recording is only normalized and written to
    disk; ``generate_speech`` calls Bark with ``history_prompt=None``, so the
    saved reference is not used to condition the generated voice.
    """

    def __init__(self):
        """Create the working directory and preload the Bark models.

        Raises:
            RuntimeError: If the Bark models cannot be loaded. The underlying
                exception is chained as ``__cause__``.
        """
        # Working files live next to this source file.
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.working_dir = os.path.join(self.base_dir, "working_files")
        os.makedirs(self.working_dir, exist_ok=True)

        # Load the models eagerly so a broken install fails at startup rather
        # than on the first generation request.
        try:
            print("Attempting to load Bark models...")
            preload_models()
            print("Bark models loaded successfully.")
        except Exception as e:
            print(f"Error loading Bark models: {e}")
            traceback.print_exc()
            raise RuntimeError(f"Could not load Bark models. Error: {e}") from e

    def process_reference_audio(self, audio_data):
        """Peak-normalize a recording and save it as a WAV in the working directory.

        Args:
            audio_data: ``(sample_rate, samples)`` pair, or ``None`` when no
                audio was supplied.

        Returns:
            A status message for display. Failures are reported in the
            returned string rather than raised.
        """
        if audio_data is None:
            return "Please provide an audio input"

        try:
            sample_rate, audio_array = audio_data

            # Convert to float before taking abs(): on int16 input,
            # abs(-32768) overflows and the peak would be underestimated.
            audio_array = np.asarray(audio_array, dtype=np.float32)

            # Only scale when there is signal; dividing a silent clip by a
            # zero peak would fill the file with NaN.
            peak = np.max(np.abs(audio_array))
            if peak > 0:
                audio_array = audio_array / peak

            filename = f"reference_{int(time.time())}.wav"
            filepath = os.path.join(self.working_dir, filename)
            wavfile.write(filepath, sample_rate, audio_array)

            return "✅ Audio captured successfully!"
        except Exception as e:
            return f"Error processing audio: {str(e)}"

    def generate_speech(self, text):
        """Synthesize ``text`` with Bark and save the result as a WAV file.

        Args:
            text: Text to speak. Empty or whitespace-only input is rejected.

        Returns:
            ``(filepath, None)`` on success, or ``(None, message)`` when the
            input is empty or generation fails.
        """
        if not text or not text.strip():
            return None, "Please enter some text to speak"

        try:
            print(f"Generating speech for text: {text}")

            # No history prompt is passed, so Bark is not conditioned on any
            # previously saved reference recording.
            audio_array = generate_audio(
                text,
                history_prompt=None,
            )

            filename = f"generated_speech_{int(time.time())}.wav"
            filepath = os.path.join(self.working_dir, filename)
            wavfile.write(filepath, SAMPLE_RATE, audio_array)

            return filepath, None
        except Exception as e:
            print(f"Speech generation error: {e}")
            traceback.print_exc()
            return None, f"Error generating speech: {str(e)}"
def create_interface():
    """Build the Gradio Blocks UI and wire it to a ``VoiceCloningApp`` instance.

    The layout has two columns: the left one captures a reference recording
    and shows the processing result; the right one takes text, triggers
    speech generation, and shows the generated audio plus any error message.

    Returns:
        The constructed ``gr.Blocks`` interface, not yet launched.
    """
    # VoiceCloningApp.__init__ creates the working directory itself, so it is
    # not created a second time here.
    app = VoiceCloningApp()

    with gr.Blocks() as interface:
        gr.Markdown("# 🎙️ Voice Cloning App")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## 1. Capture Reference Voice")
                reference_audio = gr.Audio(sources=["microphone", "upload"], type="numpy")
                process_btn = gr.Button("Process Reference Voice")
                process_output = gr.Textbox(label="Processing Result")

            with gr.Column():
                gr.Markdown("## 2. Generate Speech")
                text_input = gr.Textbox(label="Enter Text to Speak")
                generate_btn = gr.Button("Generate Speech")
                audio_output = gr.Audio(label="Generated Speech")
                error_output = gr.Textbox(label="Errors", visible=True)

        # Bind functions
        process_btn.click(
            fn=app.process_reference_audio,
            inputs=reference_audio,
            outputs=process_output,
        )
        # generate_speech returns (filepath, error_message); the two outputs
        # receive them in that order.
        generate_btn.click(
            fn=app.generate_speech,
            inputs=text_input,
            outputs=[audio_output, error_output],
        )

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all network
    # interfaces at port 7860 with debug output and in-UI error display,
    # without creating a public share link.
    launch_options = {
        "share": False,
        "debug": True,
        "show_error": True,
        "server_name": '0.0.0.0',
        "server_port": 7860,
    }
    interface = create_interface()
    interface.launch(**launch_options)