Voicecloner / app.py
Rogerjs's picture
Update app.py
3e1f471 verified
raw
history blame
4.33 kB
import gradio as gr
import numpy as np
import os
import time
from scipy.io import wavfile
# Explicitly import Bark components
from bark import generate_audio, SAMPLE_RATE
from bark.generation import preload_models
class VoiceCloningApp:
    """Backend for the Gradio UI: stores reference audio and runs Bark TTS.

    NOTE(review): the reference recording is only normalized and written to
    disk. ``generate_speech`` calls Bark with ``history_prompt=None``, so the
    saved reference does not currently influence the generated voice.
    """

    def __init__(self):
        """Create the working directory and preload the Bark models.

        Raises:
            RuntimeError: If ``preload_models`` fails. The original exception
                is chained as ``__cause__`` so the root failure stays visible.
        """
        # All files produced by the app live in "working_files" next to this
        # source file.
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.working_dir = os.path.join(self.base_dir, "working_files")
        os.makedirs(self.working_dir, exist_ok=True)

        # Load the models eagerly so a broken installation fails at start-up
        # instead of on the first generation request.
        try:
            print("Attempting to load Bark models...")
            preload_models()
            print("Bark models loaded successfully.")
        except Exception as e:
            print(f"Error loading Bark models: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Could not load Bark models. Error: {e}") from e

    def process_reference_audio(self, audio_data):
        """Peak-normalize a reference recording and save it as a WAV file.

        Args:
            audio_data: ``None`` or a ``(sample_rate, samples)`` pair, as
                produced by the ``type="numpy"`` audio input this method is
                bound to. ``samples`` may be integer or float PCM
                (presumably int16 when it comes from Gradio - confirm
                against the Gradio docs).

        Returns:
            A human-readable status string; errors are reported in the
            returned text rather than raised.
        """
        if audio_data is None:
            return "Please provide an audio input"

        try:
            sample_rate, audio_array = audio_data

            # Convert to float *before* taking the absolute value: np.abs on
            # int16 overflows for -32768, which would give a wrong peak.
            # float32 also keeps the output a widely readable 32-bit WAV
            # (dividing an integer array would otherwise yield float64).
            samples = np.asarray(audio_array, dtype=np.float32)
            if samples.size == 0:
                return "Please provide an audio input"

            # Scale to a peak of 1.0. Silent input (peak 0) is stored as-is;
            # dividing by zero would fill the file with NaN.
            peak = np.max(np.abs(samples))
            if peak > 0:
                samples = samples / peak

            filename = f"reference_{int(time.time())}.wav"
            filepath = os.path.join(self.working_dir, filename)
            wavfile.write(filepath, sample_rate, samples)

            # "\u2705" is the check-mark emoji; written as an escape so the
            # message survives encoding mishaps in the source file.
            return "\u2705 Audio captured successfully!"
        except Exception as e:
            return f"Error processing audio: {e}"

    def generate_speech(self, text):
        """Synthesize ``text`` with Bark and save the result as a WAV file.

        Args:
            text: The text to speak. Empty or whitespace-only text is
                rejected.

        Returns:
            ``(filepath, None)`` on success, or ``(None, message)`` when the
            input is empty or generation fails.
        """
        if not text or not text.strip():
            return None, "Please enter some text to speak"

        try:
            print(f"Generating speech for text: {text}")

            # No history prompt is passed, so no speaker conditioning is
            # applied to the generated audio.
            audio_array = generate_audio(
                text,
                history_prompt=None,
            )

            filename = f"generated_speech_{int(time.time())}.wav"
            filepath = os.path.join(self.working_dir, filename)
            wavfile.write(filepath, SAMPLE_RATE, audio_array)

            return filepath, None
        except Exception as e:
            print(f"Speech generation error: {e}")
            import traceback
            traceback.print_exc()
            return None, f"Error generating speech: {e}"
def create_interface():
    """Build the Gradio Blocks UI and wire it to a ``VoiceCloningApp``.

    The layout has two columns: one to record or upload a reference voice
    and process it, and one to enter text and generate speech.

    Returns:
        The constructed ``gr.Blocks`` interface, not yet launched.
    """
    # VoiceCloningApp.__init__ creates the "working_files" directory itself,
    # so no separate directory setup is needed here.
    app = VoiceCloningApp()

    with gr.Blocks() as interface:
        # "\U0001F399\uFE0F" is the studio-microphone emoji; written as an
        # escape so the title survives encoding mishaps in the source file.
        gr.Markdown("# \U0001F399\uFE0F Voice Cloning App")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## 1. Capture Reference Voice")
                # type="numpy" hands the callback a (sample_rate, samples)
                # pair, which process_reference_audio unpacks.
                reference_audio = gr.Audio(sources=["microphone", "upload"], type="numpy")
                process_btn = gr.Button("Process Reference Voice")
                process_output = gr.Textbox(label="Processing Result")

            with gr.Column():
                gr.Markdown("## 2. Generate Speech")
                text_input = gr.Textbox(label="Enter Text to Speak")
                generate_btn = gr.Button("Generate Speech")
                audio_output = gr.Audio(label="Generated Speech")
                error_output = gr.Textbox(label="Errors", visible=True)

        # Bind the buttons to the backend methods.
        process_btn.click(
            fn=app.process_reference_audio,
            inputs=reference_audio,
            outputs=process_output,
        )
        # generate_speech returns (filepath, error_message), matching the
        # two output components in order.
        generate_btn.click(
            fn=app.generate_speech,
            inputs=text_input,
            outputs=[audio_output, error_output],
        )

    return interface
if __name__ == "__main__":
    # Server settings: listen on all network interfaces at port 7860, with no
    # public share link, and with debug mode and error display switched on.
    launch_options = dict(
        share=False,
        debug=True,
        show_error=True,
        server_name='0.0.0.0',
        server_port=7860,
    )

    interface = create_interface()
    interface.launch(**launch_options)