Spaces:

Rogerjs
/

Voicecloner

Sleeping

App Files Community

Roger committed on Dec 5, 2024

Commit

a73a846

2 Parent(s): 3a938ac 4f9d782

Merge branch 'main' of https://huggingface.co/spaces/Rogerjs/Voicecloner

Browse files

Files changed (2) hide show

app.py +0 -134
requirements.txt +0 -7

app.py CHANGED Viewed

@@ -1,134 +0,0 @@
-import gradio as gr
-import numpy as np
-import torch
-import os
-import time
-from scipy.io import wavfile
-# Explicitly import Bark components
-from bark import generate_audio, SAMPLE_RATE
-from bark.generation import preload_models
-class VoiceCloningApp:
-    def __init__(self):
-        # Create working directory
-        self.base_dir = os.path.dirname(os.path.abspath(__file__))
-        self.working_dir = os.path.join(self.base_dir, "working_files")
-        os.makedirs(self.working_dir, exist_ok=True)
-        # Explicit model loading with error handling
-        try:
-            print("Attempting to load Bark models...")
-            # Remove device argument
-            preload_models()
-            print("Bark models loaded successfully.")
-        except Exception as e:
-            print(f"Error loading Bark models: {e}")
-            # Log the full error for debugging
-            import traceback
-            traceback.print_exc()
-            # Provide a more informative error message
-            raise RuntimeError(f"Could not load Bark models. Error: {e}")
-    def process_reference_audio(self, audio_data):
-        """Simple audio processing"""
-        if audio_data is None:
-            return "Please provide an audio input"
-        try:
-            # Unpack audio data
-            sample_rate, audio_array = audio_data
-            # Normalize audio
-            audio_array = audio_array / np.max(np.abs(audio_array))
-            # Save reference audio
-            filename = f"reference_{int(time.time())}.wav"
-            filepath = os.path.join(self.working_dir, filename)
-            wavfile.write(filepath, sample_rate, audio_array)
-            return "✅ Audio captured successfully!"
-        except Exception as e:
-            return f"Error processing audio: {str(e)}"
-    def generate_speech(self, text):
-        """Generate speech using Bark"""
-        if not text or not text.strip():
-            return None, "Please enter some text to speak"
-        try:
-            # Generate audio with explicit error handling
-            print(f"Generating speech for text: {text}")
-            # Simplified audio generation
-            audio_array = generate_audio(
-                text,
-                history_prompt=None,
-                temp=0.7
-            )
-            # Save generated audio
-            filename = f"generated_speech_{int(time.time())}.wav"
-            filepath = os.path.join(self.working_dir, filename)
-            wavfile.write(filepath, SAMPLE_RATE, audio_array)
-            return filepath, None
-        except Exception as e:
-            print(f"Speech generation error: {e}")
-            # Log the full error for debugging
-            import traceback
-            traceback.print_exc()
-            return None, f"Error generating speech: {str(e)}"
-def create_interface():
-    # Create working directory if it doesn't exist
-    working_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "working_files")
-    os.makedirs(working_dir, exist_ok=True)
-    app = VoiceCloningApp()
-    # Use the most basic Gradio theme to avoid font issues
-    with gr.Blocks() as interface:
-        gr.Markdown("# 🎙️ Voice Cloning App")
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("## 1. Capture Reference Voice")
-                reference_audio = gr.Audio(sources=["microphone", "upload"], type="numpy")
-                process_btn = gr.Button("Process Reference Voice")
-                process_output = gr.Textbox(label="Processing Result")
-            with gr.Column():
-                gr.Markdown("## 2. Generate Speech")
-                text_input = gr.Textbox(label="Enter Text to Speak")
-                generate_btn = gr.Button("Generate Speech")
-                audio_output = gr.Audio(label="Generated Speech")
-                error_output = gr.Textbox(label="Errors", visible=True)
-        # Bind functions
-        process_btn.click(
-            fn=app.process_reference_audio,
-            inputs=reference_audio,
-            outputs=process_output
-        )
-        generate_btn.click(
-            fn=app.generate_speech,
-            inputs=text_input,
-            outputs=[audio_output, error_output]
-        )
-    return interface
-if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch(
-        share=False,
-        debug=True,
-        show_error=True,
-        server_name='0.0.0.0',
-        server_port=7860
-    )

requirements.txt CHANGED Viewed

@@ -1,7 +0,0 @@
-gradio>=3.50.2
-numpy
-scipy
-soundfile
-torch
-transformers
-git+https://github.com/suno-ai/bark.git