Roger committed on
Commit
a73a846
·
2 Parent(s): 3a938ac 4f9d782

Merge branch 'main' of https://huggingface.co/spaces/Rogerjs/Voicecloner

Browse files
Files changed (2) hide show
  1. app.py +0 -134
  2. requirements.txt +0 -7
app.py CHANGED
@@ -1,134 +0,0 @@
1
- import gradio as gr
2
- import numpy as np
3
- import torch
4
- import os
5
- import time
6
- from scipy.io import wavfile
7
-
8
- # Explicitly import Bark components
9
- from bark import generate_audio, SAMPLE_RATE
10
- from bark.generation import preload_models
11
-
12
- class VoiceCloningApp:
13
- def __init__(self):
14
- # Create working directory
15
- self.base_dir = os.path.dirname(os.path.abspath(__file__))
16
- self.working_dir = os.path.join(self.base_dir, "working_files")
17
- os.makedirs(self.working_dir, exist_ok=True)
18
-
19
- # Explicit model loading with error handling
20
- try:
21
- print("Attempting to load Bark models...")
22
- # Remove device argument
23
- preload_models()
24
- print("Bark models loaded successfully.")
25
- except Exception as e:
26
- print(f"Error loading Bark models: {e}")
27
- # Log the full error for debugging
28
- import traceback
29
- traceback.print_exc()
30
-
31
- # Provide a more informative error message
32
- raise RuntimeError(f"Could not load Bark models. Error: {e}")
33
-
34
- def process_reference_audio(self, audio_data):
35
- """Simple audio processing"""
36
- if audio_data is None:
37
- return "Please provide an audio input"
38
-
39
- try:
40
- # Unpack audio data
41
- sample_rate, audio_array = audio_data
42
-
43
- # Normalize audio
44
- audio_array = audio_array / np.max(np.abs(audio_array))
45
-
46
- # Save reference audio
47
- filename = f"reference_{int(time.time())}.wav"
48
- filepath = os.path.join(self.working_dir, filename)
49
- wavfile.write(filepath, sample_rate, audio_array)
50
-
51
- return "✅ Audio captured successfully!"
52
-
53
- except Exception as e:
54
- return f"Error processing audio: {str(e)}"
55
-
56
- def generate_speech(self, text):
57
- """Generate speech using Bark"""
58
- if not text or not text.strip():
59
- return None, "Please enter some text to speak"
60
-
61
- try:
62
- # Generate audio with explicit error handling
63
- print(f"Generating speech for text: {text}")
64
-
65
- # Simplified audio generation
66
- audio_array = generate_audio(
67
- text,
68
- history_prompt=None,
69
- temp=0.7
70
- )
71
-
72
- # Save generated audio
73
- filename = f"generated_speech_{int(time.time())}.wav"
74
- filepath = os.path.join(self.working_dir, filename)
75
- wavfile.write(filepath, SAMPLE_RATE, audio_array)
76
-
77
- return filepath, None
78
-
79
- except Exception as e:
80
- print(f"Speech generation error: {e}")
81
- # Log the full error for debugging
82
- import traceback
83
- traceback.print_exc()
84
- return None, f"Error generating speech: {str(e)}"
85
-
86
- def create_interface():
87
- # Create working directory if it doesn't exist
88
- working_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "working_files")
89
- os.makedirs(working_dir, exist_ok=True)
90
-
91
- app = VoiceCloningApp()
92
-
93
- # Use the most basic Gradio theme to avoid font issues
94
- with gr.Blocks() as interface:
95
- gr.Markdown("# 🎙️ Voice Cloning App")
96
-
97
- with gr.Row():
98
- with gr.Column():
99
- gr.Markdown("## 1. Capture Reference Voice")
100
- reference_audio = gr.Audio(sources=["microphone", "upload"], type="numpy")
101
- process_btn = gr.Button("Process Reference Voice")
102
- process_output = gr.Textbox(label="Processing Result")
103
-
104
- with gr.Column():
105
- gr.Markdown("## 2. Generate Speech")
106
- text_input = gr.Textbox(label="Enter Text to Speak")
107
- generate_btn = gr.Button("Generate Speech")
108
- audio_output = gr.Audio(label="Generated Speech")
109
- error_output = gr.Textbox(label="Errors", visible=True)
110
-
111
- # Bind functions
112
- process_btn.click(
113
- fn=app.process_reference_audio,
114
- inputs=reference_audio,
115
- outputs=process_output
116
- )
117
-
118
- generate_btn.click(
119
- fn=app.generate_speech,
120
- inputs=text_input,
121
- outputs=[audio_output, error_output]
122
- )
123
-
124
- return interface
125
-
126
- if __name__ == "__main__":
127
- interface = create_interface()
128
- interface.launch(
129
- share=False,
130
- debug=True,
131
- show_error=True,
132
- server_name='0.0.0.0',
133
- server_port=7860
134
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,7 +0,0 @@
1
- gradio>=3.50.2
2
- numpy
3
- scipy
4
- soundfile
5
- torch
6
- transformers
7
- git+https://github.com/suno-ai/bark.git