Spaces:

Artificial-superintelligence
/

Testvoice

Paused

App Files Files Community

Artificial-superintelligence committed on Oct 22, 2024

Commit

e86aa69

verified ·

1 Parent(s): fbf5ae6

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -76

app.py CHANGED Viewed

@@ -1,76 +1,168 @@
-runtime error
-Exit code: 1. Reason: esample:False
- | > num_mels:64
- | > log_func:np.log10
- | > min_level_db:-100
- | > frame_shift_ms:None
- | > frame_length_ms:None
- | > ref_level_db:20
- | > fft_size:512
- | > power:1.5
- | > preemphasis:0.97
- | > griffin_lim_iters:60
- | > signal_norm:False
- | > symmetric_norm:False
- | > mel_fmin:0
- | > mel_fmax:8000.0
- | > pitch_fmin:1.0
- | > pitch_fmax:640.0
- | > spec_gain:20.0
- | > stft_pad_mode:reflect
- | > max_norm:4.0
- | > clip_norm:False
- | > do_trim_silence:False
- | > trim_db:60
- | > do_sound_norm:False
- | > do_amp_to_db_linear:True
- | > do_amp_to_db_mel:True
- | > do_rms_norm:True
- | > db_level:-27.0
- | > stats_path:None
- | > base:10
- | > hop_length:160
- | > win_length:400
- > External Speaker Encoder Loaded !!
- > initialization of language-embedding layers.
-/usr/local/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:143: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.
-  WeightNorm.apply(module, name, dim)
- > Model fully restored.
- > Setting up Audio Processor...
- | > sample_rate:16000
- | > resample:False
- | > num_mels:64
- | > log_func:np.log10
- | > min_level_db:-100
- | > frame_shift_ms:None
- | > frame_length_ms:None
- | > ref_level_db:20
- | > fft_size:512
- | > power:1.5
- | > preemphasis:0.97
- | > griffin_lim_iters:60
- | > signal_norm:False
- | > symmetric_norm:False
- | > mel_fmin:0
- | > mel_fmax:8000.0
- | > pitch_fmin:1.0
- | > pitch_fmax:640.0
- | > spec_gain:20.0
- | > stft_pad_mode:reflect
- | > max_norm:4.0
- | > clip_norm:False
- | > do_trim_silence:False
- | > trim_db:60
- | > do_sound_norm:False
- | > do_amp_to_db_linear:True
- | > do_amp_to_db_mel:True
- | > do_rms_norm:True
- | > db_level:-27.0
- | > stats_path:None
- | > base:10
- | > hop_length:160
- | > win_length:400
-Traceback (most recent call last):
-  File "/home/user/app/app.py", line 11, in <module>
-    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda" if torch.cuda.is_available() else "cpu")
-AttributeError: 'TTS' object has no attribute 'to'

+import gradio as gr
+import numpy as np
+import librosa
+import soundfile as sf
+from TTS.api import TTS
+import torch
+import os
+import tempfile
+# Load the TTS model once at import time. A failure is printed and leaves
+# `tts` as None, which change_voice() checks before doing any work.
+tts = None
+try:
+    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
+except Exception as init_error:
+    print(f"Error initializing TTS model: {init_error}")
+def load_audio(audio_path):
+    """Read an audio file with ``librosa.load`` without resampling (``sr=None``).
+    Args:
+        audio_path: Path handed straight to ``librosa.load``.
+    Returns:
+        ``(samples, sample_rate)`` on success, or ``(None, None)`` when
+        loading raised; the error is printed instead of propagated.
+    """
+    try:
+        samples, sample_rate = librosa.load(audio_path, sr=None)
+    except Exception as load_error:
+        print(f"Error loading audio: {load_error}")
+        return None, None
+    return samples, sample_rate
+def save_audio(audio, sr, path):
+    """Write ``audio`` to ``path`` with soundfile; failures are only printed.
+    Args:
+        audio: Sample data passed to ``sf.write``.
+        sr: Sample rate recorded in the written file.
+        path: Destination file path.
+    Returns:
+        None. A failed write is reported on stdout and not re-raised, so
+        callers cannot tell from the return value whether the file exists.
+    """
+    try:
+        sf.write(file=path, data=audio, samplerate=sr)
+    except Exception as write_error:
+        print(f"Error saving audio: {write_error}")
+def pitch_shift(audio, sr, n_steps):
+    """Shift the pitch of ``audio`` by ``n_steps`` using librosa.
+    Args:
+        audio: Samples to shift.
+        sr: Sample rate of ``audio``.
+        n_steps: Shift amount forwarded to ``librosa.effects.pitch_shift``.
+    Returns:
+        The shifted samples, or the unmodified ``audio`` if librosa raised
+        (the error is printed).
+    """
+    try:
+        shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
+    except Exception as shift_error:
+        print(f"Error in pitch shifting: {shift_error}")
+        shifted = audio
+    return shifted
+def change_voice(audio_path, pitch_shift_amount, formant_shift_amount,
+                 target_wav="path/to/female_target_voice.wav"):
+    """Pitch-shift a recording, run voice conversion, then shift it again.
+    Args:
+        audio_path: Path of the source recording.
+        pitch_shift_amount: Steps applied before voice conversion.
+        formant_shift_amount: Steps applied to the converted audio.
+        target_wav: Reference recording of the target voice. The default is
+            a placeholder path and must be replaced with a real file.
+    Returns:
+        ``(sample_rate, samples)`` for the converted audio, or
+        ``(None, None)`` if the model is unavailable or any step failed.
+    """
+    if tts is None:
+        return None, None
+    audio, sr = load_audio(audio_path)
+    if audio is None or sr is None:
+        return None, None
+    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)
+    temp_path = None
+    converted_audio_path = None
+    try:
+        # Reserve a unique file name, then close the handle before writing so
+        # the file is not held open while other libraries access it by path.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_path = temp_file.name
+        save_audio(pitched_audio, sr, temp_path)
+        # NOTE(review): the Coqui `TTS.voice_conversion` API may not accept
+        # `output_wav` and may return a waveform rather than a file path;
+        # `voice_conversion_to_file` looks like the path-returning variant.
+        # Confirm against the installed TTS version.
+        converted_audio_path = tts.voice_conversion(
+            source_wav=temp_path,
+            target_wav=target_wav,
+            output_wav=None,
+        )
+        # Keep the converted file's own sample rate: it need not match the
+        # input's, and using the wrong rate would change pitch and duration.
+        converted_audio, converted_sr = load_audio(converted_audio_path)
+        if converted_audio is None or converted_sr is None:
+            return None, None
+        formant_shifted_audio = librosa.effects.pitch_shift(
+            converted_audio, sr=converted_sr, n_steps=formant_shift_amount
+        )
+        return converted_sr, formant_shifted_audio
+    except Exception as e:
+        print(f"Error in voice conversion: {e}")
+        return None, None
+    finally:
+        # Remove intermediate files on every exit path, not only on success.
+        for leftover in (temp_path, converted_audio_path):
+            if isinstance(leftover, (str, os.PathLike)) and os.path.exists(leftover):
+                try:
+                    os.unlink(leftover)
+                except OSError as cleanup_error:
+                    print(f"Error removing temporary file: {cleanup_error}")
+def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
+    """Gradio callback: transform the uploaded voice and return a wav path.
+    Args:
+        audio_file: The uploaded audio, either a path string or an object
+            exposing the path as ``.name``; ``None`` when nothing was uploaded.
+        pitch_shift_amount: Value of the pitch slider.
+        formant_shift_amount: Value of the formant slider.
+    Returns:
+        ``"output_voice.wav"`` once the result has been handed to
+        ``save_audio``, or ``None`` if there was no input or conversion failed.
+    """
+    if audio_file is None:
+        return None
+    # gr.Audio(type="filepath") delivers a plain string, which has no `.name`
+    # attribute; fall back to the value itself when `.name` is absent.
+    audio_path = getattr(audio_file, "name", audio_file)
+    sr, audio = change_voice(audio_path, pitch_shift_amount, formant_shift_amount)
+    if sr is None or audio is None:
+        return None
+    output_path = "output_voice.wav"
+    save_audio(audio, sr, output_path)
+    return output_path
+# Stylesheet passed to gr.Blocks(css=...): page background, a centred card
+# container, heading/description text, and the input/output section panels.
+custom_css = """
+.gradio-container {
+    background-color: #f0f4f8;
+}
+.container {
+    max-width: 900px;
+    margin: auto;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: white;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+h1 {
+    color: #2c3e50;
+    text-align: center;
+    font-size: 2.5em;
+    margin-bottom: 20px;
+}
+.description {
+    text-align: center;
+    color: #34495e;
+    margin-bottom: 30px;
+}
+.input-section, .output-section {
+    background-color: #ecf0f1;
+    padding: 20px;
+    border-radius: 8px;
+    margin-bottom: 20px;
+}
+.input-section h3, .output-section h3 {
+    color: #2980b9;
+    margin-bottom: 15px;
+}
+"""
+# Gradio interface: header, input column (upload + two sliders + button),
+# output column, click wiring, and usage notes.
+with gr.Blocks(css=custom_css) as demo:
+    gr.HTML(
+        """
+        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
+            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
+                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
+                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
+                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
+                </svg>
+                <h1 style="font-weight: 900; margin-bottom: 7px;">
+                    AI Voice Changer
+                </h1>
+            </div>
+            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
+        </div>
+        """
+    )
+    with gr.Row():
+        with gr.Column(elem_classes="input-section"):
+            gr.Markdown("### Input")
+            audio_input = gr.Audio(type="filepath", label="Upload Voice")
+            # A `with` block does not open a new scope, so these names are
+            # module globals. They must not be called `pitch_shift`, or the
+            # slider would replace the pitch_shift() function that
+            # change_voice() calls.
+            pitch_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
+            formant_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
+            submit_btn = gr.Button("Transform Voice", variant="primary")
+        with gr.Column(elem_classes="output-section"):
+            gr.Markdown("### Output")
+            audio_output = gr.Audio(label="Transformed Voice")
+    submit_btn.click(
+        fn=process_audio,
+        inputs=[audio_input, pitch_slider, formant_slider],
+        outputs=audio_output,
+    )
+    gr.Markdown(
+        """
+        ### How to use:
+        1. Upload an audio file containing the voice you want to transform.
+        2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
+        3. Click the "Transform Voice" button to process the audio.
+        4. Listen to the transformed voice in the output section.
+        5. Download the transformed audio file if desired.
+        Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
+        """
+    )
+# Start the web server only when run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()