gskdsrikrishna committed on
Commit cb3f6f4 · verified · 1 Parent(s): 686808c

Update app.py

Files changed (1)
  1. app.py +18 -21
app.py CHANGED
@@ -2,26 +2,22 @@ import streamlit as st
 import torch
 import torchaudio
 from scipy.io.wavfile import write
-
-# Load pre-trained Tacotron2 and WaveGlow models (ensure these are properly installed and loaded)
-# The following lines are placeholders for model loading. You need to load the actual pre-trained models.
-# tacotron2 = Tacotron2()
-# waveglow = WaveGlow()
-
-def synthesize_voice(text):
-    """
-    Synthesize voice from the given text using a pre-trained Tacotron2 and WaveGlow model.
-    This function is a simplified placeholder; you need to load actual models and synthesize properly.
-    """
-    # For the sake of this example, generate a dummy wave
-    # In reality, you would generate this using the Tacotron2 and WaveGlow models
-    sample_rate = 22050
-    duration = 2  # 2 seconds
-    audio = torch.sin(torch.linspace(0, duration * 2 * torch.pi, sample_rate * duration))
+from IPython.display import Audio
+from tacotron2_model import Tacotron2  # Assuming Tacotron2 is available
+from waveglow_model import WaveGlow  # Assuming WaveGlow is available
+
+# Load pre-trained Tacotron2 and WaveGlow models
+tacotron2 = Tacotron2()
+waveglow = WaveGlow()
+
+def synthesize_voice(text, voice_sample):
+    # Perform voice cloning synthesis based on input text and voice sample
+    mel_spec, alignment = tacotron2.encode(text)  # Generate mel-spectrogram from text
+    audio = waveglow.decode(mel_spec)  # Decode the mel-spectrogram to audio
 
     # Save the synthesized audio to a file
     output_path = "synthesized_voice.wav"
-    write(output_path, sample_rate, audio.numpy())
+    write(output_path, 22050, audio)  # Write audio to a .wav file (or use another format)
     return output_path
 
 def main():
@@ -33,16 +29,17 @@ def main():
 
     if uploaded_audio is not None:
         st.audio(uploaded_audio, format="audio/wav")
+        voice_sample = torchaudio.load(uploaded_audio)
 
     # Textbox to input the text to be cloned
     text_input = st.text_area("Enter text for voice cloning")
 
     if st.button("Generate Cloned Voice"):
-        if text_input:
-            # Use the text input for voice cloning
-            output_path = synthesize_voice(text_input)
+        if uploaded_audio and text_input:
+            # Use the uploaded voice sample and input text for synthesis
+            output_path = synthesize_voice(text_input, voice_sample)
 
-            # Play the generated audio using Streamlit's audio component
+            # Play the generated audio
             st.audio(output_path, format="audio/wav")
             st.success("Voice cloning successful!")
 
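
Note on the new synthesis path: the `tacotron2_model` / `waveglow_model` imports and the `encode()` / `decode()` calls assume locally available modules rather than a published API, `scipy.io.wavfile.write()` expects a NumPy array rather than a torch tensor, and `torchaudio.load()` returns a `(waveform, sample_rate)` tuple, so `voice_sample` is a tuple as written. Below is a minimal sketch of one way `synthesize_voice()` could be backed by the pretrained NVIDIA Tacotron 2 and WaveGlow checkpoints from torch.hub; it is not part of this commit, assumes a CUDA GPU, and produces the models' single built-in voice, so it does not actually clone the uploaded speaker (that would require a dedicated voice-cloning model).

import numpy as np
import torch
from scipy.io.wavfile import write

# Pretrained checkpoints published on torch.hub (CUDA GPU assumed).
tacotron2 = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub",
                           "nvidia_tacotron2", model_math="fp16")
tacotron2 = tacotron2.to("cuda").eval()
waveglow = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub",
                          "nvidia_waveglow", model_math="fp16")
waveglow = waveglow.remove_weightnorm(waveglow).to("cuda").eval()
utils = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub", "nvidia_tts_utils")

def synthesize_voice(text):
    # Turn the text into the padded character-id batch Tacotron 2 expects.
    sequences, lengths = utils.prepare_input_sequence([text])
    with torch.no_grad():
        mel, _, _ = tacotron2.infer(sequences, lengths)  # text -> mel-spectrogram
        audio = waveglow.infer(mel)                      # mel-spectrogram -> waveform
    # scipy.io.wavfile.write needs a NumPy array, not a torch tensor.
    wav = audio[0].float().cpu().numpy()
    wav = (wav / np.abs(wav).max() * 32767).astype(np.int16)
    output_path = "synthesized_voice.wav"
    write(output_path, 22050, wav)  # Tacotron 2 / WaveGlow operate at 22,050 Hz
    return output_path

If the uploaded reference sample is kept in the signature, unpack it first, e.g. `waveform, sample_rate = torchaudio.load(uploaded_audio)`, before passing it on for any speaker-conditioning step.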