gskdsrikrishna committed on
Commit b988aff · verified · 1 Parent(s): cb3f6f4

Update app.py

Files changed (1):
  app.py +21 -18
app.py CHANGED
@@ -2,22 +2,26 @@ import streamlit as st
 import torch
 import torchaudio
 from scipy.io.wavfile import write
-from IPython.display import Audio
-from tacotron2_model import Tacotron2  # Assuming Tacotron2 is available
-from waveglow_model import WaveGlow  # Assuming WaveGlow is available
-
-# Load pre-trained Tacotron2 and WaveGlow models
-tacotron2 = Tacotron2()
-waveglow = WaveGlow()
-
-def synthesize_voice(text, voice_sample):
-    # Perform voice cloning synthesis based on input text and voice sample
-    mel_spec, alignment = tacotron2.encode(text)  # Generate mel-spectrogram from text
-    audio = waveglow.decode(mel_spec)  # Decode the mel-spectrogram to audio
+
+# Load pre-trained Tacotron2 and WaveGlow models (ensure these are properly installed and loaded)
+# The following lines are placeholders for model loading. You need to load the actual pre-trained models.
+# tacotron2 = Tacotron2()
+# waveglow = WaveGlow()
+
+def synthesize_voice(text):
+    """
+    Synthesize voice from the given text using a pre-trained Tacotron2 and WaveGlow model.
+    This function is a simplified placeholder; you need to load actual models and synthesize properly.
+    """
+    # For the sake of this example, generate a dummy wave
+    # In reality, you would generate this using the Tacotron2 and WaveGlow models
+    sample_rate = 22050
+    duration = 2  # 2 seconds
+    audio = torch.sin(torch.linspace(0, duration * 2 * torch.pi, sample_rate * duration))
 
     # Save the synthesized audio to a file
     output_path = "synthesized_voice.wav"
-    write(output_path, 22050, audio)  # Write audio to a .wav file (or use another format)
+    write(output_path, sample_rate, audio.numpy())
     return output_path
 
 def main():
@@ -29,17 +33,16 @@ def main():
 
     if uploaded_audio is not None:
         st.audio(uploaded_audio, format="audio/wav")
-        voice_sample = torchaudio.load(uploaded_audio)
 
     # Textbox to input the text to be cloned
     text_input = st.text_area("Enter text for voice cloning")
 
     if st.button("Generate Cloned Voice"):
-        if uploaded_audio and text_input:
-            # Use the uploaded voice sample and input text for synthesis
-            output_path = synthesize_voice(text_input, voice_sample)
+        if text_input:
+            # Use the text input for voice cloning
+            output_path = synthesize_voice(text_input)
 
-            # Play the generated audio
+            # Play the generated audio using Streamlit's audio component
            st.audio(output_path, format="audio/wav")
            st.success("Voice cloning successful!")
 
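The new synthesize_voice writes a dummy sine wave to disk; the commented-out Tacotron2/WaveGlow lines mark where real models would be loaded. As a rough sketch of how those placeholders could be filled in, the snippet below follows NVIDIA's published Tacotron2/WaveGlow example on PyTorch Hub. The entry-point names (nvidia_tacotron2, nvidia_waveglow, nvidia_tts_utils), the CUDA/fp16 assumption, and the call signatures come from that listing rather than from this repository, so verify them before relying on this.

import torch
from scipy.io.wavfile import write

# Sketch only: entry points and signatures assumed from NVIDIA's PyTorch Hub
# listing for Tacotron2/WaveGlow; a CUDA GPU is assumed for the fp16 weights.
tacotron2 = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tacotron2', model_math='fp16')
tacotron2 = tacotron2.to('cuda').eval()

waveglow = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_waveglow', model_math='fp16')
waveglow = waveglow.remove_weightnorm(waveglow).to('cuda').eval()

utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tts_utils')

def synthesize_voice(text):
    # Text -> padded character IDs -> mel-spectrogram (Tacotron2) -> waveform (WaveGlow).
    sequences, lengths = utils.prepare_input_sequence([text])
    with torch.no_grad():
        mel, _, _ = tacotron2.infer(sequences, lengths)
        audio = waveglow.infer(mel)

    # These pre-trained checkpoints generate 22.05 kHz audio.
    output_path = "synthesized_voice.wav"
    write(output_path, 22050, audio[0].float().cpu().numpy())
    return output_path

Because this sketch keeps the same name, argument, and return value as the placeholder, it could drop into the app without changes to main(); it still performs plain text-to-speech rather than cloning the uploaded voice sample.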