gskdsrikrishna committed on
Commit cb3f6f4 · verified · 1 Parent(s): 686808c

Update app.py

Files changed (1)
  1. app.py +18 -21
app.py CHANGED
@@ -2,26 +2,22 @@ import streamlit as st
 import torch
 import torchaudio
 from scipy.io.wavfile import write
-
-# Load pre-trained Tacotron2 and WaveGlow models (ensure these are properly installed and loaded)
-# The following lines are placeholders for model loading. You need to load the actual pre-trained models.
-# tacotron2 = Tacotron2()
-# waveglow = WaveGlow()
-
-def synthesize_voice(text):
-    """
-    Synthesize voice from the given text using a pre-trained Tacotron2 and WaveGlow model.
-    This function is a simplified placeholder; you need to load actual models and synthesize properly.
-    """
-    # For the sake of this example, generate a dummy wave
-    # In reality, you would generate this using the Tacotron2 and WaveGlow models
-    sample_rate = 22050
-    duration = 2  # 2 seconds
-    audio = torch.sin(torch.linspace(0, duration * 2 * torch.pi, sample_rate * duration))
+from IPython.display import Audio
+from tacotron2_model import Tacotron2  # Assuming Tacotron2 is available
+from waveglow_model import WaveGlow  # Assuming WaveGlow is available
+
+# Load pre-trained Tacotron2 and WaveGlow models
+tacotron2 = Tacotron2()
+waveglow = WaveGlow()
+
+def synthesize_voice(text, voice_sample):
+    # Perform voice cloning synthesis based on input text and voice sample
+    mel_spec, alignment = tacotron2.encode(text)  # Generate mel-spectrogram from text
+    audio = waveglow.decode(mel_spec)  # Decode the mel-spectrogram to audio
 
     # Save the synthesized audio to a file
     output_path = "synthesized_voice.wav"
-    write(output_path, sample_rate, audio.numpy())
+    write(output_path, 22050, audio)  # Write audio to a .wav file (or use another format)
     return output_path
 
 def main():
@@ -33,16 +29,17 @@ def main():
 
     if uploaded_audio is not None:
         st.audio(uploaded_audio, format="audio/wav")
+        voice_sample = torchaudio.load(uploaded_audio)
 
     # Textbox to input the text to be cloned
     text_input = st.text_area("Enter text for voice cloning")
 
     if st.button("Generate Cloned Voice"):
-        if text_input:
-            # Use the text input for voice cloning
-            output_path = synthesize_voice(text_input)
+        if uploaded_audio and text_input:
+            # Use the uploaded voice sample and input text for synthesis
+            output_path = synthesize_voice(text_input, voice_sample)
 
-            # Play the generated audio using Streamlit's audio component
+            # Play the generated audio
             st.audio(output_path, format="audio/wav")
             st.success("Voice cloning successful!")
 
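
Note on the new synthesis path: the `tacotron2_model` / `waveglow_model` imports and the `encode()` / `decode()` calls assume locally available modules rather than a published API, `scipy.io.wavfile.write()` expects a NumPy array rather than a torch tensor, and `torchaudio.load()` returns a `(waveform, sample_rate)` tuple, so `voice_sample` is a tuple as written. Below is a minimal sketch of one way `synthesize_voice()` could be backed by the pretrained NVIDIA Tacotron 2 and WaveGlow checkpoints from torch.hub; it is not part of this commit, assumes a CUDA GPU, and produces the models' single built-in voice, so it does not actually clone the uploaded speaker (that would require a dedicated voice-cloning model).

import numpy as np
import torch
from scipy.io.wavfile import write

# Pretrained checkpoints published on torch.hub (CUDA GPU assumed).
tacotron2 = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub",
                           "nvidia_tacotron2", model_math="fp16")
tacotron2 = tacotron2.to("cuda").eval()
waveglow = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub",
                          "nvidia_waveglow", model_math="fp16")
waveglow = waveglow.remove_weightnorm(waveglow).to("cuda").eval()
utils = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub", "nvidia_tts_utils")

def synthesize_voice(text):
    # Turn the text into the padded character-id batch Tacotron 2 expects.
    sequences, lengths = utils.prepare_input_sequence([text])
    with torch.no_grad():
        mel, _, _ = tacotron2.infer(sequences, lengths)  # text -> mel-spectrogram
        audio = waveglow.infer(mel)                      # mel-spectrogram -> waveform
    # scipy.io.wavfile.write needs a NumPy array, not a torch tensor.
    wav = audio[0].float().cpu().numpy()
    wav = (wav / np.abs(wav).max() * 32767).astype(np.int16)
    output_path = "synthesized_voice.wav"
    write(output_path, 22050, wav)  # Tacotron 2 / WaveGlow operate at 22,050 Hz
    return output_path

If the uploaded reference sample is kept in the signature, unpack it first, e.g. `waveform, sample_rate = torchaudio.load(uploaded_audio)`, before passing it on for any speaker-conditioning step.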