gskdsrikrishna committed on
Commit b988aff · verified · 1 Parent(s): cb3f6f4

Update app.py

Files changed (1):
  app.py +21 -18
app.py CHANGED
@@ -2,22 +2,26 @@ import streamlit as st
 import torch
 import torchaudio
 from scipy.io.wavfile import write
-from IPython.display import Audio
-from tacotron2_model import Tacotron2  # Assuming Tacotron2 is available
-from waveglow_model import WaveGlow  # Assuming WaveGlow is available
-
-# Load pre-trained Tacotron2 and WaveGlow models
-tacotron2 = Tacotron2()
-waveglow = WaveGlow()
-
-def synthesize_voice(text, voice_sample):
-    # Perform voice cloning synthesis based on input text and voice sample
-    mel_spec, alignment = tacotron2.encode(text)  # Generate mel-spectrogram from text
-    audio = waveglow.decode(mel_spec)  # Decode the mel-spectrogram to audio
+
+# Load pre-trained Tacotron2 and WaveGlow models (ensure these are properly installed and loaded)
+# The following lines are placeholders for model loading. You need to load the actual pre-trained models.
+# tacotron2 = Tacotron2()
+# waveglow = WaveGlow()
+
+def synthesize_voice(text):
+    """
+    Synthesize voice from the given text using a pre-trained Tacotron2 and WaveGlow model.
+    This function is a simplified placeholder; you need to load actual models and synthesize properly.
+    """
+    # For the sake of this example, generate a dummy wave
+    # In reality, you would generate this using the Tacotron2 and WaveGlow models
+    sample_rate = 22050
+    duration = 2  # 2 seconds
+    audio = torch.sin(torch.linspace(0, duration * 2 * torch.pi, sample_rate * duration))
 
     # Save the synthesized audio to a file
     output_path = "synthesized_voice.wav"
-    write(output_path, 22050, audio)  # Write audio to a .wav file (or use another format)
+    write(output_path, sample_rate, audio.numpy())
     return output_path
 
 def main():
@@ -29,17 +33,16 @@ def main():
 
     if uploaded_audio is not None:
         st.audio(uploaded_audio, format="audio/wav")
-        voice_sample = torchaudio.load(uploaded_audio)
 
     # Textbox to input the text to be cloned
     text_input = st.text_area("Enter text for voice cloning")
 
     if st.button("Generate Cloned Voice"):
-        if uploaded_audio and text_input:
-            # Use the uploaded voice sample and input text for synthesis
-            output_path = synthesize_voice(text_input, voice_sample)
+        if text_input:
+            # Use the text input for voice cloning
+            output_path = synthesize_voice(text_input)
 
-            # Play the generated audio
+            # Play the generated audio using Streamlit's audio component
            st.audio(output_path, format="audio/wav")
            st.success("Voice cloning successful!")
 
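The new synthesize_voice writes a dummy sine wave to disk; the commented-out Tacotron2/WaveGlow lines mark where real models would be loaded. As a rough sketch of how those placeholders could be filled in, the snippet below follows NVIDIA's published Tacotron2/WaveGlow example on PyTorch Hub. The entry-point names (nvidia_tacotron2, nvidia_waveglow, nvidia_tts_utils), the CUDA/fp16 assumption, and the call signatures come from that listing rather than from this repository, so verify them before relying on this.

import torch
from scipy.io.wavfile import write

# Sketch only: entry points and signatures assumed from NVIDIA's PyTorch Hub
# listing for Tacotron2/WaveGlow; a CUDA GPU is assumed for the fp16 weights.
tacotron2 = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tacotron2', model_math='fp16')
tacotron2 = tacotron2.to('cuda').eval()

waveglow = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_waveglow', model_math='fp16')
waveglow = waveglow.remove_weightnorm(waveglow).to('cuda').eval()

utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tts_utils')

def synthesize_voice(text):
    # Text -> padded character IDs -> mel-spectrogram (Tacotron2) -> waveform (WaveGlow).
    sequences, lengths = utils.prepare_input_sequence([text])
    with torch.no_grad():
        mel, _, _ = tacotron2.infer(sequences, lengths)
        audio = waveglow.infer(mel)

    # These pre-trained checkpoints generate 22.05 kHz audio.
    output_path = "synthesized_voice.wav"
    write(output_path, 22050, audio[0].float().cpu().numpy())
    return output_path

Because this sketch keeps the same name, argument, and return value as the placeholder, it could drop into the app without changes to main(); it still performs plain text-to-speech rather than cloning the uploaded voice sample.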