AlexK-PL committed on
Commit
92be68f
·
1 Parent(s): 2f6ba98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -5
app.py CHANGED
@@ -44,8 +44,19 @@ vocoder_model.eval(inference=False)
44
 
45
 
46
  def plot_spec_align(mel, align):
 
 
 
 
 
 
 
 
 
 
 
 
47
  grid_spec = gridspec.GridSpec(1, 1)
48
-
49
  ax = plt.subplot(grid_spec[0])
50
  plt.imshow(mel)
51
  plt.axis('off')
@@ -58,8 +69,9 @@ def plot_spec_align(mel, align):
58
 
59
  plt.imshow(legend, interpolation='nearest')
60
  plt.grid('off')
 
61
 
62
- return plt
63
 
64
 
65
  def synthesize(text, gst_1, gst_2, gst_3):
@@ -80,14 +92,14 @@ def synthesize(text, gst_1, gst_2, gst_3):
80
  # prepare plot for the output:
81
  mel_outputs_postnet = mel_outputs_postnet.squeeze().detach().numpy()
82
  alignments = alignments.squeeze().detach().numpy()
83
- plt = plot_spec_align(mel_outputs_postnet, alignments)
84
 
85
- return (22050, audio_numpy), plt
86
 
87
 
88
  iface = gr.Interface(fn=synthesize, inputs=[gr.Textbox(label="Input Text"), gr.Slider(0.2, 0.45, label="First style token weight:"),
89
  gr.Slider(0.2, 0.45, label="Second style token weight:"), gr.Slider(0.2, 0.45, label="Third style token weight:")],
90
- outputs=[gr.Audio(label="Generated Speech", type="numpy"), gr.outputs.Image(type="plot", label="Output"),],
91
  title="Single-Head Attention Tacotron2 with Style Tokens", description=DESCRIPTION)
92
  iface.launch()
93
 
 
44
 
45
 
46
  def plot_spec_align(mel, align):
47
+
48
+ fig_mel = plt.figure()
49
+ ax_mel = fig_mel.add_subplot(111)
50
+ ax_mel.imshow(mel)
51
+ ax_mel.set_title('Mel-Scale Spectrogram', fontsize=20)
52
+
53
+ fig_align = plt.figure()
54
+ ax_align = fig_align.add_subplot(111)
55
+ ax_align.imshow(align)
56
+ ax_align.set_title('Alignment', fontsize=20)
57
+
58
+ '''
59
  grid_spec = gridspec.GridSpec(1, 1)
 
60
  ax = plt.subplot(grid_spec[0])
61
  plt.imshow(mel)
62
  plt.axis('off')
 
69
 
70
  plt.imshow(legend, interpolation='nearest')
71
  plt.grid('off')
72
+ '''
73
 
74
+ return fig_mel, fig_align
75
 
76
 
77
  def synthesize(text, gst_1, gst_2, gst_3):
 
92
  # prepare plot for the output:
93
  mel_outputs_postnet = mel_outputs_postnet.squeeze().detach().numpy()
94
  alignments = alignments.squeeze().detach().numpy()
95
+ fig_mel, fig_align = plot_spec_align(mel_outputs_postnet, alignments)
96
 
97
+ return (22050, audio_numpy), fig_mel, fig_align
98
 
99
 
100
  iface = gr.Interface(fn=synthesize, inputs=[gr.Textbox(label="Input Text"), gr.Slider(0.2, 0.45, label="First style token weight:"),
101
  gr.Slider(0.2, 0.45, label="Second style token weight:"), gr.Slider(0.2, 0.45, label="Third style token weight:")],
102
+ outputs=[gr.Audio(label="Generated Speech", type="numpy"), gr.Plot(label="Spectrogram"), gr.Plot(label="Alignments")],
103
  title="Single-Head Attention Tacotron2 with Style Tokens", description=DESCRIPTION)
104
  iface.launch()
105