HKAB committed on
Commit
5ec340b
·
1 Parent(s): dd0d853
Files changed (3) hide show
  1. __pycache__/examples.cpython-310.pyc +0 -0
  2. app.py +6 -3
  3. examples.py +6 -5
__pycache__/examples.cpython-310.pyc CHANGED
Binary files a/__pycache__/examples.cpython-310.pyc and b/__pycache__/examples.cpython-310.pyc differ
 
app.py CHANGED
@@ -105,8 +105,9 @@ title = "# Streaming RNN-T with Whisper Encoder"
105
  description = """
106
  Visit <https://github.com/HKAB/rnnt-whisper-tutorial/> for more information.
107
 
108
- - This model run on CPU
109
- - This model might not work with your microphone since it was trained on a quite clean dataset. Try to speak loudly and clearly 😃
 
110
  """
111
 
112
  def onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer):
@@ -242,7 +243,7 @@ def process(
242
  with demo:
243
  gr.Markdown(title)
244
  gr.Markdown(description)
245
- model_type = gr.Radio(["FP32", "INT8 (Quantized)"], label="Model type", value="FP32", info="INT8 model is faster but less accurate")
246
 
247
  with gr.Tabs():
248
  with gr.TabItem("Upload from disk"):
@@ -261,6 +262,7 @@ with demo:
261
  uploaded_file,
262
  model_type
263
  ],
 
264
  outputs=[uploaded_output, uploaded_html_info],
265
  fn=process_uploaded_file,
266
  label="Cherry-picked examples",
@@ -283,6 +285,7 @@ with demo:
283
  microphone,
284
  model_type
285
  ],
 
286
  outputs=[recorded_output, recorded_html_info],
287
  fn=process_microphone,
288
  label="Cherry-picked examples",
 
105
  description = """
106
  Visit <https://github.com/HKAB/rnnt-whisper-tutorial/> for more information.
107
 
108
+ - This model runs on CPU (Free tier) so the RTF of FP32 model is around 1.5.
109
+ - This model might not work with your microphone since it was trained on a quite clean dataset. Try to speak loudly and clearly 😃
110
+ - Even if you upload a full audio file, the model will process it in a streaming fashion.
111
  """
112
 
113
  def onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer):
 
243
  with demo:
244
  gr.Markdown(title)
245
  gr.Markdown(description)
246
+ model_type = gr.Radio(["FP32", "INT8"], label="Model type", value="FP32", info="INT8 model is faster but less accurate")
247
 
248
  with gr.Tabs():
249
  with gr.TabItem("Upload from disk"):
 
262
  uploaded_file,
263
  model_type
264
  ],
265
+ cache_mode="lazy",
266
  outputs=[uploaded_output, uploaded_html_info],
267
  fn=process_uploaded_file,
268
  label="Cherry-picked examples",
 
285
  microphone,
286
  model_type
287
  ],
288
+ cache_mode="lazy",
289
  outputs=[recorded_output, recorded_html_info],
290
  fn=process_microphone,
291
  label="Cherry-picked examples",
examples.py CHANGED
@@ -1,17 +1,18 @@
1
  examples = [
2
  [
3
- "./test_wavs/Hue_short.wav"
 
4
  ],
5
  [
6
- "./test_wavs/12345_short.wav"
 
7
  ],
8
  [
9
  "./test_wavs/Trump_long.mp3",
 
10
  ],
11
  [
12
  "./test_wavs/Ucraina_moderate.mp3",
13
- ],
14
- [
15
- "./test_wavs/Duongsat_short.m4a",
16
  ]
17
  ]
 
1
  examples = [
2
  [
3
+ "./test_wavs/Hue_short.wav",
4
+ "FP32",
5
  ],
6
  [
7
+ "./test_wavs/12345_short.wav",
8
+ "FP32",
9
  ],
10
  [
11
  "./test_wavs/Trump_long.mp3",
12
+ "FP32",
13
  ],
14
  [
15
  "./test_wavs/Ucraina_moderate.mp3",
16
+ "FP32",
 
 
17
  ]
18
  ]