Spaces:

hkab
/

vietnamese-rnnt-demo

Sleeping

App Files Community

HKAB committed on Feb 28

Commit

dd0d853

1 Parent(s): 486b001

add quant model

Browse files

Files changed (5) hide show

__pycache__/examples.cpython-310.pyc +0 -0
app.py +25 -8
onnx/decoder_160_8-infer.quant.onnx +3 -0
onnx/encoder_160_8-infer.quant.onnx +3 -0
onnx/jointer_160_8-infer.quant.onnx +3 -0

__pycache__/examples.cpython-310.pyc ADDED Viewed

Binary file (334 Bytes). View file

app.py CHANGED Viewed

@@ -38,6 +38,10 @@ ort_encoder_session = ort.InferenceSession("./onnx/encoder_160_8.onnx")
 ort_decoder_session = ort.InferenceSession("./onnx/decoder_160_8.onnx")
 ort_jointer_session = ort.InferenceSession("./onnx/jointer_160_8.onnx")
 demo = gr.Blocks()
 def build_html_output(s: str, style: str = "result_item_success"):
@@ -55,7 +59,8 @@ def MyPrint(s):
     print(f"{date_time}: {s}")
 def process_microphone(
-    in_filename: str
 ):
     if in_filename is None or in_filename == "":
         return "", build_html_output(
@@ -69,6 +74,7 @@ def process_microphone(
     try:
         return process(
             in_filename=in_filename,
         )
     except Exception as e:
         MyPrint(str(e))
@@ -76,6 +82,7 @@ def process_microphone(
 def process_uploaded_file(
     in_filename: str,
 ):
     if in_filename is None or in_filename == "":
         return "", build_html_output(
@@ -87,7 +94,8 @@ def process_uploaded_file(
     MyPrint(f"Processing uploaded file: {in_filename}")
     try:
         return process(
-            in_filename=in_filename
         )
     except Exception as e:
         MyPrint(str(e))
@@ -194,9 +202,8 @@ def onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_j
 def process(
     in_filename: str,
 ):
-    # filename = convert_to_wav(in_filename)
     now = datetime.now()
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
     MyPrint(f"Started at {date_time}")
@@ -208,7 +215,12 @@ def process(
     duration = len(audio) / SAMPLE_RATE
     audio = np.expand_dims(audio, 0).astype(np.float32)
-    text = onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer)
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
     end = time.time()
@@ -230,6 +242,7 @@ def process(
 with demo:
     gr.Markdown(title)
     gr.Markdown(description)
     with gr.Tabs():
         with gr.TabItem("Upload from disk"):
@@ -245,7 +258,8 @@ with demo:
             gr.Examples(
                 examples=examples,
                 inputs=[
-                    uploaded_file
                 ],
                 outputs=[uploaded_output, uploaded_html_info],
                 fn=process_uploaded_file,
@@ -266,7 +280,8 @@ with demo:
             gr.Examples(
                 examples=examples,
                 inputs=[
-                    microphone
                 ],
                 outputs=[recorded_output, recorded_html_info],
                 fn=process_microphone,
@@ -276,7 +291,8 @@ with demo:
         upload_button.click(
             process_uploaded_file,
             inputs=[
-                uploaded_file
             ],
             outputs=[uploaded_output, uploaded_html_info],
         )
@@ -285,6 +301,7 @@ with demo:
             process_microphone,
             inputs=[
                 microphone,
             ],
             outputs=[recorded_output, recorded_html_info],
         )

 ort_decoder_session = ort.InferenceSession("./onnx/decoder_160_8.onnx")
 ort_jointer_session = ort.InferenceSession("./onnx/jointer_160_8.onnx")
+ort_encoder_session_quant = ort.InferenceSession("./onnx/encoder_160_8-infer.quant.onnx")
+ort_decoder_session_quant = ort.InferenceSession("./onnx/decoder_160_8-infer.quant.onnx")
+ort_jointer_session_quant = ort.InferenceSession("./onnx/jointer_160_8-infer.quant.onnx")
 demo = gr.Blocks()
 def build_html_output(s: str, style: str = "result_item_success"):
     print(f"{date_time}: {s}")
 def process_microphone(
+    in_filename: str,
+    model_type: str
 ):
     if in_filename is None or in_filename == "":
         return "", build_html_output(
     try:
         return process(
             in_filename=in_filename,
+            model_type=model_type
         )
     except Exception as e:
         MyPrint(str(e))
 def process_uploaded_file(
     in_filename: str,
+    model_type: str
 ):
     if in_filename is None or in_filename == "":
         return "", build_html_output(
     MyPrint(f"Processing uploaded file: {in_filename}")
     try:
         return process(
+            in_filename=in_filename,
+            model_type=model_type
         )
     except Exception as e:
         MyPrint(str(e))
 def process(
     in_filename: str,
+    model_type: str
 ):
     now = datetime.now()
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
     MyPrint(f"Started at {date_time}")
     duration = len(audio) / SAMPLE_RATE
     audio = np.expand_dims(audio, 0).astype(np.float32)
+    if model_type == "FP32":
+        MyPrint("Using FP32 model")
+        text = onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer)
+    else:
+        MyPrint("Using INT8 model")
+        text = onnx_online_inference(audio, ort_encoder_session_quant, ort_decoder_session_quant, ort_jointer_session_quant, tokenizer)
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
     end = time.time()
 with demo:
     gr.Markdown(title)
     gr.Markdown(description)
+    model_type = gr.Radio(["FP32", "INT8 (Quantized)"], label="Model type", value="FP32", info="INT8 model is faster but less accurate")
     with gr.Tabs():
         with gr.TabItem("Upload from disk"):
             gr.Examples(
                 examples=examples,
                 inputs=[
+                    uploaded_file,
+                    model_type
                 ],
                 outputs=[uploaded_output, uploaded_html_info],
                 fn=process_uploaded_file,
             gr.Examples(
                 examples=examples,
                 inputs=[
+                    microphone,
+                    model_type
                 ],
                 outputs=[recorded_output, recorded_html_info],
                 fn=process_microphone,
         upload_button.click(
             process_uploaded_file,
             inputs=[
+                uploaded_file,
+                model_type
             ],
             outputs=[uploaded_output, uploaded_html_info],
         )
             process_microphone,
             inputs=[
                 microphone,
+                model_type
             ],
             outputs=[recorded_output, recorded_html_info],
         )

onnx/decoder_160_8-infer.quant.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:978b787f710a2be2598360bc77e181c0cb0ec004555716b90041b9e8c43a06c3
+size 17324565

onnx/encoder_160_8-infer.quant.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95ceeac88d898e1d4d275a185ba891580604bcfd44c0b3611530e8613c23b8f4
+size 101468916

onnx/jointer_160_8-infer.quant.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f68c8210d14abce4ca065ed8bf6d0141666c7f66a74bd67cf9c63aef4c989ec6
+size 793884