Spaces:

argmaxinc
/

whisperkit-benchmarks

Running

App Files Community

ardaatahan committed on Oct 30, 2024

Commit

50ce297

1 Parent(s): b54f134

fix explanation texts

Browse files

Files changed (3) hide show

constants.py +0 -3
main.py +3 -3
utils.py +1 -1

constants.py CHANGED Viewed

@@ -73,7 +73,6 @@ METHODOLOGY_TEXT = dedent(
     - **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
     - **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
         - This metric does not capture improvements to the reference. It only measures potential regressions.
-    - **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
     - **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.
     ## Data
@@ -107,7 +106,6 @@ PERFORMANCE_TEXT = dedent(
     ## Metrics
     - **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
     - **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
-    - **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
     ## Data
@@ -136,7 +134,6 @@ COL_NAMES = {
     "model": "Model",
     "device": "Device",
     "os": "OS",
-    "parity": "Parity %",
     "english_wer": "English WER",
     "multilingual_wer": "Multilingual WER",
 }

     - **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
     - **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
         - This metric does not capture improvements to the reference. It only measures potential regressions.
     - **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.
     ## Data
     ## Metrics
     - **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
     - **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
     ## Data
     "model": "Model",
     "device": "Device",
     "os": "OS",
     "english_wer": "English WER",
     "multilingual_wer": "Multilingual WER",
 }

main.py CHANGED Viewed

@@ -522,7 +522,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
                             )
                         with gr.Column(scale=4, elem_classes="exclude_models_column"):
                             exclude_performance_models = gr.Textbox(
-                                placeholder="🔍 Exclude (separate multiple queries with ';')",
                                 label="Exclude Models",
                             )
                     with gr.Row():
@@ -757,7 +757,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
                             )
                         with gr.Column(scale=4, elem_classes="exclude_models_column"):
                             exclude_quality_models = gr.Textbox(
-                                placeholder="🔍 Exclude Model (separate multiple models with ';')",
                                 label="Exclude Models",
                             )
                     with gr.Row():
@@ -1184,7 +1184,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
                             )
                         with gr.Column(scale=4, elem_classes="exclude_models_column"):
                             exclude_support_models = gr.Textbox(
-                                placeholder="🔍 Exclude Model (separate multiple models with ';')",
                                 label="Exclude Models",
                             )
                     with gr.Row():

                             )
                         with gr.Column(scale=4, elem_classes="exclude_models_column"):
                             exclude_performance_models = gr.Textbox(
+                                placeholder="🔍 Exclude Model (separate multiple queries with ';')",
                                 label="Exclude Models",
                             )
                     with gr.Row():
                             )
                         with gr.Column(scale=4, elem_classes="exclude_models_column"):
                             exclude_quality_models = gr.Textbox(
+                                placeholder="🔍 Exclude Model (separate multiple queries with ';')",
                                 label="Exclude Models",
                             )
                     with gr.Row():
                             )
                         with gr.Column(scale=4, elem_classes="exclude_models_column"):
                             exclude_support_models = gr.Textbox(
+                                placeholder="🔍 Exclude Model (separate multiple queries with ';')",
                                 label="Exclude Models",
                             )
                     with gr.Row():

utils.py CHANGED Viewed

@@ -545,7 +545,7 @@ def create_initial_performance_column_dict():
     :return: A list of column dictionaries
     This function defines the basic structure of the performance table,
-    including columns for model, device, OS, parity, average WER, QoI, speed, and tokens per second.
     """
     return [
         [

     :return: A list of column dictionaries
     This function defines the basic structure of the performance table,
+    including columns for model, device, OS, average WER, QoI, speed, and tokens per second.
     """
     return [
         [