Spaces:
Running
Running
Commit
·
50ce297
1
Parent(s):
b54f134
fix explanation texts
Browse files- constants.py +0 -3
- main.py +3 -3
- utils.py +1 -1
constants.py
CHANGED
|
@@ -73,7 +73,6 @@ METHODOLOGY_TEXT = dedent(
|
|
| 73 |
- **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
|
| 74 |
- **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
|
| 75 |
- This metric does not capture improvements to the reference. It only measures potential regressions.
|
| 76 |
-
- **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
|
| 77 |
- **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.
|
| 78 |
|
| 79 |
## Data
|
|
@@ -107,7 +106,6 @@ PERFORMANCE_TEXT = dedent(
|
|
| 107 |
## Metrics
|
| 108 |
- **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
|
| 109 |
- **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
|
| 110 |
-
- **Parity %**: The percentage difference between a model's Average WER on a given device and its Average WER on the Apple M2 Ultra, where a negative value indicates worse performance compared to the M2 Ultra.
|
| 111 |
|
| 112 |
## Data
|
| 113 |
|
|
@@ -136,7 +134,6 @@ COL_NAMES = {
|
|
| 136 |
"model": "Model",
|
| 137 |
"device": "Device",
|
| 138 |
"os": "OS",
|
| 139 |
-
"parity": "Parity %",
|
| 140 |
"english_wer": "English WER",
|
| 141 |
"multilingual_wer": "Multilingual WER",
|
| 142 |
}
|
|
|
|
| 73 |
- **WER (Word Error Rate)** (⬇️): The ratio of words incorrectly transcribed when comparing the model's output to reference transcriptions, with lower values indicating better accuracy.
|
| 74 |
- **QoI (Quality of Inference)** (⬆️): The ratio of examples where WhisperKit performs no worse than the reference model.
|
| 75 |
- This metric does not capture improvements to the reference. It only measures potential regressions.
|
|
|
|
| 76 |
- **Multilingual results**: Separated into "language hinted" and "language predicted" categories to evaluate performance with and without prior knowledge of the input language.
|
| 77 |
|
| 78 |
## Data
|
|
|
|
| 106 |
## Metrics
|
| 107 |
- **Speed factor** (⬆️): Computed as the ratio of input audio length to end-to-end WhisperKit latency for transcribing that audio. A speed factor of N means N seconds of input audio was transcribed in 1 second.
|
| 108 |
- **Tok/s (Tokens per second)** (⬆️): Total number of text decoder forward passes divided by the end-to-end processing time.
|
|
|
|
| 109 |
|
| 110 |
## Data
|
| 111 |
|
|
|
|
| 134 |
"model": "Model",
|
| 135 |
"device": "Device",
|
| 136 |
"os": "OS",
|
|
|
|
| 137 |
"english_wer": "English WER",
|
| 138 |
"multilingual_wer": "Multilingual WER",
|
| 139 |
}
|
main.py
CHANGED
|
@@ -522,7 +522,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
|
|
| 522 |
)
|
| 523 |
with gr.Column(scale=4, elem_classes="exclude_models_column"):
|
| 524 |
exclude_performance_models = gr.Textbox(
|
| 525 |
-
placeholder="🔍 Exclude (separate multiple queries with ';')",
|
| 526 |
label="Exclude Models",
|
| 527 |
)
|
| 528 |
with gr.Row():
|
|
@@ -757,7 +757,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
|
|
| 757 |
)
|
| 758 |
with gr.Column(scale=4, elem_classes="exclude_models_column"):
|
| 759 |
exclude_quality_models = gr.Textbox(
|
| 760 |
-
placeholder="🔍 Exclude Model (separate multiple
|
| 761 |
label="Exclude Models",
|
| 762 |
)
|
| 763 |
with gr.Row():
|
|
@@ -1184,7 +1184,7 @@ with gr.Blocks(css=css, theme=gr.themes.Base(font=font)) as demo:
|
|
| 1184 |
)
|
| 1185 |
with gr.Column(scale=4, elem_classes="exclude_models_column"):
|
| 1186 |
exclude_support_models = gr.Textbox(
|
| 1187 |
-
placeholder="🔍 Exclude Model (separate multiple
|
| 1188 |
label="Exclude Models",
|
| 1189 |
)
|
| 1190 |
with gr.Row():
|
|
|
|
| 522 |
)
|
| 523 |
with gr.Column(scale=4, elem_classes="exclude_models_column"):
|
| 524 |
exclude_performance_models = gr.Textbox(
|
| 525 |
+
placeholder="🔍 Exclude Model (separate multiple queries with ';')",
|
| 526 |
label="Exclude Models",
|
| 527 |
)
|
| 528 |
with gr.Row():
|
|
|
|
| 757 |
)
|
| 758 |
with gr.Column(scale=4, elem_classes="exclude_models_column"):
|
| 759 |
exclude_quality_models = gr.Textbox(
|
| 760 |
+
placeholder="🔍 Exclude Model (separate multiple queries with ';')",
|
| 761 |
label="Exclude Models",
|
| 762 |
)
|
| 763 |
with gr.Row():
|
|
|
|
| 1184 |
)
|
| 1185 |
with gr.Column(scale=4, elem_classes="exclude_models_column"):
|
| 1186 |
exclude_support_models = gr.Textbox(
|
| 1187 |
+
placeholder="🔍 Exclude Model (separate multiple queries with ';')",
|
| 1188 |
label="Exclude Models",
|
| 1189 |
)
|
| 1190 |
with gr.Row():
|
utils.py
CHANGED
|
@@ -545,7 +545,7 @@ def create_initial_performance_column_dict():
|
|
| 545 |
:return: A list of column dictionaries
|
| 546 |
|
| 547 |
This function defines the basic structure of the performance table,
|
| 548 |
-
including columns for model, device, OS,
|
| 549 |
"""
|
| 550 |
return [
|
| 551 |
[
|
|
|
|
| 545 |
:return: A list of column dictionaries
|
| 546 |
|
| 547 |
This function defines the basic structure of the performance table,
|
| 548 |
+
including columns for model, device, OS, average WER, QoI, speed, and tokens per second.
|
| 549 |
"""
|
| 550 |
return [
|
| 551 |
[
|