Spaces:
Runtime error
Runtime error
initial commit
Browse files- README.md +22 -1
- app.py +109 -0
- requirements.txt +0 -0
README.md
CHANGED
|
@@ -11,4 +11,25 @@ license: mit
|
|
| 11 |
short_description: Compare OpenAI Whisper against SenseVoice Small Results
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
short_description: Compare OpenAI Whisper against SenseVoice Small Results
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Whisper vs. FunASR SenseVoice Comparison
|
| 15 |
+
|
| 16 |
+
This Space lets you compare OpenAI Whisper variants against FunAudioLLM’s SenseVoice models for automatic speech recognition (ASR), all via a simple Gradio 5 UI.
|
| 17 |
+
|
| 18 |
+
## 🚀 Demo
|
| 19 |
+
|
| 20 |
+
1. **Select Whisper model** from the dropdown.
|
| 21 |
+
2. **Select SenseVoice model** from the dropdown.
|
| 22 |
+
3. (Optional) **Toggle punctuation** for SenseVoice.
|
| 23 |
+
4. **Upload** an audio file (wav, mp3, etc.) or **record** with your microphone.
|
| 24 |
+
5. Click **Transcribe** to run both ASRs side-by-side.
|
| 25 |
+
|
| 26 |
+
## 📁 Files
|
| 27 |
+
|
| 28 |
+
- **app.py**
|
| 29 |
+
Main Gradio application. Sets up two HF-ASR pipelines and displays their outputs.
|
| 30 |
+
|
| 31 |
+
- **requirements.txt**
|
| 32 |
+
Python dependencies: Gradio, Transformers, Torch, Torchaudio, Accelerate, ffmpeg-python.
|
| 33 |
+
|
| 34 |
+
- **README.md**
|
| 35 |
+
This documentation.
|
app.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import spaces
|
| 3 |
+
import re
|
| 4 |
+
import torch
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from transformers import pipeline
|
| 7 |
+
|
| 8 |
+
# List of Whisper model IDs
|
| 9 |
+
WHISPER_MODELS = [
|
| 10 |
+
"openai/whisper-large-v3-turbo",
|
| 11 |
+
"openai/whisper-large-v3",
|
| 12 |
+
"openai/whisper-tiny",
|
| 13 |
+
"openai/whisper-small",
|
| 14 |
+
"openai/whisper-medium",
|
| 15 |
+
"openai/whisper-base",
|
| 16 |
+
"JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW",
|
| 17 |
+
"Jingmiao/whisper-small-zh_tw",
|
| 18 |
+
"DDTChen/whisper-medium-zh-tw",
|
| 19 |
+
"kimbochen/whisper-small-zh-tw",
|
| 20 |
+
"ChrisTorng/whisper-large-v3-turbo-common_voice_19_0-zh-TW-ct2",
|
| 21 |
+
"JacobLinCool/whisper-large-v3-turbo-zh-TW-clean-1",
|
| 22 |
+
"JunWorks/whisper-small-zhTW",
|
| 23 |
+
"WANGTINGTING/whisper-large-v2-zh-TW-vol2",
|
| 24 |
+
"xmzhu/whisper-tiny-zh-TW",
|
| 25 |
+
"ingrenn/whisper-small-common-voice-13-zh-TW",
|
| 26 |
+
"jun-han/whisper-small-zh-TW",
|
| 27 |
+
"xmzhu/whisper-tiny-zh-TW-baseline",
|
| 28 |
+
"JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2",
|
| 29 |
+
"JacobLinCool/whisper-large-v3-common_voice_19_0-zh-TW-full-1",
|
| 30 |
+
"momo103197/whisper-small-zh-TW-mix",
|
| 31 |
+
"JacobLinCool/whisper-large-v3-turbo-zh-TW-clean-1-merged",
|
| 32 |
+
"JacobLinCool/whisper-large-v2-common_voice_19_0-zh-TW-full-1",
|
| 33 |
+
"kimas1269/whisper-meduim_zhtw",
|
| 34 |
+
"JunWorks/whisper-base-zhTW",
|
| 35 |
+
"JunWorks/whisper-small-zhTW-frozenDecoder",
|
| 36 |
+
"sandy1990418/whisper-large-v3-turbo-zh-tw",
|
| 37 |
+
"JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-pissa-merged",
|
| 38 |
+
"momo103197/whisper-small-zh-TW-16",
|
| 39 |
+
"k1nto/Belle-whisper-large-v3-zh-punct-ct2"
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
# List of SenseVoice model IDs
|
| 43 |
+
SENSEVOICE_MODELS = [
|
| 44 |
+
"FunAudioLLM/SenseVoiceSmall",
|
| 45 |
+
"AXERA-TECH/SenseVoice",
|
| 46 |
+
"alextomcat/SenseVoiceSmall",
|
| 47 |
+
"ChenChenyu/SenseVoiceSmall-finetuned",
|
| 48 |
+
"apinge/sensevoice-small"
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
# Cache pipelines
|
| 52 |
+
pipes = {}
|
| 53 |
+
|
| 54 |
+
def get_asr_pipe(model_id):
|
| 55 |
+
if model_id not in pipes:
|
| 56 |
+
# run on GPU if available
|
| 57 |
+
device = 0 if torch.cuda.is_available() else -1
|
| 58 |
+
pipes[model_id] = pipeline("automatic-speech-recognition", model=model_id, device=device)
|
| 59 |
+
return pipes[model_id]
|
| 60 |
+
|
| 61 |
+
@spaces.GPU
|
| 62 |
+
def transcribe(whisper_model, sense_model, audio_path, enable_punct):
|
| 63 |
+
# 1) Whisper
|
| 64 |
+
whisper_pipe = get_asr_pipe(whisper_model)
|
| 65 |
+
whisper_out = whisper_pipe(audio_path)
|
| 66 |
+
text_whisper = whisper_out.get("text", "").strip()
|
| 67 |
+
|
| 68 |
+
# 2) SenseVoice
|
| 69 |
+
sense_pipe = get_asr_pipe(sense_model)
|
| 70 |
+
sense_out = sense_pipe(audio_path)
|
| 71 |
+
text_sense = sense_out.get("text", "").strip()
|
| 72 |
+
|
| 73 |
+
# 3) strip punctuation if disabled
|
| 74 |
+
if not enable_punct:
|
| 75 |
+
text_sense = re.sub(r"[^\w\s]", "", text_sense)
|
| 76 |
+
|
| 77 |
+
return text_whisper, text_sense
|
| 78 |
+
|
| 79 |
+
with gr.Blocks() as demo:
|
| 80 |
+
gr.Markdown("## Whisper vs. FunASR SenseVoice Comparison")
|
| 81 |
+
with gr.Row():
|
| 82 |
+
whisper_dd = gr.Dropdown(
|
| 83 |
+
choices=WHISPER_MODELS,
|
| 84 |
+
value=WHISPER_MODELS[0],
|
| 85 |
+
label="Whisper Model"
|
| 86 |
+
)
|
| 87 |
+
sense_dd = gr.Dropdown(
|
| 88 |
+
choices=SENSEVOICE_MODELS,
|
| 89 |
+
value=SENSEVOICE_MODELS[0],
|
| 90 |
+
label="SenseVoice Model"
|
| 91 |
+
)
|
| 92 |
+
punct = gr.Checkbox(label="Enable Punctuation (SenseVoice)", value=True)
|
| 93 |
+
audio_in = gr.Audio(
|
| 94 |
+
source="upload+microphone",
|
| 95 |
+
type="filepath",
|
| 96 |
+
label="Upload or Record Audio"
|
| 97 |
+
)
|
| 98 |
+
with gr.Row():
|
| 99 |
+
out_whisper = gr.Textbox(label="Whisper Transcript")
|
| 100 |
+
out_sense = gr.Textbox(label="SenseVoice Transcript")
|
| 101 |
+
btn = gr.Button("Transcribe")
|
| 102 |
+
btn.click(
|
| 103 |
+
fn=transcribe,
|
| 104 |
+
inputs=[whisper_dd, sense_dd, audio_in, punct],
|
| 105 |
+
outputs=[out_whisper, out_sense]
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
if __name__ == "__main__":
|
| 109 |
+
demo.launch()
|
requirements.txt
ADDED
|
File without changes
|