Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ import evaluate
|
|
11 |
from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
|
12 |
import gradio as gr
|
13 |
import torch
|
|
|
14 |
|
15 |
set_caching_enabled(False)
|
16 |
disable_caching()
|
@@ -28,7 +29,16 @@ model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc
|
|
28 |
# Evaluate the model
|
29 |
# model.eval()
|
30 |
#print("model.eval ", model.eval())
|
|
|
|
|
|
|
|
|
|
|
31 |
def map_to_pred(batch):
|
|
|
|
|
|
|
|
|
32 |
audio = batch["audio"]
|
33 |
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
34 |
batch["reference"] = processor.tokenizer._normalize(batch['sentence'])
|
|
|
11 |
from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
|
12 |
import gradio as gr
|
13 |
import torch
|
14 |
+
import re
|
15 |
|
16 |
set_caching_enabled(False)
|
17 |
disable_caching()
|
|
|
29 |
# Evaluate the model
|
30 |
# model.eval()
|
31 |
#print("model.eval ", model.eval())
|
32 |
+
|
33 |
+
|
34 |
+
# Remove bracketed tags (e.g. "[noise]") and leading/trailing whitespace from the transcription
|
35 |
+
|
36 |
+
|
37 |
def map_to_pred(batch):
|
38 |
+
cleaned_transcription = re.sub(r'\[[^\]]+\]', '', batch).strip()
|
39 |
+
cleaned_transcription = preprocess_transcription(batch['sentence'])
|
40 |
+
normalized_transcription = processor.tokenizer._normalize(cleaned_transcription)
|
41 |
+
|
42 |
audio = batch["audio"]
|
43 |
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
44 |
batch["reference"] = processor.tokenizer._normalize(batch['sentence'])
|