Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ os.system("pip install jiwer")
|
|
7 |
from jiwer import wer
|
8 |
os.system("pip install datasets[audio]")
|
9 |
from evaluate import evaluator
|
|
|
10 |
from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
|
11 |
import gradio as gr
|
12 |
|
@@ -16,11 +17,7 @@ disable_caching()
|
|
16 |
huggingface_token = os.environ["huggingface_token"]
|
17 |
pipe = pipeline(model="mskov/whisper-small-esc50")
|
18 |
print(pipe)
|
19 |
-
|
20 |
-
model = WhisperModel.from_pretrained("mskov/whisper-small-miso", use_auth_token=huggingface_token)
|
21 |
-
feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper-small-miso", use_auth_token=huggingface_token)
|
22 |
-
miso_tokenizer = WhisperTokenizer.from_pretrained("mskov/whisper-small-miso", use_auth_token=huggingface_token)
|
23 |
-
'''
|
24 |
dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio(sampling_rate=16000))
|
25 |
|
26 |
print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
|
@@ -37,16 +34,8 @@ iface = gr.Interface(
|
|
37 |
)
|
38 |
|
39 |
iface.launch()
|
40 |
-
'''
|
41 |
-
inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt")
|
42 |
-
print("inputs ::: ", inputs, "and dataset type for good measure: ", type(dataset))
|
43 |
-
tempDataset = dataset[0]["audio"]["array"].tostring()
|
44 |
-
tokenized_dataset = miso_tokenizer(tempDataset) # Tokenize the dataset
|
45 |
|
46 |
-
|
47 |
-
attention_mask = features.attention_mask
|
48 |
-
'''
|
49 |
-
'''
|
50 |
# Evaluate the model
|
51 |
model.eval()
|
52 |
with torch.no_grad():
|
@@ -63,7 +52,7 @@ wer_score = wer(labels, predicted_text)
|
|
63 |
|
64 |
# Print or return WER score
|
65 |
print(f"Word Error Rate (WER): {wer_score}")
|
66 |
-
|
67 |
'''
|
68 |
print("check check")
|
69 |
print(inputs)
|
|
|
7 |
from jiwer import wer
|
8 |
os.system("pip install datasets[audio]")
|
9 |
from evaluate import evaluator
|
10 |
+
import evaluate
|
11 |
from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
|
12 |
import gradio as gr
|
13 |
|
|
|
17 |
huggingface_token = os.environ["huggingface_token"]
|
18 |
pipe = pipeline(model="mskov/whisper-small-esc50")
|
19 |
print(pipe)
|
20 |
+
|
|
|
|
|
|
|
|
|
21 |
dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio(sampling_rate=16000))
|
22 |
|
23 |
print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
|
|
|
34 |
)
|
35 |
|
36 |
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
|
|
|
|
|
|
|
39 |
# Evaluate the model
|
40 |
model.eval()
|
41 |
with torch.no_grad():
|
|
|
52 |
|
53 |
# Print or return WER score
|
54 |
print(f"Word Error Rate (WER): {wer_score}")
|
55 |
+
|
56 |
'''
|
57 |
print("check check")
|
58 |
print(inputs)
|