Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,20 @@ tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
|
|
8 |
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
|
9 |
|
10 |
#load audio file
|
11 |
-
speech, rate = librosa.load("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
|
14 |
|
|
|
8 |
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
|
9 |
|
10 |
#load audio file
|
11 |
+
speech, rate = librosa.load("/hip-voice.m4a",sr=16000)
|
12 |
+
|
13 |
+
import IPython.display as display
|
14 |
+
display.Audio("batman1.wav", autoplay=True)
|
15 |
+
|
16 |
+
input_values = tokenizer(speech, return_tensors = 'pt').input_values
|
17 |
+
logits = model(input_values).logits
|
18 |
+
|
19 |
+
predicted_ids = torch.argmax(logits, dim =-1)
|
20 |
+
|
21 |
+
#decode the audio to generate text
|
22 |
+
transcriptions = tokenizer.decode(predicted_ids[0])
|
23 |
+
|
24 |
+
print(transcriptions)
|
25 |
|
26 |
|
27 |
|