233-Yorozuya committed on
Commit
d8feb58
·
verified ·
1 Parent(s): 1f1c526

Create asr.py

Browse files
Files changed (1) hide show
  1. asr.py +34 -0
asr.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
+ import torchaudio
4
+
5
# Hub repository of the fine-tuned Whisper checkpoint used by default.
DEFAULT_MODEL_ID = "233-Yorozuya/dl_whisper_model"


# Load the model
@st.cache_resource
def load_model(model_id: str = DEFAULT_MODEL_ID):
    """Load the Whisper processor and model for ``model_id``.

    The function is wrapped in ``st.cache_resource`` so that Streamlit reruns
    of the script reuse the already-loaded objects instead of loading the
    checkpoint again for the same ``model_id``.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the
            processor and the model. Defaults to the fine-tuned checkpoint
            this app was written for, so existing ``load_model()`` calls
            behave as before.

    Returns:
        A ``(processor, model)`` tuple.
    """
    # Use one identifier for both objects so they cannot drift apart.
    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    return processor, model
11
+
12
# Sample rate (Hz) the audio is resampled to and that is declared to the
# processor below.
TARGET_SAMPLE_RATE = 16000

processor, model = load_model()

st.title("ASR with Fine-Tuned Whisper")
st.write("Upload an audio file for transcription:")

# File upload
audio_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"])

if audio_file is not None:
    # Decode, preprocess and transcribe under a single spinner so the user
    # also gets feedback while the upload is being decoded and resampled.
    with st.spinner("Transcribing..."):
        # Load and preprocess audio
        waveform, rate = torchaudio.load(audio_file)

        # The loaded tensor is laid out as (channels, frames). Average the
        # channels instead of keeping only the first one, so speech that is
        # present only on another channel of a stereo file is not dropped.
        # For mono input this is identical to taking channel 0.
        mono = waveform.mean(dim=0)

        # Resampling is only needed when the file is not already at the
        # target rate.
        if rate != TARGET_SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(
                orig_freq=rate, new_freq=TARGET_SAMPLE_RATE
            )
            mono = resampler(mono)

        # NOTE(review): Whisper's feature extractor presumably pads/truncates
        # to a fixed window, so long uploads may be transcribed only
        # partially -- confirm against the processor configuration.
        inputs = processor(
            mono.numpy(), sampling_rate=TARGET_SAMPLE_RATE, return_tensors="pt"
        )

        # Perform inference
        predicted_ids = model.generate(inputs.input_features)
        transcription = processor.batch_decode(
            predicted_ids, skip_special_tokens=True
        )[0]

    # Display result
    st.subheader("Transcription")
    st.write(transcription)
34
+