nirajandhakal committed on
Commit
80f8097
·
verified ·
1 Parent(s): c3c4d10

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
+ from datasets import load_dataset
5
+
6
# Pick the compute target once: first CUDA GPU with half precision when one
# is available, otherwise the CPU with full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32
9
+
10
# Checkpoint served by this app.
model_id = "ylacombe/whisper-large-v3-turbo"

# Load the speech seq2seq weights in the chosen dtype, then move them to the
# chosen device.
_model_load_options = {
    "torch_dtype": torch_dtype,
    "low_cpu_mem_usage": True,
    "use_safetensors": True,
}
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, **_model_load_options)
model.to(device)

# The processor supplies the tokenizer and feature extractor that go with
# the checkpoint.
processor = AutoProcessor.from_pretrained(model_id)

# Bundle model, tokenizer and feature extractor into one speech-recognition
# pipeline object.
_pipeline_options = {
    "model": model,
    "tokenizer": processor.tokenizer,
    "feature_extractor": processor.feature_extractor,
    "torch_dtype": torch_dtype,
    "device": device,
}
pipe = pipeline("automatic-speech-recognition", **_pipeline_options)
27
+
28
def transcribe_audio(audio):
    """Transcribe an uploaded audio clip with the module-level ASR pipeline.

    Args:
        audio: The value delivered by the Gradio audio input: a filesystem
            path, a tempfile-like object exposing its path as ``.name``, or
            ``None`` when nothing was uploaded.

    Returns:
        The transcription text, or an empty string when no audio was given.
    """
    # Submitting the form without a file yields None; there is nothing to
    # transcribe, so return an empty result instead of failing.
    if audio is None:
        return ""

    # Some Gradio versions pass a temp-file wrapper instead of a plain path;
    # unwrap it so the pipeline always receives something it can open.
    audio_path = getattr(audio, "name", audio)

    # The pipeline was built with the processor's feature extractor and does
    # its own decoding and feature extraction, so it must be given the raw
    # audio reference rather than pre-computed features.
    result = pipe(audio_path)
    return result["text"]
36
+
37
# Create a Gradio interface.
# The audio input is restricted to uploads and hands the callback a file path.
# NOTE(review): `sources=[...]` and `type="filepath"` are the Gradio 4+
# spellings; the earlier `source=` keyword and `type="file"` value are not
# accepted there. Confirm against the Gradio version this app is pinned to.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="Speech-to-Text Transcription",
    description="Upload an audio file to transcribe its content.",
)

# Launch the Gradio app
demo.launch()