Macedonian-ASR / app.py
cigol123's picture
Update app.py
2680cc6 verified
raw
history blame
2.18 kB
# NOTE: transformers resolves TRANSFORMERS_CACHE once, while it is being
# imported. The cache environment variables therefore have to be exported
# *before* the third-party imports below; setting them afterwards does not
# affect the already-imported library in this process.
import os

# Set the cache directory to a writable location
cache_dir = "/tmp/.cache"
os.environ["TRANSFORMERS_CACHE"] = cache_dir
os.environ["HF_DATASETS_CACHE"] = cache_dir
os.environ["TORCH_HOME"] = cache_dir  # Set PyTorch cache directory

# Ensure the cache directory exists before any library tries to use it
os.makedirs(cache_dir, exist_ok=True)

# Third-party imports intentionally follow the environment setup (see NOTE).
import gradio as gr  # noqa: E402
import torch  # noqa: E402
from transformers import WhisperProcessor, WhisperForConditionalGeneration  # noqa: E402
import soundfile as sf  # noqa: E402
import numpy as np  # noqa: E402
from scipy import signal  # noqa: E402
# Load the fine-tuned Macedonian-ASR Whisper checkpoint (processor + model)
def load_model():
    """Load the fine-tuned Whisper processor and model.

    Both objects are created with ``from_pretrained`` from the same
    repository id, and progress messages are printed before and after.

    Returns:
        A ``(processor, model)`` tuple.
    """
    repo_id = "Macedonian-ASR/whisper-large-v3-macedonian-asr"
    print("Loading fine-tuned Macedonian-ASR Whisper model and processor...")
    whisper_processor = WhisperProcessor.from_pretrained(repo_id)
    whisper_model = WhisperForConditionalGeneration.from_pretrained(repo_id)
    print("✓ Model and processor loaded successfully!")
    return whisper_processor, whisper_model


# Loaded once at import time; process_audio() reads these module-level names.
processor, model = load_model()
def process_audio(audio_path):
    """Transcribe an audio file with the module-level processor and model.

    The file is read from disk, mixed down to mono by averaging channels,
    resampled to 16 kHz when its sample rate differs, and decoded with the
    generation language set to ``"mk"``.

    Args:
        audio_path: Path of the audio file to transcribe. May be ``None`` or
            empty when the form is submitted without a recording or upload.

    Returns:
        The decoded text of the first item in the batch, or an empty string
        when there is no audio to transcribe.
    """
    target_sr = 16000

    # Nothing was recorded or uploaded: return an empty result instead of
    # failing inside soundfile with an unhelpful error.
    if not audio_path:
        return ""

    waveform, sr = sf.read(audio_path)
    if waveform.ndim > 1:  # Convert multi-channel audio to mono
        waveform = waveform.mean(axis=1)

    # A file that contains no samples has nothing to transcribe.
    if waveform.size == 0:
        return ""

    if sr != target_sr:  # Resample if necessary
        num_samples = int(len(waveform) * target_sr / sr)
        if num_samples == 0:  # Clip too short to yield a single output sample
            return ""
        waveform = signal.resample(waveform, num_samples)

    # NOTE(review): the processor's default padding/truncation presumably
    # limits the input to a single fixed-length window — confirm whether
    # long recordings need chunking.
    inputs = processor(waveform, sampling_rate=target_sr, return_tensors="pt")
    predicted_ids = model.generate(**inputs, language="mk")
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
# Gradio interface: one audio input (microphone or file upload, delivered to
# the callback as a file path) mapped to a plain-text output.
audio_input = gr.Audio(type="filepath", sources=["microphone", "upload"])

demo = gr.Interface(
    title="Македонско препознавање на говор / Macedonian Speech Recognition",
    description="Качете аудио или користете микрофон за транскрипција на македонски говор / Upload audio or use microphone to transcribe Macedonian speech",
    fn=process_audio,
    inputs=audio_input,
    outputs="text",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()