Spaces:

cigol123
/

Macedonian-ASR

Running

App Files Files Community

Macedonian-ASR / app.py

cigol123

Update app.py

2680cc6 verified 19 days ago

raw

history blame

2.18 kB

	import gradio as gr
	import torch
	from transformers import WhisperProcessor, WhisperForConditionalGeneration
	import soundfile as sf
	import numpy as np
	from scipy import signal
	import os

	# Route the Hugging Face and PyTorch caches to a single directory under /tmp,
	# creating that directory if it does not exist yet.
	# NOTE(review): these variables are assigned after `transformers` has already
	# been imported above -- confirm the library still honors them at this point.
	cache_dir = "/tmp/.cache"
	os.makedirs(cache_dir, exist_ok=True)
	os.environ.update(
	    {
	        "TRANSFORMERS_CACHE": cache_dir,
	        "HF_DATASETS_CACHE": cache_dir,
	        "TORCH_HOME": cache_dir,  # PyTorch cache directory
	    }
	)

	# Load the fine-tuned Macedonian-ASR Whisper model and processor
	def load_model():
	    """Load the fine-tuned Whisper processor and model.

	    Both objects are fetched with ``from_pretrained`` using the same model
	    identifier, and progress messages are printed before and after loading.

	    Returns:
	        A ``(processor, model)`` tuple: the ``WhisperProcessor`` and the
	        ``WhisperForConditionalGeneration`` instance.
	    """
	    model_id = "Macedonian-ASR/whisper-large-v3-macedonian-asr"
	    print("Loading fine-tuned Macedonian-ASR Whisper model and processor...")
	    # Pass the cache location explicitly: the TRANSFORMERS_CACHE environment
	    # variable is assigned only after `transformers` has been imported, so
	    # relying on it alone may leave downloads going to the default
	    # (possibly read-only) cache path.
	    processor = WhisperProcessor.from_pretrained(model_id, cache_dir=cache_dir)
	    model = WhisperForConditionalGeneration.from_pretrained(model_id, cache_dir=cache_dir)
	    print("✓ Model and processor loaded successfully!")
	    return processor, model

	# Load once at import time so every request reuses the same objects.
	processor, model = load_model()

	def process_audio(audio_path):
	    """Transcribe an audio file with the module-level Whisper model.

	    The file is read with ``soundfile``, averaged down to one channel if it
	    has several, resampled to 16 kHz when needed, and passed through the
	    module-level ``processor`` and ``model``.

	    Args:
	        audio_path: Path of the audio file to transcribe. A falsy value
	            (``None`` or ``""``) means no audio was supplied.

	    Returns:
	        The decoded transcription, or ``""`` when there is no audio to
	        transcribe (no path given, or the file contains no samples).
	    """
	    target_sr = 16000

	    # Nothing was recorded or uploaded: return an empty transcription
	    # instead of failing inside the audio reader.
	    if not audio_path:
	        return ""

	    waveform, sr = sf.read(audio_path)
	    if waveform.ndim > 1:  # Convert multi-channel audio to mono
	        waveform = waveform.mean(axis=1)
	    if len(waveform) == 0:
	        return ""

	    if sr != target_sr:  # Resample if necessary
	        num_samples = int(len(waveform) * target_sr / sr)
	        if num_samples == 0:
	            # Clip is too short to yield even one sample at the target rate.
	            return ""
	        waveform = signal.resample(waveform, num_samples)

	    # Process the audio
	    inputs = processor(waveform, sampling_rate=target_sr, return_tensors="pt")
	    predicted_ids = model.generate(**inputs, language="mk")
	    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
	    return transcription

	# Gradio interface: one audio input (microphone or upload, handed to the
	# callback as a file path) and a plain-text output for the transcription.
	demo = gr.Interface(
	    title="Македонско препознавање на говор / Macedonian Speech Recognition",
	    description="Качете аудио или користете микрофон за транскрипција на македонски говор / Upload audio or use microphone to transcribe Macedonian speech",
	    fn=process_audio,
	    inputs=gr.Audio(
	        type="filepath",
	        sources=["microphone", "upload"],
	    ),
	    outputs="text",
	)

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()