# app.py
import os

import gradio as gr
import numpy as np
import torch
from scipy.signal import resample
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Load the fine-tuned Akan Whisper model (pass token= for private models).
hf_token = os.getenv("HF_TOKEN")
model = WhisperForConditionalGeneration.from_pretrained("GiftMark/akan-whisper-model", token=hf_token)
processor = WhisperProcessor.from_pretrained("GiftMark/akan-whisper-model", token=hf_token)
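
# Optional GPU placement — a minimal sketch, not enabled here. It assumes a
# CUDA device is available and that input_features are moved to the same
# device before calling model.generate():
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)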
def transcribe(audio):
    """Transcribe a (sampling_rate, samples) tuple from the Gradio Audio input."""
    try:
        if audio is None:
            return "No audio provided."
        sampling_rate, data = audio
        data = np.asarray(data)

        # Gradio's numpy audio is typically integer PCM (e.g. int16); scale it
        # to floats in [-1, 1], which the Whisper feature extractor expects.
        if np.issubdtype(data.dtype, np.integer):
            data = data.astype(np.float32) / np.iinfo(data.dtype).max
        else:
            data = data.astype(np.float32)

        # Ensure mono by keeping the first channel.
        if data.ndim > 1:
            data = data[:, 0]

        # Resample to the 16 kHz rate Whisper expects, if needed.
        target_sr = 16000
        if sampling_rate != target_sr:
            duration = data.shape[0] / sampling_rate
            new_length = int(duration * target_sr)
            data = resample(data, new_length)
            sampling_rate = target_sr

        # Convert the waveform into log-mel input features and decode.
        inputs = processor(
            data, sampling_rate=sampling_rate, return_tensors="pt"
        ).input_features
        with torch.no_grad():
            predicted_ids = model.generate(inputs)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        return transcription
    except Exception as e:
        print("Error during transcription:", e)
        return f"Error: {e}"
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="Record or upload Akan audio"),
    outputs=gr.Textbox(label="Transcription"),
    title="Akan Speech-to-Text Demo",
    description="Record or upload Akan audio to test the Whisper ASR model.",
)
demo.launch()
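# To expose a temporary public URL (a standard Gradio option), launch with:
#   demo.launch(share=True)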