Spaces:

fawzanaramam
/

amma-juz

Running

App Files Files Community

amma-juz / app.py

fawzanaramam

Update app.py

868ba95 verified 20 days ago

raw

history blame contribute delete

2.7 kB

	from transformers import pipeline
	import gradio as gr
	from pydub import AudioSegment
	import os
	import mimetypes
	from pydub.silence import detect_nonsilent

	# Hub id of the fine-tuned checkpoint used for transcription.
	MODEL_ID = "fawzanaramam/the-truth-amma-juz"

	# Build the inference pipeline once at import time so every request handled
	# by transcribe() reuses the same loaded model.
	pipe = pipeline(model=MODEL_ID)

	def transcribe(audio):
	    """Validate an audio file and return its transcription.

	    Args:
	        audio: Path to the recorded/uploaded audio file, or None when the
	            user submitted nothing.

	    Returns:
	        The transcribed text on success, otherwise a human-readable message
	        describing why the audio was rejected or why processing failed.
	        This function never raises; every failure is reported as a string
	        so it can be shown directly in the UI's text output.
	    """
	    if audio is None:
	        return "No audio input provided."

	    # Path of the temporary .wav produced for .m4a input; removed in
	    # `finally` so it is cleaned up on every exit path, not only on success.
	    converted_path = None

	    try:
	        # Validate file type (case-insensitive on the extension).
	        allowed_extensions = {".wav", ".mp3", ".flac", ".ogg", ".m4a"}
	        path_root, raw_extension = os.path.splitext(audio)
	        file_extension = raw_extension.lower()

	        if file_extension not in allowed_extensions:
	            return f"Unsupported file format: {file_extension}. Please upload a .wav, .mp3, .flac, .ogg, or .m4a file."

	        # Convert .m4a to .wav if needed.
	        if file_extension == ".m4a":
	            sound = AudioSegment.from_file(audio, format="m4a")
	            # Swap only the extension. A plain str.replace() would also
	            # rewrite ".m4a" inside directory names and would miss
	            # upper-case extensions such as ".M4A".
	            converted_path = path_root + ".wav"
	            # export() hands back the open output file; close it so the
	            # handle is not leaked and the data is on disk before reading.
	            sound.export(converted_path, format="wav").close()
	            audio = converted_path  # Use the converted file
	        else:
	            sound = AudioSegment.from_file(audio)

	        # Check for empty or corrupted file (len() of a segment is in ms).
	        if len(sound) == 0:
	            return "Uploaded file is empty or corrupted. Please try again."

	        # Limit audio duration to 5 minutes.
	        max_duration = 300_000  # 5 minutes in milliseconds
	        if len(sound) > max_duration:
	            return "Audio is too long. Please upload a file shorter than 5 minutes."

	        # Reject recordings with no non-silent section; non-silence is used
	        # here as a proxy for "speech is present".
	        nonsilent_ranges = detect_nonsilent(sound, min_silence_len=1000, silence_thresh=-40)
	        if not nonsilent_ranges:
	            return "No speech detected in the audio."

	        # Transcribe
	        return pipe(audio)["text"]
	    except RuntimeError as e:
	        if "CUDA out of memory" in str(e):
	            return "Server is out of memory. Try a shorter audio file."
	        # Any other RuntimeError used to fall through and return None,
	        # leaving the UI blank; report it like every other failure.
	        return f"Error processing audio: {str(e)}"
	    except Exception as e:
	        return f"Error processing audio: {str(e)}"
	    finally:
	        # Cleanup of the converted file, if one was created.
	        if converted_path is not None and os.path.exists(converted_path):
	            os.remove(converted_path)

	# Web UI: a single audio input (microphone recording or file upload, passed
	# to transcribe() as a file path) and a plain-text output.
	iface = gr.Interface(
	    fn=transcribe,
	    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
	    outputs="text",
	    title="📖 Whisper Small - Holy Quran (Amma Juz) Transcription",
	    description=(
	        "Fine-tuned Whisper model for recognizing and transcribing Holy Quran.\n\n"
	        "Guidelines:\n"
	        "- Upload clear audio with minimal background noise.\n"
	        "- Supports `.wav`, `.mp3`, `.flac`, `.ogg`, and `.m4a`.\n"
	        "- Max duration: 5 minutes. Max file size: 50MB.\n"
	        "- Allow microphone access if recording."
	    ),
	)

	# The description promises a 50MB cap, but nothing enforced it before: the
	# gr.Audio component has no size option. The limit is applied at launch time.
	# NOTE(review): `max_file_size` requires a Gradio release that supports it
	# on launch() (4.27+) - confirm against the Space's pinned version.
	iface.launch(debug=True, max_file_size="50mb")