Spaces:

badrex
/

arabic-dialect-identifier-demo

Running on Zero

App Files Files Community

arabic-dialect-identifier-demo / app.py

badrex

edit App title

3f395fc 8 months ago

raw

history blame

2.82 kB

	import gradio as gr
	from transformers import pipeline
	import os

	# Load the model
	# The checkpoint is referenced by its Hugging Face Hub id; the pipeline is
	# built once at import time and reused by every prediction request.
	model_id = "badrex/mms-300m-arabic-dialect-identifier"
	print("Loading model...")
	classifier = pipeline(task="audio-classification", model=model_id)
	print("Model loaded successfully")

	# Define dialect mapping
	# Keys are the labels looked up on each prediction; values are the
	# human-readable names shown in the results.
	dialect_mapping = dict(
	    MSA="Modern Standard Arabic",
	    Egyptian="Egyptian Arabic",
	    Gulf="Gulf Arabic",
	    Levantine="Levantine Arabic",
	    Maghrebi="Maghrebi Arabic",
	)

	def predict_dialect(audio):
	    """Classify the Arabic dialect spoken in an audio clip.

	    Args:
	        audio: The value delivered by the Gradio audio input, unpacked here
	            as a ``(sample_rate, audio_array)`` tuple, or ``None`` when no
	            audio was provided.

	    Returns:
	        A dict mapping a dialect display name (looked up in
	        ``dialect_mapping``, falling back to the raw label) to its score as
	        a float. ``{"Error": 1.0}`` is returned when there is no audio or
	        the clip contains no samples.
	    """
	    if audio is None:
	        return {"Error": 1.0}

	    # The audio input from Gradio is a tuple of (sample_rate, audio_array)
	    sr, audio_array = audio

	    # A zero-length clip has nothing to classify; report it the same way
	    # as missing input instead of handing an empty array to the model.
	    if audio_array.size == 0:
	        return {"Error": 1.0}

	    # Process the audio input
	    if audio_array.ndim > 1:
	        audio_array = audio_array.mean(axis=1)  # Convert stereo to mono

	    # Hand the pipeline float32 samples scaled to [-1, 1]. Mono clips would
	    # otherwise be forwarded in whatever dtype Gradio delivered (commonly
	    # integer PCM), which the float-based resampling/feature extraction is
	    # not meant to receive. Cast first so abs() cannot overflow on ints.
	    audio_array = audio_array.astype("float32")
	    peak = abs(audio_array).max()
	    if peak > 0:  # leave pure silence untouched rather than divide by zero
	        audio_array = audio_array / peak

	    print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")

	    # Classify the dialect
	    predictions = classifier({"sampling_rate": sr, "raw": audio_array})

	    # Format results for display
	    return {
	        dialect_mapping.get(pred['label'], pred['label']): float(pred['score'])
	        for pred in predictions
	    }

	# Manually prepare example file paths without metadata
	# Each entry is a one-element list holding the path of one audio file.
	examples = []
	examples_dir = "examples"
	# isdir (not exists): a regular file named "examples" would otherwise pass
	# the check and make os.listdir raise.
	if os.path.isdir(examples_dir):
	    # os.listdir returns names in arbitrary order; sort them so the examples
	    # are listed in a stable order across restarts.
	    examples = [
	        [os.path.join(examples_dir, filename)]
	        for filename in sorted(os.listdir(examples_dir))
	        # Compare in lowercase so files such as "clip.WAV" are picked up too.
	        if filename.lower().endswith((".wav", ".mp3", ".ogg"))
	    ]

	    print(f"Found {len(examples)} example files")
	else:
	    print("Examples directory not found")

	# Create the Gradio interface
	# One audio input feeds predict_dialect; its label-to-score dict is rendered
	# by a Label component limited to the top five classes.
	# NOTE(review): flagging_mode=None most likely selects Gradio's default
	# flagging behaviour rather than disabling flagging -- confirm the intent.
	demo = gr.Interface(
	    title="🎙️ Arabic Dialect Identification in Speech!",
	    description="""
	Use this AI-powered tool to identify five major Arabic varieties from just a short audio clip:

	✦ Modern Standard Arabic (MSA) - The formal language of media and education

	✦ Egyptian Arabic - The dialect of Cairo, Alexandria, and popular Arabic cinema

	✦ Gulf Arabic - Spoken across Saudi Arabia, UAE, Kuwait, Qatar, Bahrain, and Oman

	✦ Levantine Arabic - The dialect of Syria, Lebanon, Jordan, and Palestine

	✦ Maghrebi Arabic - The distinctive varieties of Morocco, Algeria, Tunisia, and Libya

	Simply upload an audio file or record yourself speaking to see which dialect you match! Perfect for language learners, linguistics enthusiasts, or anyone curious about Arabic language variation.""",
	    fn=predict_dialect,
	    inputs=gr.Audio(),
	    outputs=gr.Label(label="Predicted Dialect", num_top_classes=5),
	    # An empty example list is passed as None so no examples are configured.
	    examples=examples or None,
	    cache_examples=False,  # Disable caching to avoid issues
	    flagging_mode=None,
	)

	# Launch the app
	demo.launch()