# ExpertFlow-ASR / app.py
import gradio as gr
import azure.cognitiveservices.speech as speechsdk
import time

# Azure credentials (placeholders; replace with your own Speech resource key and region,
# ideally supplied via environment variables or Space secrets rather than hard-coded)
SPEECH_KEY = "your_speech_key"
SERVICE_REGION = "your_service_region"

# Define the language and dialect mapping
language_dialects = {
    "Arabic": {
        "Egypt": "ar-EG",
        "Saudi Arabia": "ar-SA",
        "United Arab Emirates": "ar-AE",
        "Bahrain": "ar-BH",
        "Algeria": "ar-DZ",
        "Iraq": "ar-IQ",
        "Jordan": "ar-JO",
        "Kuwait": "ar-KW",
        "Lebanon": "ar-LB",
        "Libya": "ar-LY",
        "Morocco": "ar-MA",
        "Oman": "ar-OM",
        "Palestinian Authority": "ar-PS",
        "Qatar": "ar-QA",
        "Syria": "ar-SY",
        "Tunisia": "ar-TN",
        "Yemen": "ar-YE"
    },
    "English": {
        "United States": "en-US",
        "United Kingdom": "en-GB",
        "Australia": "en-AU",
        "Canada": "en-CA",
        "India": "en-IN",
        "Ireland": "en-IE",
        "New Zealand": "en-NZ",
        "South Africa": "en-ZA",
        "Singapore": "en-SG",
        "Philippines": "en-PH"
    },
    "French": {
        "France": "fr-FR",
        "Canada": "fr-CA",
        "Switzerland": "fr-CH"
    },
    "Spanish": {
        "Spain": "es-ES",
        "Mexico": "es-MX",
        "Argentina": "es-AR",
        "Colombia": "es-CO",
        "Chile": "es-CL",
        "Peru": "es-PE",
        "Venezuela": "es-VE"
    },
    "German": {
        "Germany": "de-DE",
        "Austria": "de-AT",
        "Switzerland": "de-CH"
    },
    "Portuguese": {
        "Portugal": "pt-PT",
        "Brazil": "pt-BR"
    },
    "Chinese": {
        "Mainland China": "zh-CN",
        "Hong Kong": "zh-HK",
        "Taiwan": "zh-TW"
    },
    "Italian": {
        "Italy": "it-IT"
    },
    "Japanese": {
        "Japan": "ja-JP"
    },
    "Korean": {
        "Korea": "ko-KR"
    }
    # Add more languages and dialects as needed
}

# Function to get dialects based on selected language
def get_dialects(language):
    dialects = list(language_dialects.get(language, {}).keys())
    return gr.update(choices=dialects, value=dialects[0] if dialects else None)

# Function to transcribe audio
def transcribe_audio(audio_file, duration, language, dialect):
    # Simulate the selected recording duration
    print(f"Recording for {duration} seconds...")
    time.sleep(duration)

    # Look up the locale code for the chosen language/dialect (default: en-US)
    locale_code = language_dialects.get(language, {}).get(dialect, "en-US")
    print(f"Selected Locale Code: {locale_code}")

    # Set up speech recognition
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SERVICE_REGION)
    speech_config.speech_recognition_language = locale_code
    audio_input = speechsdk.audio.AudioConfig(filename=audio_file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    result = speech_recognizer.recognize_once()

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        return result.text
    elif result.reason == speechsdk.ResultReason.NoMatch:
        return "No speech could be recognized"
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        message = f"Speech recognition canceled: {cancellation_details.reason}"
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            message += f" ({cancellation_details.error_details})"
        return message
    else:
        return "Unknown error occurred during speech recognition"

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Azure Speech to Text with Language and Dialect Selection")

    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload Audio")
        duration_input = gr.Dropdown(choices=[5, 10], label="Recording Duration", value=5)

    with gr.Row():
        language_input = gr.Dropdown(choices=list(language_dialects.keys()), label="Select Language")
        dialect_input = gr.Dropdown(choices=[], label="Select Dialect")

    transcribe_button = gr.Button("Transcribe")
    output_text = gr.Textbox(label="Transcription Result")

    # Update dialect options based on selected language
    language_input.change(fn=get_dialects, inputs=language_input, outputs=dialect_input)

    # Transcribe audio on button click
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[audio_input, duration_input, language_input, dialect_input],
        outputs=output_text,
    )

# Launch the app
demo.launch()
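
# To run locally (assumes the two dependencies are installed):
#   pip install gradio azure-cognitiveservices-speech
#   python app.py
# then open the local URL that Gradio prints. Valid Azure Speech credentials must be set above.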