{ "asr": " Recognize the speech and give me the transcription.", "gender_recognition": " What is the gender of the speaker?", "dialect_identification": " What is the dialect of the speaker?", "asr_zh": " 请将语音中的内容写下来。", "summarization": " Could you capture the main points of this audio in a short summary?", "translation_ae": " Listen to the speech and translate it into English.", "asr_de": " Hören Sie sich die Rede an und schreiben Sie ihren Inhalt auf.", "translation_ec": " Listen to the speech and translate it into Chinese.", "audiocaption": " Please describe the audio.", "audiocaption_v2": " Please write down what your hear in the audio.", "QA": " {}", "gender_QA": " {}", "phone_recognition": " Provide the phonetic transcription for the speech.", "speech_query": " Please answer the question in detail.", "emotion_recognition": " Describe the emotion of the speaker in one word.", "lyrics_recognition": " Listen to the song and write down its content.", "audio_speech_description": " Describe the speech and the background audio", "speaker_verification": " Do you only hear the same person talking? Answer yes or no.", "fluent_speech_audio": " Describe the background audio and the speech in a fluent sentence.", "speech_separation": " Please write down what you hear each person says.", "audio_story_telling": " Based on the audio, write a story in detail. Your story should be highly related to the audio.", "speech_audio_query": " Please answer the speaker's question in detail based on the background sound.", "slot_filling": " According to the speech, what is the {}?", "music_description": " Listen to this music clip and describe the music.", "translation_en2ja": " Listen to the speech and translate it into Japanese.", "translation_en2de": " Listen to the speech and translate it into German.", "speech_audio_coreasoning": " Use your strong reasoning skills to answer the speaker's question in detail based on the background sound.", "keywords": " Give me only three keywords of the text.", "speaker_diarization_asr": " Please recognize each speaker and transcribe their speech content." }