import json

import gradio as gr
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import requests

from helpers import make_header, upload_file, request_transcript, make_polling_endpoint, wait_for_completion, \
    make_html_from_topics, make_paras_string, create_highlighted_list, make_summary, \
    make_sentiment_output, make_entity_dict, make_entity_html, make_true_dict, make_final_json, make_content_safety_fig
from helpers import transcription_options_headers, audio_intelligence_headers, language_headers


def change_audio_source(radio, plot, file_data, mic_data):
    """When the audio source radio selector is changed, update the wave plot and change the audio selector
    accordingly"""
    # Empty plot
    plot.update_traces(go.Line(y=[]))

    # Update the plot with the appropriate data and change the visibility of the audio components
    if radio == "Audio File":
        sample_rate, audio_data = file_data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
        return [gr.Audio.update(visible=True),
                gr.Audio.update(visible=False),
                plot,
                plot]
    elif radio == "Record Audio":
        sample_rate, audio_data = mic_data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
        return [gr.Audio.update(visible=False),
                gr.Audio.update(visible=True),
                plot,
                plot]


def plot_data(audio_data, plot):
    """Updates the plot and the appropriate state variable when audio is uploaded/recorded or deleted"""
    # If the current audio file is deleted
    if audio_data is None:
        # Replace the state variable for the audio source with placeholder values
        sample_rate, audio_data = [0, np.array([])]
        # Update the plot to be empty
        plot.update_traces(go.Line(y=[]))
    # If new audio is uploaded/recorded
    else:
        # Replace the current state variable with the new data
        sample_rate, audio_data = audio_data
        # Plot the new data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))

    # Update the plot component and the data state variable
    return [plot, [sample_rate, audio_data], plot]


def set_lang_vis(transcription_options):
    """Sets visibility of the language selector/warning when Automatic Language Detection is (de)selected"""
    if 'Automatic Language Detection' in transcription_options:
        text = w  # `w` is the language-detection warning HTML defined in the UI section below
        return [gr.Dropdown.update(visible=False),
                gr.Textbox.update(value=text, visible=True)]
    else:
        text = ""
        return [gr.Dropdown.update(visible=True),
                gr.Textbox.update(value=text, visible=False)]


def option_verif(language, selected_tran_opts, selected_audint_opts):
    """When the language is changed, this function automatically deselects options that are not allowed for that
    language."""
    not_available_tran, not_available_audint = get_unavailable_opts(language)

    current_tran_opts = list(set(selected_tran_opts) - set(not_available_tran))
    current_audint_opts = list(set(selected_audint_opts) - set(not_available_audint))

    return [current_tran_opts,
            current_audint_opts,
            current_tran_opts,
            current_audint_opts]


# Get tran/audint opts that are not available for a given language
def get_unavailable_opts(language):
    """Get transcription and audio intelligence options that are unavailable for a given language"""
    if language in ['Spanish', 'French', 'German', 'Portuguese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Sentiment Analysis', 'Summarization',
                                'Entity Detection']

    elif language in ['Italian', 'Dutch']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
                                'Sentiment Analysis', 'Summarization', 'Entity Detection']

    elif language in ['Hindi', 'Japanese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
                                'Sentiment Analysis', 'Summarization', 'Entity Detection']

    else:
        not_available_tran = []
        not_available_audint = []

    return not_available_tran, not_available_audint
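
# Illustrative usage of the mapping above (values taken directly from the branches of
# get_unavailable_opts): get_unavailable_opts('Italian') returns
#   (['Speaker Labels'],
#    ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
#     'Sentiment Analysis', 'Summarization', 'Entity Detection'])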


# When a new transcription option is selected, check that it is allowed for the current language and
# then add it to selected_tran_opts and update the checkbox group
def tran_selected(language, transcription_options):
    """When a transcription option is selected, remove any selections the current language does not support"""
    unavailable, _ = get_unavailable_opts(language)
    selected_tran_opts = list(set(transcription_options) - set(unavailable))

    return [selected_tran_opts, selected_tran_opts]


# When a new audio intelligence option is selected, check that it is allowed for the current language and
# then add it to selected_audint_opts and update the checkbox group
def audint_selected(language, audio_intelligence_selector):
    """When an audio intelligence option is selected, remove any selections the current language does not support"""
    _, unavailable = get_unavailable_opts(language)
    selected_audint_opts = list(set(audio_intelligence_selector) - set(unavailable))

    return [selected_audint_opts, selected_audint_opts]


def create_output(r, paras, language, transc_opts=None, audint_opts=None):
    """From a transcript response, return all outputs for audio intelligence"""
    if transc_opts is None:
        transc_opts = ['Automatic Language Detection', 'Speaker Labels', 'Filter Profanity']

    if audint_opts is None:
        audint_opts = ['Summarization', 'Auto Highlights', 'Topic Detection', 'Entity Detection',
                       'Sentiment Analysis', 'PII Redaction', 'Content Moderation']

    # DIARIZATION
    if "Speaker Labels" in transc_opts:
        utts = '\n\n\n'.join([f"Speaker {utt['speaker']}:\n\n" + utt['text'] for utt in r['utterances']])
    else:
        utts = " NOT ANALYZED"

    # HIGHLIGHTS
    if 'Auto Highlights' in audint_opts:
        highlight_dict = create_highlighted_list(paras, r['auto_highlights_result']['results'])
    else:
        highlight_dict = [["NOT ANALYZED", 0]]

    # SUMMARIZATION
    if 'Summarization' in audint_opts:
        chapters = r['chapters']
        summary_html = make_summary(chapters)
    else:
        summary_html = "<p>NOT ANALYZED</p>"

    # TOPIC DETECTION
    if "Topic Detection" in audint_opts:
        topics = r['iab_categories_result']['summary']
        topics_html = make_html_from_topics(topics)
    else:
        topics_html = "<p>NOT ANALYZED</p>"

    # SENTIMENT
    if "Sentiment Analysis" in audint_opts:
        sent_results = r['sentiment_analysis_results']
        sent = make_sentiment_output(sent_results)
    else:
        sent = "<p>NOT ANALYZED</p>"

    # ENTITY
    if "Entity Detection" in audint_opts:
        entities = r['entities']
        t = r['text']
        d = make_entity_dict(entities, t)
        entity_html = make_entity_html(d)
    else:
        entity_html = "<p>NOT ANALYZED</p>"

    # CONTENT SAFETY
    if "Content Moderation" in audint_opts:
        cont = r['content_safety_labels']['summary']
        content_fig = make_content_safety_fig(cont)
    else:
        content_fig = go.Figure()

    return [language, paras, utts, highlight_dict, summary_html, topics_html, sent, entity_html, content_fig]
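

# The next function drives the full AssemblyAI request cycle: upload the audio, request a transcript,
# poll until it completes, then fetch the transcript paragraphs and build the dashboard outputs.
# The exact request body is assembled by helpers.make_true_dict/make_final_json from the selected
# options; as a rough, illustrative sketch it is a standard AssemblyAI transcript config such as
#   {'speaker_labels': True, 'auto_highlights': True, 'auto_chapters': True,
#    'iab_categories': True, 'sentiment_analysis': True, 'entity_detection': True,
#    'content_safety': True, 'redact_pii': True, 'language_code': 'en_us'}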


def submit_to_AAI(api_key, transcription_options, audio_intelligence_selector, language, radio, audio_file,
                  mic_recording):
    """Upload the selected audio to AssemblyAI, request a transcript with the selected options, wait for it to
    complete, and return the formatted dashboard outputs"""
    # Make request header
    header = make_header(api_key)

    # Map transcription/audio intelligence options to AssemblyAI API request JSON dict
    true_dict = make_true_dict(transcription_options, audio_intelligence_selector)

    final_json, language = make_final_json(true_dict, language)
    final_json = {**true_dict, **final_json}

    # Select which audio to use
    if radio == "Audio File":
        audio_data = audio_file
    elif radio == "Record Audio":
        audio_data = mic_recording

    # Upload the audio
    upload_url = upload_file(audio_data, header, is_file=False)

    # Request transcript
    transcript_response = request_transcript(upload_url, header, **final_json)

    # Wait for the transcription to complete
    polling_endpoint = make_polling_endpoint(transcript_response)
    wait_for_completion(polling_endpoint, header)

    # Fetch results JSON
    r = requests.get(polling_endpoint, headers=header, json=final_json).json()

    # Fetch paragraphs of transcript
    transc_id = r['id']
    paras = make_paras_string(transc_id, header)

    return create_output(r, paras, language, transcription_options, audio_intelligence_selector)


def example_output(language):
    """Displays example output loaded from local example files, without calling the API"""
    with open("example_data/paras.txt", 'r') as f:
        paras = f.read()

    with open('example_data/response.json', 'r') as f:
        r = json.load(f)

    return create_output(r, paras, language)


with open('app/styles.css', 'r') as f:
    css = f.read()

with gr.Blocks(css=css) as demo:
    # Load logo image
    gr.HTML('<a href="https://www.assemblyai.com/"><img src="file/app/images/logo.png" class="logo"></a>')

    # Load descriptions
    # www.assemblyai.com/blog/how-to-build-an-audio-intelligence-dashboard-with-gradio/
    gr.HTML("<h1 class='title'>Audio Intelligence Dashboard</h1>"
            "<br>"
            "<p>Check out the <a href=\"https://www.assemblyai.com/blog/getting-started-with-huggingfaces-gradio/\">Getting Started with Hugging Face's Gradio</a> blog to learn how to build this dashboard.</p>")

    gr.HTML("<h1 class='title'>Directions</h1>"
            "<p>To use this dashboard:</p>"
            "<ul>"
            "<li>1) Paste your AssemblyAI API Key into the box below - you can copy it from <a href=\"https://app.assemblyai.com/signup\">here</a> (or get one for free if you don't already have one)</li>"
            "<li>2) Choose an audio source and upload or record audio</li>"
            "<li>3) Select the types of analyses you would like to perform on the audio</li>"
            "<li>4) Click <i>Submit</i></li>"
            "<li>5) View the results at the bottom of the page</li>"
            "</ul>"
            "<br>"
            "<p>You may also click <b>Show Example Output</b> below to see an example without having to enter an API key.</p>")

    gr.HTML('<div class="alert alert__warning"><span>'
            'Note that this dashboard is not an official AssemblyAI product and is intended for educational purposes.'
            '</span></div>')

    # API Key title
    with gr.Box():
        gr.HTML("<p class=\"apikey\">API Key:</p>")
        # API key textbox (password-style)
        api_key = gr.Textbox(label="", elem_id="pw")

    # Gradio states for the plotly Figure object, audio data for the file source, and audio data for the mic source
    plot = gr.State(px.line(labels={'x': 'Time (s)', 'y': ''}))
    file_data = gr.State([1, [0]])  # [sample rate, [data]]
    mic_data = gr.State([1, [0]])  # [sample rate, [data]]

    # Options that the user wants
    selected_tran_opts = gr.State(list(transcription_options_headers.keys()))
    selected_audint_opts = gr.State(list(audio_intelligence_headers.keys()))

    # Current options = selected options - unavailable options for the specified language
    current_tran_opts = gr.State([])
    current_audint_opts = gr.State([])

    # Selector for audio source
    radio = gr.Radio(["Audio File", "Record Audio"], label="Audio Source", value="Audio File")

    # Audio components for file and microphone data
    audio_file = gr.Audio()
    mic_recording = gr.Audio(source="microphone", visible=False)

    # Audio wave plot
    audio_wave = gr.Plot(plot.value)

    # Checkbox group for transcription options
    transcription_options = gr.CheckboxGroup(
        choices=list(transcription_options_headers.keys()),
        value=list(transcription_options_headers.keys()),
        label="Transcription Options",
    )

    # Warning for using Automatic Language Detection
    w = "<div class='alert alert__warning'>" \
        "<p>Automatic Language Detection is not available for Hindi or Japanese. For best results on non-US " \
        "English audio, specify the dialect instead of using Automatic Language Detection. " \
        "<br>" \
        "Some Audio Intelligence features are not available in some languages. See " \
        "<a href='https://airtable.com/shr53TWU5reXkAmt2/tblf7O4cffFndmsCH?backgroundColor=green'>here</a> " \
        "for more details.</p>" \
        "</div>"
    auto_lang_detect_warning = gr.HTML(w)

    # Checkbox group for Audio Intelligence options
    audio_intelligence_selector = gr.CheckboxGroup(
        choices=list(audio_intelligence_headers.keys()),
        value=list(audio_intelligence_headers.keys()),
        label='Audio Intelligence Options'
    )

    # Dropdown for manual language selection
    language = gr.Dropdown(
        choices=list(language_headers.keys()),
        value="US English",
        label="Language Specification",
        visible=False,
    )

    # Button to submit audio for processing with the selected options
    submit = gr.Button('Submit')

    # Button to show example output without calling the API
    example = gr.Button('Show Example Output')

    # Results tab group
    phl = 10
    with gr.Tab('Transcript'):
        trans_tab = gr.Textbox(placeholder="Your formatted transcript will appear here ...",
                               lines=phl,
                               max_lines=25,
                               show_label=False)
    with gr.Tab('Speaker Labels'):
        diarization_tab = gr.Textbox(placeholder="Your diarized transcript will appear here ...",
                                     lines=phl,
                                     max_lines=25,
                                     show_label=False)
    with gr.Tab('Auto Highlights'):
        highlights_tab = gr.HighlightedText()
    with gr.Tab('Summary'):
        summary_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Detected Topics"):
        topics_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Sentiment Analysis"):
        sentiment_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Entity Detection"):
        entity_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Content Safety"):
        content_tab = gr.Plot()

    ####################################### Functionality ######################################################

    # Changing the audio source changes the visible Audio input component
    radio.change(fn=change_audio_source,
                 inputs=[radio, plot, file_data, mic_data],
                 outputs=[audio_file, mic_recording, audio_wave, plot])
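
    # Note: Gradio matches each element of a callback's returned list, in order, to the components and
    # states named in its `outputs` list; e.g. change_audio_source returns four values that fill
    # [audio_file, mic_recording, audio_wave, plot] above, and the handlers below follow the same pattern.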

    # Inputting audio updates the plot
    audio_file.change(fn=plot_data,
                      inputs=[audio_file, plot],
                      outputs=[audio_wave, file_data, plot])
    mic_recording.change(fn=plot_data,
                         inputs=[mic_recording, plot],
                         outputs=[audio_wave, mic_data, plot])

    # Deselecting Automatic Language Detection shows the Language Specification dropdown
    transcription_options.change(
        fn=set_lang_vis,
        inputs=transcription_options,
        outputs=[language, auto_lang_detect_warning])

    # Changing the language deselects certain Transcription / Audio Intelligence options
    language.change(
        fn=option_verif,
        inputs=[language, selected_tran_opts, selected_audint_opts],
        outputs=[transcription_options, audio_intelligence_selector, current_tran_opts, current_audint_opts]
    )

    # Selecting a transcription option adds it to the selected options if the language allows it
    transcription_options.change(
        fn=tran_selected,
        inputs=[language, transcription_options],
        outputs=[transcription_options, selected_tran_opts]
    )

    # Selecting an audio intelligence option adds it to the selected options if the language allows it
    audio_intelligence_selector.change(
        fn=audint_selected,
        inputs=[language, audio_intelligence_selector],
        outputs=[audio_intelligence_selector, selected_audint_opts]
    )

    # Clicking "Submit" uploads the selected audio to AssemblyAI, performs the requested analyses, and
    # displays the results
    submit.click(fn=submit_to_AAI,
                 inputs=[api_key, transcription_options, audio_intelligence_selector, language, radio,
                         audio_file, mic_recording],
                 outputs=[language, trans_tab, diarization_tab, highlights_tab, summary_tab, topics_tab,
                          sentiment_tab, entity_tab, content_tab])

    # Clicking "Show Example Output" displays example results
    example.click(fn=example_output,
                  inputs=language,
                  outputs=[language, trans_tab, diarization_tab, highlights_tab, summary_tab, topics_tab,
                           sentiment_tab, entity_tab, content_tab])

# Launch the application
demo.launch()  # share=True
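
# Run this script from the repository root so that the relative 'app/styles.css', 'app/images/logo.png',
# and 'example_data/...' paths resolve (an assumption based on the paths used above). Passing
# share=True to demo.launch() would additionally serve the app through a temporary public Gradio link.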