import gradio as gr
import whisper
from gtts import gTTS
import os
import numpy as np
from scipy.io.wavfile import write as write_wav
from datetime import datetime
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
import cv2  # OpenCV: analyzes the uploaded drawing for the visuoconstruction task
from shapely.geometry import Polygon  # geometry helper (side counting)
import re  # cleaning numeric / free-text answers
import time
import pytz  # timezone handling for the temporal-orientation questions

# NOTE(review): 'dateparser' was dropped because it did not reliably parse
# spoken ordinals such as "the fifth"; ordinal handling is done manually
# further down in this file.
# import dateparser

# --- NLTK data downloader setup ---
# Keep NLTK resources next to this file so the app keeps working offline
# after the first run.
LOCAL_NLTK_DATA_PATH = os.path.join(os.path.dirname(__file__), 'nltk_data')
if LOCAL_NLTK_DATA_PATH not in nltk.data.path:
    nltk.data.path.append(LOCAL_NLTK_DATA_PATH)


def download_nltk_data_if_needed(resource_name, download_name):
    """Download an NLTK resource into LOCAL_NLTK_DATA_PATH if missing.

    Args:
        resource_name: lookup path for nltk.data.find (e.g. 'tokenizers/punkt').
        download_name: identifier passed to nltk.download (e.g. 'punkt').
    """
    try:
        nltk.data.find(resource_name)
    except LookupError:
        print(f"Downloading NLTK resource '{download_name}'...")
        # exist_ok avoids a race/failure when the directory already exists.
        os.makedirs(LOCAL_NLTK_DATA_PATH, exist_ok=True)
        nltk.download(download_name, download_dir=LOCAL_NLTK_DATA_PATH)
        print("Download complete.")


# Tokenizer and POS-tagger resources needed by score_sentence_structure.
download_nltk_data_if_needed('tokenizers/punkt', 'punkt')
download_nltk_data_if_needed('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger')
download_nltk_data_if_needed('tokenizers/punkt_tab', 'punkt_tab')
download_nltk_data_if_needed('taggers/averaged_perceptron_tagger_eng', 'averaged_perceptron_tagger_eng')

print(f"✅ Gradio version: {gr.__version__}")

# Whisper speech-to-text model, loaded once at startup.
model = whisper.load_model("small")


def transcribe(audio):
    """Transcribe a recorded Gradio audio clip to English text.

    Args:
        audio: a (sample_rate, numpy_array) tuple from gr.Audio, or None
            when nothing was recorded.

    Returns:
        The transcribed text, or "" when no audio was provided.
    """
    if audio is None:
        return ""
    sample_rate, y = audio
    # Whisper expects a file on disk, so write the raw samples to a WAV.
    temp_wav_path = "/tmp/temp_audio.wav"
    write_wav(temp_wav_path, sample_rate, y)
    result = model.transcribe(temp_wav_path, language="en")
    return result["text"]
# --- Helper: season for a given month in the Northern Hemisphere ---
def get_season(month):
    """Return the Northern-Hemisphere season name for a month number (1-12)."""
    if 3 <= month <= 5:
        return "spring"
    elif 6 <= month <= 8:
        return "summer"
    elif 9 <= month <= 11:
        return "fall"
    else:
        return "winter"


# --- Current date/time pinned to a specific timezone (US Eastern) ---
# zoneinfo is the standard-library replacement for the third-party pytz
# package (pytz is deprecated for new code); passing the zone directly to
# datetime.now() is equivalent to the old UTC-then-astimezone dance.
from zoneinfo import ZoneInfo

TARGET_TIMEZONE = ZoneInfo("America/New_York")
now = datetime.now(TARGET_TIMEZONE)

# Grouped MMSE questions.  Each entry maps question text to:
#   answer       - expected answer (dynamic for the date questions);
#                  '|' separates acceptable synonyms
#   instruction  - optional guidance shown to the tester
#   max_points   - optional point value (defaults to 1)
GROUPED_QUESTIONS = {
    "Question 1: Temporal Orientation": {
        "What year is this?": {
            "answer": str(now.year),
            "instruction": "Instructions for tester: Score 1 point for the correct year."
        },
        "What season is this in Northern Hemisphere?": {
            "answer": get_season(now.month),
            "instruction": "Instructions for tester: Examples: Summer, Fall, Winter, Spring"
        },
        "What month is this?": {
            "answer": now.strftime("%B").lower(),
            "instruction": "Instructions for tester: Examples: january, february, ..., november, december"
        },
        "What is the day of today's date?": {
            "answer": str(now.day),
            "instruction": "Instructions for tester: Examples: Range from 1 to 31"
        },
        "What day of the week is this?": {
            "answer": now.strftime("%A").lower(),
            "instruction": "Instructions for tester: monday, tuesday, ..., saturday, sunday"
        }
    },
    "Question 2: Spatial Orientation": {
        "What country are we in?": {"answer": "united states"},
        "What state are we in?": {"answer": "connecticut"},
        "What city or town are we in?": {"answer": "greenwich"},
        "What is the street address / name of building?": {"answer": "123 main street"},
        "What room or floor are we in?": {"answer": "living room"},
    },
    "Question 3: Memory Registration": {
        "I am going to name three words. Repeat these three words: Ball Car Man": {
            "answer": "ball car man",
            "instruction": "Instructions for tester: Say the words clearly at a rate of one per second. After the user responds, say 'Keep those words in mind. I am going to ask you to say them again in a few minutes.'",
            "max_points": 3  # could also be scored per word
        }
    },
    "Question 4: Attention": {
        # NOTE: the 'substracting' misspelling is load-bearing — evaluate()
        # routes this question by matching this exact substring.  If you
        # correct it, correct the matching string in evaluate() too.
        "Count backward from 100 substracting by sevens": {
            "answer": "93 86 79 72 65",
            "instruction": "Instructions for tester: Stop after five subtractions. Score one point for each correct number.",
            "max_points": 5  # one point per correct subtraction
        }
    },
    "Question 5: Delayed Recall": {
        "What were the three words I asked you to remember?": {
            "answer": "ball car man",
            "max_points": 3
        }
    },
    "Question 6: Naming Communication": {
        "I am going to show you the first object and I would like you to name it": {
            "answer": "watch|wristwatch",  # '|' separates accepted synonyms
            "instruction": "Instructions for tester: Show the patient a simple object, a watch",
            "max_points": 1
        },
        "I am going to show you the second object and I would like you to name it": {
            "answer": "pencil",
            "instruction": "Instructions for tester: Show the patient a simple object, a pencil",
            "max_points": 1
        }
    },
    "Question 7: Sentence Repetition": {
        "I would like you to repeat a phrase after me: No ifs, ands, or buts.": {
            "answer": "no ifs, ands, or buts",
            "max_points": 1
        }
    },
    # Praxis (vs. apraxia: the inability to perform skilled, learned movements)
    "Question 8: Praxis 3-Stage Movement": {
        "Take this paper in your non-dominant hand, fold the paper in half once with both hands and put the paper down on the floor.": {
            "answer": "A numeric value from 0 to 3 representing tasks completed.",
            "instruction": "Instructions for tester: Input how many tasks completed (0 to 3)",
            "max_points": 3
        }
    },
    "Question 9: Reading on CLOSE YOUR EYES": {
        "Read the CAPITALIZED words on this question and then do what it says": {
            "answer": "yes",
            "instruction": "Instructions for tester: Input 'yes' if eyes are closed; else, 'no'",
            "max_points": 1
        }
    },
    "Question 10: Writing Communication": {
        "Write any complete sentence here or on a piece of paper": {
            "answer": "A sentence containing at least one noun and one verb.",
            "max_points": 1
        }
    },
    # The last question requires a drawing upload.
    "Question 11: Visuoconstruction": {
        "Please draw a copy of this picture": {
            "answer": "4",  # expected side count of the pentagons' intersection
            "instruction": "Instructions for tester: Give the user a piece of paper. Show them a drawing of two overlapping pentagons. Ask them to draw a copy.",
            "max_points": 1
        }
    }
}

# Flatten the grouped questions into an ordered list, preserving the
# hierarchy (main question number + sub-question letter) and point values.
STRUCTURED_QUESTIONS = []
for main_num, (section, questions) in enumerate(GROUPED_QUESTIONS.items(), start=1):
    main_cat_name = section.split(":", 1)[1].strip() if ":" in section else section
    for sub_q_idx, (question, data) in enumerate(questions.items()):
        STRUCTURED_QUESTIONS.append({
            "main_cat": main_cat_name,
            "main_num": main_num,
            "sub_letter": chr(ord('a') + sub_q_idx),
            "question": question,
            "answer": data["answer"],
            "instruction": data.get("instruction", ""),
            "max_points": data.get("max_points", 1),
        })

TOTAL_QUESTIONS = len(STRUCTURED_QUESTIONS)

# Question titles for the navigation dropdown, e.g. "Q1a: What year is this?"
QUESTION_CHOICES = [
    f"Q{q['main_num']}{q['sub_letter']}: {q['question']}"
    for q in STRUCTURED_QUESTIONS
]
# --- Robust lookup of special question indices (replaces old hard-coded ones) ---
def find_question_index(keyword):
    """Return the index of the first question whose text contains *keyword*.

    Returns -1 when no question matches, instead of raising.
    """
    return next(
        (i for i, q in enumerate(STRUCTURED_QUESTIONS) if keyword in q["question"]),
        -1,
    )


DRAWING_Q_INDEX = find_question_index("draw a copy of this picture")

# --- Pre-generate all TTS audio at startup to avoid per-click rate limits ---
print("Pre-generating TTS audio for all questions...")
AUDIO_FILE_MAP = {}
for i, q_data in enumerate(STRUCTURED_QUESTIONS):
    try:
        clean_text = q_data['question'].strip()
        tts = gTTS(clean_text)
        filepath = f"/tmp/question_{i}.mp3"
        tts.save(filepath)
        AUDIO_FILE_MAP[i] = filepath
        # Brief pause between requests so the gTTS endpoint does not
        # rate-limit us during startup.
        time.sleep(0.5)
    except Exception as e:
        print(f"Warning: Could not pre-generate audio for question {i}: {e}")
        AUDIO_FILE_MAP[i] = None
print("TTS audio pre-generation complete.")


def speak_question(current_index):
    """Return the pre-generated MP3 path for a question (None if unavailable)."""
    return AUDIO_FILE_MAP.get(current_index)


# Number words -> digit strings.
# IMPORTANT: multi-word phrases are listed FIRST.  normalize_numeric_words()
# applies entries in order; if 'three' ran before 'ninety three', the answer
# "ninety three" would be corrupted to "ninety 3" and never match 93.
WORD_TO_DIGIT = {
    # Compound phrases for the serial-sevens test (must precede single words)
    'ninety three': '93', 'eighty six': '86', 'seventy nine': '79',
    'seventy two': '72', 'sixty five': '65', 'thirty one': '31',
    # Single words
    'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
    'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
    'ten': '10', 'eleven': '11', 'twelve': '12', 'thirteen': '13',
    'fourteen': '14', 'fifteen': '15', 'sixteen': '16', 'seventeen': '17',
    'eighteen': '18', 'nineteen': '19', 'twenty': '20', 'thirty': '30',
}

# Ordinal words / suffixes -> day-of-month digit strings.
# IMPORTANT: compound ordinals are listed FIRST for the same reason:
# normalize_date_answer() stops at the first key found, and 'first' would
# otherwise match inside 'twenty-first' and yield '1' instead of '21'.
ORDINAL_TO_DIGIT = {
    # Hyphenated compound ordinals
    'twenty-first': '21', 'twenty-second': '22', 'twenty-third': '23',
    'twenty-fourth': '24', 'twenty-fifth': '25', 'twenty-sixth': '26',
    'twenty-seventh': '27', 'twenty-eighth': '28', 'twenty-ninth': '29',
    'thirty-first': '31',
    # Unhyphenated compound ordinals
    'twenty first': '21', 'twenty second': '22', 'twenty third': '23',
    'twenty fourth': '24', 'twenty fifth': '25', 'twenty sixth': '26',
    'twenty seventh': '27', 'twenty eighth': '28', 'twenty ninth': '29',
    'thirty first': '31',
    # Single-word ordinals
    'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5',
    'sixth': '6', 'seventh': '7', 'eighth': '8', 'ninth': '9', 'tenth': '10',
    'eleventh': '11', 'twelfth': '12', 'thirteenth': '13', 'fourteenth': '14',
    'fifteenth': '15', 'sixteenth': '16', 'seventeenth': '17',
    'eighteenth': '18', 'nineteenth': '19', 'twentieth': '20',
    'thirtieth': '30',
    # Suffix-based ordinals ('1st' matching inside '21st' still yields '21')
    '1st': '1', '2nd': '2', '3rd': '3', '4th': '4', '5th': '5', '6th': '6',
    '7th': '7', '8th': '8', '9th': '9', '10th': '10', '11th': '11',
    '12th': '12', '13th': '13', '14th': '14', '15th': '15', '16th': '16',
    '17th': '17', '18th': '18', '19th': '19', '20th': '20', '21st': '21',
    '22nd': '22', '23rd': '23', '24th': '24', '25th': '25', '26th': '26',
    '27th': '27', '28th': '28', '29th': '29', '30th': '30', '31st': '31'
}


def clean_text_answer(text: str) -> str:
    """Lower-case *text*, strip punctuation, and collapse whitespace."""
    if not text:
        return ""
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    return " ".join(text.split())


def score_keyword_match(expected, user_input):
    """Score 1 if any '|'-separated expected keyword occurs in the answer.

    Substring matching allows synonyms and partial matches, e.g. the
    keyword 'watch' matches the answer 'wristwatch'.
    """
    if not expected or not user_input:
        return 0
    cleaned_user = clean_text_answer(user_input)
    for candidate in expected.split('|'):
        # Clean each acceptable answer too, for a fair comparison.
        if clean_text_answer(candidate) in cleaned_user:
            return 1
    return 0


def score_date_response(raw_user_input):
    """Score the day-of-month question from a free-form answer.

    The previous implementation called the third-party 'dateparser'
    package, whose import is commented out at the top of this file, so
    every call raised NameError (swallowed by the except) and scored 0.
    This version maps ordinal words/suffixes to digits locally, trying the
    longest phrases first, and compares against today's day.

    Returns 1 when the answer names today's day of the month, else 0.
    """
    if not raw_user_input:
        return 0
    text = raw_user_input.lower().strip()
    if text.startswith("the "):
        text = text[4:]
    for word, digit in sorted(ORDINAL_TO_DIGIT.items(),
                              key=lambda kv: len(kv[0]), reverse=True):
        if word in text:
            text = text.replace(word, digit)
            break
    day = re.sub(r'\D', '', text)
    return 1 if day == str(now.day) else 0


def score_response(expected, cleaned_user_input):
    """Exact-match comparison between cleaned expected and cleaned user text."""
    if not expected:
        return 0
    return int(cleaned_user_input == clean_text_answer(expected))


def score_sevens_response(cleaned_user_input):
    """Count correct serial-sevens values among space-separated numbers.

    Expects cleaned input (punctuation already stripped, so "93, 86"
    arrives as "93 86").  Returns 0-5.
    """
    correct_numbers = {"93", "86", "79", "72", "65"}
    user_numbers = set((cleaned_user_input or "").split())
    return len(correct_numbers.intersection(user_numbers))


def score_three_words_response(cleaned_user_input):
    """Count how many of ball/car/man appear in the cleaned answer (0-3)."""
    correct_words = {"ball", "car", "man"}
    user_words = set((cleaned_user_input or "").split())
    return len(correct_words.intersection(user_words))


def score_sentence_structure(raw_user_input):
    """Score 1 if the raw text contains at least one noun and one verb.

    Uses NLTK POS tagging on the ORIGINAL (un-cleaned) text so punctuation
    does not interfere with tokenization.
    """
    try:
        tokens = nltk.word_tokenize(raw_user_input or "")
        if len(tokens) < 2:
            return 0
        pos_tags = nltk.pos_tag(tokens)
        # Any noun tag (NN, NNS, NNP, NNPS) and any verb tag (VB*) qualify.
        has_noun = any(tag.startswith('NN') for _, tag in pos_tags)
        has_verb = any(tag.startswith('VB') for _, tag in pos_tags)
        return 1 if has_noun and has_verb else 0
    except Exception as e:
        print(f"[NLTK ERROR] Failed to parse sentence: {e}")
        return 0
# --- Scoring for Question 11 (visuoconstruction) using OpenCV ---
def score_drawing(image_path, expected_sides):
    """Score the pentagon-copy drawing by analyzing the uploaded image.

    Strategy: threshold the image, find all significant contours, and treat
    the SMALLEST significant polygon as the intersection of the two
    pentagons.  Fewer than 3 significant shapes means the figures did not
    overlap, so no point is awarded.  Known limitation: non-intersecting
    shapes that still produce 3+ contours are not handled specially.

    Args:
        image_path: path to the uploaded drawing, or None.
        expected_sides: number of sides the intersection should have.

    Returns:
        (score, sides_of_smallest_shape) where score is 0 or 1.
    """
    print("\n--- Debugging score_drawing (Final Logic w/ Special Case) ---")
    if not image_path or not os.path.exists(image_path):
        return 0, 0
    try:
        img = cv2.imread(image_path)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Inverse threshold: dark pen strokes become white foreground.
        _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Step 1: drop tiny contours (noise filter).
        significant_contours = [c for c in contours if cv2.contourArea(c) > 500]
        print(f"DEBUG: Found {len(significant_contours)} significant contours.")

        # Step 2: two pentagons plus their overlap yield at least 3 shapes;
        # fewer means no valid intersection was drawn.
        if len(significant_contours) < 3:
            print("DEBUG: Less than 3 significant shapes found. Concluding no intersection.")
            print("---------------------------------")
            return 0, 0

        # Step 3: the smallest remaining shape is assumed to be the overlap.
        min_area = float('inf')
        sides_of_smallest_shape = 0
        for contour in significant_contours:
            area = cv2.contourArea(contour)
            # 4% of perimeter is a standard approxPolyDP tolerance.
            epsilon = 0.04 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            if area < min_area:
                min_area = area
                sides_of_smallest_shape = len(approx)

        print(f"\nDEBUG: --- Final Analysis ---")
        print(f"DEBUG: The smallest significant shape had {sides_of_smallest_shape} sides.")
        score = 1 if sides_of_smallest_shape == expected_sides else 0
        print(f"DEBUG: Comparing found sides ({sides_of_smallest_shape}) with expected sides ({expected_sides}). Score: {score}")
        print("---------------------------------")
        return score, sides_of_smallest_shape
    except Exception as e:
        print(f"[OpenCV ERROR] Failed to process image: {e}")
        return 0, 0


def normalize_date_answer(text: str) -> str:
    """Convert a spoken/typed day answer ('the fifth', '21st') to digits.

    Ordinals are tried longest-first so that e.g. 'twenty-first' maps to
    '21' before the shorter 'first' can match inside it and yield '1'.
    """
    if not text:
        return ""
    clean_text = text.lower().strip()
    # Drop a leading article that would otherwise interfere.
    if clean_text.startswith("the "):
        clean_text = clean_text[4:]
    for word, digit in sorted(ORDINAL_TO_DIGIT.items(),
                              key=lambda kv: len(kv[0]), reverse=True):
        if word in clean_text:
            clean_text = clean_text.replace(word, digit)
            break  # stop after the first (longest) match
    # Keep only the digits of the result.
    return re.sub(r'\D', '', clean_text)


def clean_numeric_answer(text):
    """Remove all non-digit characters from a string ('' for falsy input)."""
    return re.sub(r'\D', '', text or "")


def normalize_numeric_words(text: str) -> str:
    """Replace whole-word number phrases with digits ('ninety three' -> '93').

    Phrases are applied longest-first so multi-word numbers are converted
    before their component words can corrupt them; \b word boundaries
    prevent replacements inside other words.
    """
    if not text:
        return ""
    text = text.lower().strip()
    for word, digit in sorted(WORD_TO_DIGIT.items(),
                              key=lambda kv: len(kv[0]), reverse=True):
        text = re.sub(r'\b' + re.escape(word) + r'\b', digit, text)
    return text


def evaluate(answers_list, user_drawing_path):
    """Score every question and build the final report.

    Args:
        answers_list: raw answer strings, one per question, in order.
        user_drawing_path: filepath of the uploaded drawing, or None.

    Returns:
        (per-question report text, "total / possible" string,
         gr.update hiding the submit button, gr.update showing Start Over)
    """
    total_score, total_possible_score, results = 0, 0, []
    for i, q_data in enumerate(STRUCTURED_QUESTIONS):
        user_answer = answers_list[i]  # raw answer, kept for display
        point = 0
        # Normalize number words to digits up front ("two" -> "2",
        # "ninety three" -> "93") for the numeric scorers below.
        normalized_answer = normalize_numeric_words(user_answer)

        if "draw a copy of this picture" in q_data["question"]:
            # The 'answer' field holds the expected side count (e.g. "4").
            try:
                expected_sides = int(q_data["answer"])
            except (ValueError, TypeError):
                expected_sides = 0
            point, sides_detected = score_drawing(user_drawing_path, expected_sides)
            if sides_detected > 0:
                user_answer = f"[{sides_detected}-sided shape detected]"
            elif user_drawing_path and os.path.exists(user_drawing_path):
                user_answer = "[Image uploaded, but no clear shape found]"
            else:
                user_answer = "[No image uploaded]"
        elif "Write any complete sentence" in q_data["question"]:
            # POS tagging needs the ORIGINAL answer, punctuation intact.
            point = score_sentence_structure(user_answer)
        elif "substracting by sevens" in q_data["question"]:
            point = score_sevens_response(clean_text_answer(normalized_answer))
        elif "three words" in q_data["question"] or "three objects" in q_data["question"]:
            point = score_three_words_response(clean_text_answer(user_answer))
        elif "day of today's date" in q_data["question"]:
            # Handles ordinals like "the fifth" or "21st".
            normalized_day = normalize_date_answer(user_answer)
            point = 1 if normalized_day == str(now.day) else 0
        elif "Take this paper" in q_data["question"]:
            # Tester enters how many of the 3 sub-tasks were completed;
            # works for both "2" and the normalized "two".
            try:
                numeric_score = int(clean_numeric_answer(normalized_answer))
                point = min(numeric_score, q_data["max_points"])
            except (ValueError, TypeError):
                point = 0
        else:
            point = score_keyword_match(q_data["answer"], clean_text_answer(normalized_answer))
            # Keyword matching is all-or-nothing: a hit earns the full
            # point value.  This scaling applies ONLY to this fallback
            # branch — partial scores from the special scorers above
            # (e.g. 1 of 3 recall words) must never be inflated.
            if point == 1:
                point = q_data["max_points"]

        results.append(
            f"Q{q_data['main_num']}{q_data['sub_letter']}: {q_data['question']}\n"
            f" - Score: {point} / {q_data['max_points']} | Your Answer: '{user_answer}' | Expected: '{q_data['answer']}'"
        )
        total_score += point
        total_possible_score += q_data["max_points"]

    return ("\n\n".join(results),
            f"{total_score} / {total_possible_score}",
            gr.update(visible=False),
            gr.update(visible=True))


def update_view(new_index, all_answers):
    """Build the component updates for displaying question *new_index*.

    Returns a tuple matching the UI outputs list: question button label,
    the previously saved answer, the new index, progress markdown, tester
    instruction, dropdown value, drawing-upload visibility update, and
    None to clear the microphone input.
    """
    q_data = STRUCTURED_QUESTIONS[new_index]
    progress = (f"## {q_data['main_cat']} - Question "
                f"{q_data['main_num']}{q_data['sub_letter']} ({new_index + 1} of {TOTAL_QUESTIONS})")
    # Show the image-upload widget only on the drawing question.
    drawing_q_visible = (new_index == DRAWING_Q_INDEX)
    return (
        f"Say 🔊 {q_data['question']}",
        all_answers[new_index],
        new_index,
        progress,
        q_data["instruction"],
        QUESTION_CHOICES[new_index],
        gr.update(visible=drawing_q_visible),
        None,  # clears the audio_input component
    )
def save_and_navigate(direction, current_index, current_answer, all_answers):
    """Persist the on-screen answer, then move one question forward/backward."""
    all_answers[current_index] = current_answer
    if direction == "next":
        new_index = min(current_index + 1, TOTAL_QUESTIONS - 1)
    else:  # "prev"
        new_index = max(current_index - 1, 0)
    return update_view(new_index, all_answers) + (all_answers,)


def jump_to_question(selected_choice, current_index, current_answer, all_answers):
    """Persist the on-screen answer, then jump to the chosen question.

    A cleared or unknown dropdown value is ignored (the current view is
    returned unchanged) instead of letting .index() raise ValueError.
    """
    if not selected_choice or selected_choice not in QUESTION_CHOICES:
        return update_view(current_index, all_answers) + (all_answers,)
    all_answers[current_index] = current_answer
    new_index = QUESTION_CHOICES.index(selected_choice)
    return update_view(new_index, all_answers) + (all_answers,)


def save_final_answer(current_index, current_answer, all_answers):
    """Store the answer currently on screen before the exam is submitted."""
    all_answers[current_index] = current_answer
    return all_answers


def reset_app():
    """Return initial values for every stateful component (Start Over).

    The order of the returned tuple must match reset_outputs in build_ui.
    """
    initial_q_data = STRUCTURED_QUESTIONS[0]
    initial_progress = (f"## {initial_q_data['main_cat']} - Question "
                        f"{initial_q_data['main_num']}{initial_q_data['sub_letter']} (1 of {TOTAL_QUESTIONS})")
    return (
        0,                             # question_index
        [""] * TOTAL_QUESTIONS,        # answers
        "",                            # score_lines
        "",                            # total
        f"Say 🔊 {initial_q_data['question']}",  # question_button
        initial_progress,              # progress_text
        initial_q_data["instruction"],  # instruction_display
        "",                            # answer_text
        QUESTION_CHOICES[0],           # jump_nav
        None,                          # audio_input (clear)
        None,                          # image_upload (clear)
        gr.update(visible=False),      # start_over_btn (hide itself)
        gr.update(visible=True),       # submit (show it again)
        None,                          # tts_audio (clear)
    )


def build_ui():
    """Assemble the Gradio Blocks interface and wire all event handlers."""
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Mini Mental State Exam (MMSE) with Multimodal Inputs")
        question_index = gr.State(0)
        answers = gr.State([""] * TOTAL_QUESTIONS)

        # Navigation dropdown to jump directly to any question.
        jump_nav = gr.Dropdown(choices=QUESTION_CHOICES, value=QUESTION_CHOICES[0],
                               label="Jump to Question")

        initial_q_data = STRUCTURED_QUESTIONS[0]
        initial_progress = (f"## {initial_q_data['main_cat']} - Question "
                            f"{initial_q_data['main_num']}{initial_q_data['sub_letter']} (1 of {TOTAL_QUESTIONS})")
        progress_text = gr.Markdown(initial_progress)
        instruction_display = gr.Markdown(initial_q_data["instruction"])
        question_button = gr.Button(f"Say 🔊 {initial_q_data['question']}", variant="secondary")

        # NOTE(review): update_view uses the module-level DRAWING_Q_INDEX;
        # a redundant local recomputation that used to live here was dead
        # code and has been removed.

        with gr.Group():
            answer_text = gr.Textbox(label="Your Answer (type or record below)")
            audio_input = gr.Audio(sources=["microphone"], label="🎤 Record Your Answer Here")
            # Shown/hidden by update_view depending on the current question.
            image_upload = gr.Image(type="filepath",
                                    label="Upload Drawing for Drawing Question",
                                    visible=False)

        with gr.Row():
            prev_btn = gr.Button("⬅️ Previous Question")
            next_btn = gr.Button("Next Question ➡️")

        tts_audio = gr.Audio(autoplay=True, visible=False)
        score_lines = gr.Textbox(label="Score by Question", lines=TOTAL_QUESTIONS)
        total = gr.Textbox(label="Total Score")

        # Submit sits at the bottom; Start Over is revealed after scoring.
        submit = gr.Button("✅ Submit All Answers")
        start_over_btn = gr.Button("🔄 Start Over", visible=False)

        # Transcribe microphone input straight into the answer box.
        audio_input.change(transcribe, inputs=audio_input, outputs=answer_text)

        # Play the pre-generated TTS clip for the current question.
        question_button.click(speak_question, inputs=question_index, outputs=tts_audio)

        # Outputs shared by all navigation handlers; order matches the
        # tuple returned by update_view (+ the answers state).
        outputs_list = [
            question_button, answer_text, question_index, progress_text,
            instruction_display, jump_nav, image_upload, audio_input, answers
        ]
        next_btn.click(
            save_and_navigate,
            inputs=[gr.Textbox("next", visible=False), question_index, answer_text, answers],
            outputs=outputs_list
        )
        prev_btn.click(
            save_and_navigate,
            inputs=[gr.Textbox("prev", visible=False), question_index, answer_text, answers],
            outputs=outputs_list
        )
        jump_nav.change(
            jump_to_question,
            inputs=[jump_nav, question_index, answer_text, answers],
            outputs=outputs_list
        )

        # Submit: save the on-screen answer first, then score everything,
        # hide the submit button and reveal Start Over.
        submit.click(
            save_final_answer,
            inputs=[question_index, answer_text, answers],
            outputs=answers
        ).then(
            fn=evaluate,
            inputs=[answers, image_upload],
            outputs=[score_lines, total, submit, start_over_btn]
        )

        # Order must match the tuple returned by reset_app.
        reset_outputs = [
            question_index, answers, score_lines, total, question_button,
            progress_text, instruction_display, answer_text, jump_nav,
            audio_input, image_upload, start_over_btn, submit, tts_audio
        ]
        start_over_btn.click(reset_app, inputs=None, outputs=reset_outputs)

    return demo


if __name__ == "__main__":
    build_ui().launch()