"""Gradio app that classifies a short spoken-word audio clip with a Keras model."""

import functools
import tempfile

import gradio as gr
import tensorflow as tf
import librosa
import numpy as np

# Class names; classify_audio indexes this list with the argmax of the
# model's prediction, so the order must match the model's output units.
labels = [
    'cat', 'house', 'marvin', 'nine', 'one', 'eight', 'three', 'five',
    'zero', 'four', 'tree', 'wow', 'six', 'off', 'bed', 'seven', 'down',
    'happy', 'on', 'yes', 'two', 'right', 'left', 'stop', 'go', 'no',
    'sheila', 'up', 'bird', 'dog'
]

# Path of the trained Keras model, relative to the working directory.
MODEL_PATH = 'my_model.h5'

# Feature-extraction parameters.
SAMPLE_RATE = 16000   # audio is resampled to this rate on load
N_MELS = 257          # mel bands (rows of the spectrogram)
N_FFT = 512
HOP_LENGTH = 256
N_FRAMES = 97         # spectrogram is padded/truncated to this many frames


def extract_features(file_name):
    """Compute a normalised log-mel spectrogram for one audio file.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        A NumPy array of shape ``(N_MELS, N_FRAMES, 1)`` (257 x 97 x 1),
        standardised to zero mean and unit variance, or ``None`` if the
        file could not be loaded or processed (the error is printed).
    """
    try:
        # Load and resample to 16 kHz.
        audio, sample_rate = librosa.load(file_name, sr=SAMPLE_RATE)

        # Mel spectrogram, shape (n_mels, frames).
        mel_spectrogram = librosa.feature.melspectrogram(
            y=audio,
            sr=sample_rate,
            n_mels=N_MELS,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
        )

        # Convert power to decibels relative to the peak value.
        log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

        # Force the exact (N_MELS, N_FRAMES) size by padding or truncating.
        log_mel_spectrogram = librosa.util.fix_length(
            log_mel_spectrogram, size=N_MELS, axis=0
        )
        log_mel_spectrogram = librosa.util.fix_length(
            log_mel_spectrogram, size=N_FRAMES, axis=1
        )

        # Standardise. A constant spectrogram (e.g. silent audio) has zero
        # standard deviation; dividing by it would fill the features with
        # NaN, so fall back to a divisor of 1 in that case.
        std = np.std(log_mel_spectrogram)
        if not np.isfinite(std) or std == 0:
            std = 1.0
        log_mel_spectrogram = (
            log_mel_spectrogram - np.mean(log_mel_spectrogram)
        ) / std

        # Append the trailing channel axis.
        log_mel_spectrogram = log_mel_spectrogram[..., np.newaxis]
    except Exception as e:
        # Best-effort boundary: report the problem and let the caller decide.
        print(f"Error encountered while parsing file: {file_name}")
        print(e)
        return None
    return log_mel_spectrogram


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the Keras model on first use and reuse it for later requests.

    Loading is deferred until the first classification, and a failed load
    is not cached, so a later request retries it.
    """
    return tf.keras.models.load_model(MODEL_PATH, compile=False)


def classify_audio(audio_file):
    """Predict the spoken-word label for an uploaded audio file.

    Args:
        audio_file: Path of the audio file; the Gradio input below is
            configured with ``type="filepath"``.

    Returns:
        The predicted label from ``labels``, or the string
        ``"Error al procesar el audio"`` if feature extraction failed.
    """
    print(f"Tipo de audio_file: {type(audio_file)}")

    # The input is already a path, so no temporary file is needed.
    file_path = audio_file

    features = extract_features(file_path)
    if features is None:
        return "Error al procesar el audio"

    # Add the batch dimension: (257, 97, 1) -> (1, 257, 97, 1).
    # NOTE(review): the previous comment here claimed (1, 97, 257, 1). If the
    # model expects time-major input, a transpose is missing -- confirm
    # against model.input_shape.
    features = features[np.newaxis, ...]

    model = _load_model()

    # Run inference on the CPU.
    with tf.device('/CPU:0'):
        prediction = model.predict(features)

    predicted_label_index = int(np.argmax(prediction))
    predicted_label = labels[predicted_label_index]
    return predicted_label


iface = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Clasificación de audio simple",
    description="Sube un archivo de audio para clasificarlo."
)

iface.launch()