File size: 1,545 Bytes
160b238 c1d7af5 bc7c876 c1d7af5 160b238 21e2400 160b238 21e2400 bc7c876 c1d7af5 bc7c876 c1d7af5 21e2400 160b238 21e2400 cf08fbc 21e2400 cf08fbc 160b238 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import gradio as gr
import tensorflow as tf
import librosa
import numpy as np
# Class names the classifier can emit. classify_audio() indexes this list with
# the argmax of the model's prediction, so the order presumably has to match
# the order of the model's output units -- TODO confirm against the training code.
labels = [
    'down',
    'go',
    'left',
    'no',
    'off',
    'on',
    'right',
    'stop',
    'up',
    'yes',
]
def extract_features(file_name):
    """Return a time-averaged MFCC feature vector for an audio file.

    The file is loaded with librosa (``res_type='kaiser_fast'``), 40 MFCCs
    are requested, and the transposed MFCC matrix is averaged along axis 0.
    Presumably this yields one mean value per coefficient, i.e. a vector of
    length 40 -- confirm against the librosa documentation.

    Args:
        file_name: Audio source handed straight to ``librosa.load``.

    Returns:
        The averaged MFCC array, or ``None`` if anything in the loading or
        feature-extraction pipeline raised. The failure is reported on
        stdout rather than propagated.
    """
    try:
        signal, rate = librosa.load(file_name, res_type='kaiser_fast')
        coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        averaged = np.mean(coefficients.T, axis=0)
    except Exception as err:
        # Best-effort by design: callers treat None as "could not process".
        print(f"Error encountered while parsing file: {file_name}")
        print(err)  # Print the full exception
        return None
    else:
        return averaged
# Models already read from disk, keyed by file path. Filled lazily by
# _get_model() so the (slow) load happens once per process, not per request.
_MODEL_CACHE = {}


def _get_model(path='my_model.h5'):
    """Return the Keras model stored at *path*, loading it only on first use."""
    if path not in _MODEL_CACHE:
        # A failed load raises before anything is cached, so the next call
        # simply retries.
        _MODEL_CACHE[path] = tf.keras.models.load_model(path, compile=False)
    return _MODEL_CACHE[path]


def classify_audio(audio_file):
    """Classify an audio file and return the predicted label.

    Args:
        audio_file: Audio source forwarded to ``extract_features``; the
            debug print below expects it to be a ``str`` file path.

    Returns:
        The entry of ``labels`` at the argmax of the model's prediction, or
        the string ``"Error al procesar el audio"`` when feature extraction
        returned ``None``.

    Raises:
        IndexError: If the argmax index falls outside ``labels``.
        Any exception raised while loading ``my_model.h5`` or running the
        prediction is propagated unchanged.
    """
    print(f"Tipo de audio_file: {type(audio_file)}")  # Should print <class 'str'>

    # Preprocess the audio directly.
    features = extract_features(audio_file)
    if features is None:
        return "Error al procesar el audio"

    # Add a leading axis of size 1 so the model receives a batch of one sample.
    batch = features.reshape(1, -1)

    # 'my_model.h5' is resolved relative to the working directory; it is
    # loaded on the first request and reused afterwards.
    model = _get_model()

    with tf.device('/CPU:0'):
        prediction = model.predict(batch)

    predicted_label_index = np.argmax(prediction)
    return labels[predicted_label_index]
# Gradio UI: one audio input wired to classify_audio, one text output.
# type="filepath" is presumably what makes the callback receive a path string
# (classify_audio's debug print expects <class 'str'>) -- confirm in Gradio docs.
iface = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Clasificación de audio simple",
    description="Sube un archivo de audio para clasificarlo.",
)

# Start the app when this script is executed.
iface.launch()