seayala's picture
Update app.py
cf08fbc verified
raw
history blame
1.94 kB
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import tensorflow as tf
# Class labels, ordered so that position i names the class for model output
# index i (classify_audio indexes this list with the argmax of the prediction).
labels = [
    'down',
    'go',
    'left',
    'no',
    'off',
    'on',
    'right',
    'stop',
    'up',
    'yes',
]
def extract_features(file_name):
    """Summarise an audio file as a vector of averaged MFCCs.

    The file is decoded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCCs are computed, and the transposed coefficient matrix is averaged
    along axis 0, giving one mean value per coefficient.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        The array of averaged MFCCs, or ``None`` if loading the file or
        computing the features raised an exception (the error is printed).
    """
    try:
        signal, rate = librosa.load(file_name, res_type='kaiser_fast')
        coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        averaged = np.mean(coefficients.T, axis=0)
    except Exception as error:
        # Best-effort: report the problem and signal failure with None.
        print(f"Error encountered while parsing file: {file_name}")
        print(error)  # Print the full exception
        return None
    else:
        return averaged
# Loaded Keras models keyed by file path, so each model is read from disk
# only once instead of on every request.
_MODEL_CACHE = {}


def _get_model(model_path='my_model.h5'):
    """Return the Keras model stored at *model_path*, loading it on first use."""
    if model_path not in _MODEL_CACHE:
        _MODEL_CACHE[model_path] = tf.keras.models.load_model(
            model_path, compile=False
        )
    return _MODEL_CACHE[model_path]


def classify_audio(audio_file):
    """Predict the spoken-command label for an audio clip.

    Args:
        audio_file: Either a path to an audio file (what the
            ``gr.Audio(type="filepath")`` input passes in) or a binary
            file-like object exposing ``read()``. ``None`` is accepted and
            treated as a processing error.

    Returns:
        The predicted entry of ``labels``, or the string
        ``"Error al procesar el audio"`` when no audio was supplied or
        feature extraction failed.
    """
    print(f"Tipo de audio_file: {type(audio_file)}")

    # Nothing to classify (e.g. the form was submitted without audio).
    if audio_file is None:
        return "Error al procesar el audio"

    temp_path = None
    if hasattr(audio_file, "read"):
        # File-like input: spool it to disk because extract_features
        # works from a file name.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_file.write(audio_file.read())
            temp_path = tmp_file.name
        audio_path = temp_path
    else:
        # Path input: use the file directly; calling .read() on a str would
        # raise AttributeError.
        audio_path = audio_file

    # Preprocess the audio; always remove the temporary copy we created.
    try:
        features = extract_features(audio_path)
    finally:
        if temp_path is not None:
            os.remove(temp_path)

    # extract_features returns None when it hit an error.
    if features is None:
        return "Error al procesar el audio"

    # Add a leading batch dimension: (n_features,) -> (1, n_features).
    # If the model expects 3-D input, reshape to (1, 40, 1) instead.
    features = features.reshape(1, -1)

    model = _get_model()

    # Run the prediction on the CPU.
    with tf.device('/CPU:0'):
        prediction = model.predict(features)

    # The highest-scoring output index selects the label.
    predicted_label_index = int(np.argmax(prediction))
    return labels[predicted_label_index]
# Gradio UI: a single audio input, delivered to classify_audio as a file
# path, with the returned label displayed as plain text.
iface = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    # Title restored from mojibake ("贸" was a mis-decoded "ó").
    title="Clasificación de audio simple",
    description="Sube un archivo de audio para clasificarlo.",
)

# Start the web server that serves the interface.
iface.launch()