Spaces:

Kaworu17
/

YAMNet

Sleeping

App Files Community

Kaworu17 committed on May 5

Commit

29e4b0d

verified ·

1 Parent(s): e63bfc0

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -7

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ import matplotlib.pyplot as plt
 import gradio as gr
 import soundfile as sf
 from scipy.signal import resample
 # Load YAMNet model from TensorFlow Hub
 yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")
@@ -20,12 +22,28 @@ def load_class_map():
 class_names = load_class_map()
-# Classification function
-def classify_audio(file_path):
     try:
-        # Load audio
         audio_data, sample_rate = sf.read(file_path)
         # Convert stereo to mono
         if len(audio_data.shape) > 1:
             audio_data = np.mean(audio_data, axis=1)
@@ -41,18 +59,19 @@ def classify_audio(file_path):
             audio_data = resample(audio_data, new_length)
             sample_rate = target_rate
-        # Convert to tensor
         waveform = tf.convert_to_tensor(audio_data, dtype=tf.float32)
-        # Run YAMNet
         scores, embeddings, spectrogram = yamnet_model(waveform)
         mean_scores = tf.reduce_mean(scores, axis=0).numpy()
         top_5 = np.argsort(mean_scores)[::-1][:5]
         top_prediction = class_names[top_5[0]]
         top_scores = {class_names[i]: float(mean_scores[i]) for i in top_5}
-        # Waveform plot
         fig, ax = plt.subplots()
         ax.plot(audio_data)
         ax.set_title("Waveform")
@@ -65,7 +84,7 @@ def classify_audio(file_path):
     except Exception as e:
         return f"Error processing audio: {str(e)}", {}, None
-# Gradio interface (HF-compatible)
 interface = gr.Interface(
     fn=classify_audio,
     inputs=gr.Audio(type="filepath", label="Upload .wav or .mp3 audio file"),

 import gradio as gr
 import soundfile as sf
 from scipy.signal import resample
+import tempfile
+import os
 # Load YAMNet model from TensorFlow Hub
 yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")
 class_names = load_class_map()
+# Main classification function
+def classify_audio(audio_input):
     try:
+        # Case 1: Filepath from Gradio UI
+        if isinstance(audio_input, str):
+            file_path = audio_input
+        # Case 2: Binary upload (n8n POST) without .name attribute
+        elif hasattr(audio_input, "read"):
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                tmp.write(audio_input.read())
+                file_path = tmp.name
+        else:
+            raise ValueError("Unsupported input format")
+        # Load audio file
         audio_data, sample_rate = sf.read(file_path)
+        # Cleanup if temp file was created
+        if 'tmp' in locals():
+            os.unlink(tmp.name)
         # Convert stereo to mono
         if len(audio_data.shape) > 1:
             audio_data = np.mean(audio_data, axis=1)
             audio_data = resample(audio_data, new_length)
             sample_rate = target_rate
+        # Tensor for model
         waveform = tf.convert_to_tensor(audio_data, dtype=tf.float32)
+        # Run YAMNet model
         scores, embeddings, spectrogram = yamnet_model(waveform)
         mean_scores = tf.reduce_mean(scores, axis=0).numpy()
         top_5 = np.argsort(mean_scores)[::-1][:5]
+        # Output results
         top_prediction = class_names[top_5[0]]
         top_scores = {class_names[i]: float(mean_scores[i]) for i in top_5}
+        # Plot waveform
         fig, ax = plt.subplots()
         ax.plot(audio_data)
         ax.set_title("Waveform")
     except Exception as e:
         return f"Error processing audio: {str(e)}", {}, None
+# Gradio Interface
 interface = gr.Interface(
     fn=classify_audio,
     inputs=gr.Audio(type="filepath", label="Upload .wav or .mp3 audio file"),