⚡ Trim audio to 30s for stable analysis on Hugging Face
app.py CHANGED
@@ -29,32 +29,41 @@ def analyze_emotion(audio_path):
     extractor, model = load_model()
     waveform, sr = torchaudio.load(audio_path)
 
-
-
+    # 💡 Trim audio to 30 seconds max to avoid slowdowns
+    max_duration_sec = 30
+    max_samples = sr * max_duration_sec
+    if waveform.size(1) > max_samples:
+        waveform = waveform[:, :max_samples]
 
+    duration_sec = waveform.size(1) / sr
+
+    # Run model
     inputs = extractor(waveform[0].numpy(), sampling_rate=16000, return_tensors="pt")
     with torch.no_grad():
         logits = model(**inputs).logits[0]
 
     emotion, scores = get_emotion_label(logits)
-    return emotion.capitalize(), scores
+    return emotion.capitalize(), scores, duration_sec
 
-# UI
+# Streamlit UI
 st.set_page_config(page_title="🎧 Audio Emotion Detector", layout="centered")
 st.title("🎧 Audio Emotion Analysis (Wav2Vec2)")
 
 uploaded_file = st.file_uploader("Upload an MP3 or WAV audio file", type=["mp3", "wav"])
 
 if uploaded_file:
-    st.audio(uploaded_file)
+    st.audio(uploaded_file, format='audio/wav')
     with st.spinner("Analyzing emotion..."):
         wav_path = convert_to_wav(uploaded_file)
-        emotion, scores = analyze_emotion(wav_path)
+        emotion, scores, duration_sec = analyze_emotion(wav_path)
+
+    st.subheader("⏱ Audio Info:")
+    st.write(f"Duration analyzed: **{duration_sec:.2f} seconds**")
 
-    st.subheader("Detected Emotion:")
-    st.markdown(f"
+    st.subheader("🧠 Detected Emotion:")
+    st.markdown(f"**{emotion}**")
 
-    st.subheader("Confidence Scores:")
+    st.subheader("🎯 Confidence Scores:")
     emotions = ["angry", "happy", "neutral", "sad"]
     for i, label in enumerate(emotions):
         st.write(f"- **{label.capitalize()}**: {scores[i]*100:.2f}%")
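One detail the hunk glosses over: torchaudio.load returns the file's native sample rate, yet the extractor is called with a hardcoded sampling_rate=16000. If convert_to_wav does not already resample, the two can disagree for arbitrary MP3 uploads. Below is a minimal sketch of the trim step with an explicit resample added, assuming the model expects 16 kHz input; the load_trimmed helper name is illustrative, not from the repo.

import torchaudio

def load_trimmed(audio_path, max_duration_sec=30, target_sr=16000):
    # Load, keep at most the first `max_duration_sec` seconds, then resample.
    waveform, sr = torchaudio.load(audio_path)      # shape: (channels, samples)
    max_samples = sr * max_duration_sec
    if waveform.size(1) > max_samples:              # same trim as the commit above
        waveform = waveform[:, :max_samples]
    if sr != target_sr:                             # assumption: the model wants 16 kHz
        waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)(waveform)
        sr = target_sr
    return waveform, sr, waveform.size(1) / sr      # waveform, rate, analyzed duration

# usage: waveform, sr, duration_sec = load_trimmed("clip.wav")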
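The helpers load_model, convert_to_wav, and get_emotion_label live outside this hunk, so their behavior is inferred. A plausible get_emotion_label, consistent with the four-label order the UI loop assumes and not verified against the repo, could be:

import torch

EMOTIONS = ["angry", "happy", "neutral", "sad"]  # same order the UI loop iterates

def get_emotion_label(logits):
    # Softmax the class logits so scores[i] lines up with EMOTIONS[i] in the UI.
    scores = torch.softmax(logits, dim=-1)
    return EMOTIONS[int(scores.argmax())], scores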