Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,8 @@ from transformers import pipeline
|
|
| 13 |
from huggingface_hub import snapshot_download
|
| 14 |
from pydub import AudioSegment
|
| 15 |
import noisereduce as nr
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# 🎨 Apply Custom Dark Mode CSS
|
| 18 |
st.markdown(
|
|
@@ -100,12 +102,28 @@ if uploaded_file:
|
|
| 100 |
# Load audio
|
| 101 |
y, sr = librosa.load(file_path, sr=16000)
|
| 102 |
|
| 103 |
-
# 🎵 Display waveform
|
| 104 |
-
st.markdown("<div class='subheader'>πΌ Audio Waveform:</div>", unsafe_allow_html=True)
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
# ✅ Noise Reduction
|
| 111 |
st.markdown("<div class='subheader'>π Applying Noise Reduction...</div>", unsafe_allow_html=True)
|
|
@@ -113,6 +131,47 @@ if uploaded_file:
|
|
| 113 |
denoised_path = file_path.replace(".wav", "_denoised.wav")
|
| 114 |
sf.write(denoised_path, y_denoised, sr)
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
# ✅ Speech-to-Text using Vosk
|
| 117 |
def transcribe_audio(audio_path):
|
| 118 |
wf = wave.open(audio_path, "rb")
|
|
@@ -131,7 +190,7 @@ if uploaded_file:
|
|
| 131 |
st.markdown("<div class='subheader'>π Transcribed Text:</div>", unsafe_allow_html=True)
|
| 132 |
st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
|
| 133 |
|
| 134 |
-
# ✅ Emotion Detection
|
| 135 |
st.markdown("<div class='subheader'>π Emotion Analysis:</div>", unsafe_allow_html=True)
|
| 136 |
|
| 137 |
emotion_result = emotion_model(file_path)
|
|
@@ -156,8 +215,5 @@ if uploaded_file:
|
|
| 156 |
)
|
| 157 |
|
| 158 |
# ✅ Play Original & Denoised Audio
|
| 159 |
-
st.
|
| 160 |
-
st.audio(
|
| 161 |
-
|
| 162 |
-
st.markdown("<div class='subheader'>π Denoised Audio:</div>", unsafe_allow_html=True)
|
| 163 |
-
st.audio(denoised_path, format="audio/wav", start_time=0)
|
|
|
|
| 13 |
from huggingface_hub import snapshot_download
|
| 14 |
from pydub import AudioSegment
|
| 15 |
import noisereduce as nr
|
| 16 |
+
import plotly.graph_objects as go
|
| 17 |
+
import plotly.express as px
|
| 18 |
|
| 19 |
# 🎨 Apply Custom Dark Mode CSS
|
| 20 |
st.markdown(
|
|
|
|
| 102 |
# Load audio
|
| 103 |
y, sr = librosa.load(file_path, sr=16000)
|
| 104 |
|
| 105 |
+
# 🎵 Display waveform using Plotly
|
| 106 |
+
st.markdown("<div class='subheader'>πΌ Interactive Audio Waveform:</div>", unsafe_allow_html=True)
|
| 107 |
+
|
| 108 |
+
time_axis = np.linspace(0, len(y) / sr, num=len(y))
|
| 109 |
+
|
| 110 |
+
fig_waveform = go.Figure()
|
| 111 |
+
fig_waveform.add_trace(go.Scatter(
|
| 112 |
+
x=time_axis,
|
| 113 |
+
y=y,
|
| 114 |
+
mode='lines',
|
| 115 |
+
line=dict(color='cyan'),
|
| 116 |
+
name="Waveform"
|
| 117 |
+
))
|
| 118 |
+
|
| 119 |
+
fig_waveform.update_layout(
|
| 120 |
+
title="Audio Waveform",
|
| 121 |
+
xaxis_title="Time (seconds)",
|
| 122 |
+
yaxis_title="Amplitude",
|
| 123 |
+
template="plotly_dark"
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
st.plotly_chart(fig_waveform)
|
| 127 |
|
| 128 |
# ✅ Noise Reduction
|
| 129 |
st.markdown("<div class='subheader'>π Applying Noise Reduction...</div>", unsafe_allow_html=True)
|
|
|
|
| 131 |
denoised_path = file_path.replace(".wav", "_denoised.wav")
|
| 132 |
sf.write(denoised_path, y_denoised, sr)
|
| 133 |
|
| 134 |
+
# ✅ Spectrogram using Plotly
|
| 135 |
+
st.markdown("<div class='subheader'>π€ Spectrogram (Frequency Analysis):</div>", unsafe_allow_html=True)
|
| 136 |
+
|
| 137 |
+
S = librosa.stft(y)
|
| 138 |
+
S_db = librosa.amplitude_to_db(np.abs(S), ref=np.max)
|
| 139 |
+
|
| 140 |
+
fig_spectrogram = px.imshow(
|
| 141 |
+
S_db,
|
| 142 |
+
aspect='auto',
|
| 143 |
+
origin='lower',
|
| 144 |
+
labels={"x": "Time (frames)", "y": "Frequency (bins)", "color": "Intensity (dB)"},
|
| 145 |
+
color_continuous_scale="plasma"
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
fig_spectrogram.update_layout(
|
| 149 |
+
title="Spectrogram",
|
| 150 |
+
template="plotly_dark"
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
+
st.plotly_chart(fig_spectrogram)
|
| 154 |
+
|
| 155 |
+
# ✅ MFCC using Plotly
|
| 156 |
+
st.markdown("<div class='subheader'>π΅ MFCC Feature Extraction:</div>", unsafe_allow_html=True)
|
| 157 |
+
|
| 158 |
+
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
| 159 |
+
|
| 160 |
+
fig_mfcc = px.imshow(
|
| 161 |
+
mfccs,
|
| 162 |
+
aspect='auto',
|
| 163 |
+
origin='lower',
|
| 164 |
+
labels={"x": "Time (frames)", "y": "MFCC Coefficients", "color": "Magnitude"},
|
| 165 |
+
color_continuous_scale="viridis"
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
fig_mfcc.update_layout(
|
| 169 |
+
title="Mel-Frequency Cepstral Coefficients (MFCC)",
|
| 170 |
+
template="plotly_dark"
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
st.plotly_chart(fig_mfcc)
|
| 174 |
+
|
| 175 |
# ✅ Speech-to-Text using Vosk
|
| 176 |
def transcribe_audio(audio_path):
|
| 177 |
wf = wave.open(audio_path, "rb")
|
|
|
|
| 190 |
st.markdown("<div class='subheader'>π Transcribed Text:</div>", unsafe_allow_html=True)
|
| 191 |
st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
|
| 192 |
|
| 193 |
+
# ✅ Emotion Detection
|
| 194 |
st.markdown("<div class='subheader'>π Emotion Analysis:</div>", unsafe_allow_html=True)
|
| 195 |
|
| 196 |
emotion_result = emotion_model(file_path)
|
|
|
|
| 215 |
)
|
| 216 |
|
| 217 |
# ✅ Play Original & Denoised Audio
|
| 218 |
+
st.audio(file_path, format="audio/wav")
|
| 219 |
+
st.audio(denoised_path, format="audio/wav")
|
|
|
|
|
|
|
|
|