nguyennp86 committed on
Commit
3b91620
·
verified ·
1 Parent(s): a344700

Update src/utils.py

Browse files
Files changed (1) hide show
  1. src/utils.py +125 -83
src/utils.py CHANGED
@@ -1,83 +1,125 @@
1
- """
2
- Utility functions for visualization and analysis
3
- """
4
-
5
- import numpy as np
6
- import matplotlib.pyplot as plt
7
- import librosa
8
- import librosa.display
9
-
10
- def create_waveform_plot(y, sr, title="Audio Waveform"):
11
- """
12
- Create waveform visualization
13
-
14
- Args:
15
- y (np.array): Audio time series
16
- sr (int): Sample rate
17
- title (str): Plot title
18
-
19
- Returns:
20
- matplotlib.figure.Figure: Waveform plot
21
- """
22
- fig, ax = plt.subplots(figsize=(10, 3))
23
- librosa.display.waveshow(y, sr=sr, ax=ax, color='#2E86DE')
24
- ax.set_title(title, fontsize=14, fontweight='bold')
25
- ax.set_xlabel('Time (seconds)', fontsize=11)
26
- ax.set_ylabel('Amplitude', fontsize=11)
27
- ax.grid(True, alpha=0.3)
28
- plt.tight_layout()
29
- return fig
30
-
31
- def create_spectrogram_plot(y, sr, title="Spectrogram"):
32
- """
33
- Create spectrogram visualization
34
-
35
- Args:
36
- y (np.array): Audio time series
37
- sr (int): Sample rate
38
- title (str): Plot title
39
-
40
- Returns:
41
- matplotlib.figure.Figure: Spectrogram plot
42
- """
43
- fig, ax = plt.subplots(figsize=(10, 4))
44
- D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
45
- img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz',
46
- ax=ax, cmap='viridis')
47
- ax.set_title(title, fontsize=14, fontweight='bold')
48
- ax.set_xlabel('Time (seconds)', fontsize=11)
49
- ax.set_ylabel('Frequency (Hz)', fontsize=11)
50
- fig.colorbar(img, ax=ax, format='%+2.0f dB')
51
- plt.tight_layout()
52
- return fig
53
-
54
- def create_mel_spectrogram_plot(y, sr, title="Mel Spectrogram"):
55
- """
56
- Create mel spectrogram visualization
57
-
58
- Args:
59
- y (np.array): Audio time series
60
- sr (int): Sample rate
61
- title (str): Plot title
62
-
63
- Returns:
64
- matplotlib.figure.Figure: Mel spectrogram plot
65
- """
66
- fig, ax = plt.subplots(figsize=(10, 4))
67
- S = librosa.feature.melspectrogram(y=y, sr=sr)
68
- S_dB = librosa.power_to_db(S, ref=np.max)
69
- img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel',
70
- ax=ax, cmap='magma')
71
- ax.set_title(title, fontsize=14, fontweight='bold')
72
- ax.set_xlabel('Time (seconds)', fontsize=11)
73
- ax.set_ylabel('Mel Frequency', fontsize=11)
74
- fig.colorbar(img, ax=ax, format='%+2.0f dB')
75
- plt.tight_layout()
76
- return fig
77
-
78
- def format_probability_text(prob_dict, top_k=None):
79
- """
80
- Format probability dictionary as text with progress bars
81
-
82
- Args:
83
- prob_dict (dict): Dictionary of e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for visualization and analysis
3
+ """
4
+
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import librosa
8
+ import librosa.display
9
+
10
+ def create_waveform_plot(y, sr, title="Audio Waveform"):
11
+ """
12
+ Create waveform visualization
13
+
14
+ Args:
15
+ y (np.array): Audio time series
16
+ sr (int): Sample rate
17
+ title (str): Plot title
18
+
19
+ Returns:
20
+ matplotlib.figure.Figure: Waveform plot
21
+ """
22
+ fig, ax = plt.subplots(figsize=(10, 3))
23
+ librosa.display.waveshow(y, sr=sr, ax=ax, color='#2E86DE')
24
+ ax.set_title(title, fontsize=14, fontweight='bold')
25
+ ax.set_xlabel('Time (seconds)', fontsize=11)
26
+ ax.set_ylabel('Amplitude', fontsize=11)
27
+ ax.grid(True, alpha=0.3)
28
+ plt.tight_layout()
29
+ return fig
30
+
31
+ def create_spectrogram_plot(y, sr, title="Spectrogram"):
32
+ """
33
+ Create spectrogram visualization
34
+
35
+ Args:
36
+ y (np.array): Audio time series
37
+ sr (int): Sample rate
38
+ title (str): Plot title
39
+
40
+ Returns:
41
+ matplotlib.figure.Figure: Spectrogram plot
42
+ """
43
+ fig, ax = plt.subplots(figsize=(10, 4))
44
+ D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
45
+ img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz',
46
+ ax=ax, cmap='viridis')
47
+ ax.set_title(title, fontsize=14, fontweight='bold')
48
+ ax.set_xlabel('Time (seconds)', fontsize=11)
49
+ ax.set_ylabel('Frequency (Hz)', fontsize=11)
50
+ fig.colorbar(img, ax=ax, format='%+2.0f dB')
51
+ plt.tight_layout()
52
+ return fig
53
+
54
+ def create_mel_spectrogram_plot(y, sr, title="Mel Spectrogram"):
55
+ """
56
+ Create mel spectrogram visualization
57
+
58
+ Args:
59
+ y (np.array): Audio time series
60
+ sr (int): Sample rate
61
+ title (str): Plot title
62
+
63
+ Returns:
64
+ matplotlib.figure.Figure: Mel spectrogram plot
65
+ """
66
+ fig, ax = plt.subplots(figsize=(10, 4))
67
+ S = librosa.feature.melspectrogram(y=y, sr=sr)
68
+ S_dB = librosa.power_to_db(S, ref=np.max)
69
+ img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel',
70
+ ax=ax, cmap='magma')
71
+ ax.set_title(title, fontsize=14, fontweight='bold')
72
+ ax.set_xlabel('Time (seconds)', fontsize=11)
73
+ ax.set_ylabel('Mel Frequency', fontsize=11)
74
+ fig.colorbar(img, ax=ax, format='%+2.0f dB')
75
+ plt.tight_layout()
76
+ return fig
77
+
78
+ def format_probability_text(prob_dict, top_k=None):
79
+ """
80
+ Format probability dictionary as text with progress bars
81
+
82
+ Args:
83
+ prob_dict (dict): Dictionary of emotion: probability
84
+ top_k (int): Show only top K emotions (None for all)
85
+
86
+ Returns:
87
+ str: Formatted text
88
+ """
89
+ # Sort by probability
90
+ sorted_probs = sorted(prob_dict.items(), key=lambda x: x[1], reverse=True)
91
+
92
+ if top_k:
93
+ sorted_probs = sorted_probs[:top_k]
94
+
95
+ text = ""
96
+ for emotion, prob in sorted_probs:
97
+ # Create progress bar
98
+ bar_length = int(prob * 30)
99
+ bar = '█' * bar_length + '░' * (30 - bar_length)
100
+
101
+ text += f"**{emotion.capitalize()}**: {bar} {prob*100:.2f}%\n"
102
+
103
+ return text
104
+
105
+ def get_emotion_emoji(emotion):
106
+ """
107
+ Get emoji for emotion
108
+
109
+ Args:
110
+ emotion (str): Emotion name
111
+
112
+ Returns:
113
+ str: Emoji character
114
+ """
115
+ emoji_map = {
116
+ 'angry': '😠',
117
+ 'calm': '😌',
118
+ 'disgust': '🤢',
119
+ 'fearful': '😨',
120
+ 'happy': '😊',
121
+ 'neutral': '😐',
122
+ 'sad': '😢',
123
+ 'surprised': '😲'
124
+ }
125
+ return emoji_map.get(emotion.lower(), '🎭')