Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import librosa | |
| from sklearn.cluster import DBSCAN | |
def extract_voice_features(audio_path, fps, video_duration):
    """Return one averaged MFCC vector per video frame of an audio file.

    The audio is loaded with librosa, converted to 13 MFCC coefficients, and
    the MFCC columns that fall inside each video frame's time span are
    averaged into a single vector.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        fps: Video frame rate in frames per second (must be non-zero).
        video_duration: Video length in seconds; caps the number of rows at
            ``int(fps * video_duration)``.

    Returns:
        Array of shape ``(n_frames, 13)``. ``n_frames`` is smaller than the
        cap when the audio ends before the video does, and the result has
        shape ``(0, 13)`` when no frame is covered by audio.
    """
    n_mfcc = 13
    # Passed explicitly because the frame alignment below depends on it.
    hop_length = 512

    y, sr = librosa.load(audio_path)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length)
    n_columns = mfccs.shape[1]

    # MFCC columns are hop_length samples apart, so one video frame spans
    # sr / (fps * hop_length) columns. The previous code sliced the MFCC
    # matrix with *sample* offsets, which ran past the end almost immediately.
    columns_per_frame = sr / (fps * hop_length)
    total_frames = int(fps * video_duration)

    segments = []
    for frame_index in range(total_frames):
        start = int(frame_index * columns_per_frame)
        if start >= n_columns:
            # Audio is shorter than the video; stop at the last covered frame.
            break
        # Always keep at least one column so the mean is never taken over an
        # empty slice (happens when fps is high relative to the hop rate).
        end = max(start + 1, int((frame_index + 1) * columns_per_frame))
        segments.append(mfccs[:, start:end].mean(axis=1))

    if not segments:
        return np.empty((0, n_mfcc), dtype=mfccs.dtype)
    return np.array(segments)
def cluster_voices(features, *, eps=0.5, min_samples=5):
    """Assign a cluster label to every voice feature vector using DBSCAN.

    Args:
        features: Sequence of feature vectors, one row per segment.
        eps: DBSCAN neighbourhood radius, measured with Euclidean distance.
        min_samples: DBSCAN ``min_samples`` setting.

    Returns:
        Integer array with one label per row of ``features``. Rows DBSCAN
        treats as noise are labelled ``-1``. When there are fewer than two
        rows, or DBSCAN marks every row as noise, all rows are assigned to
        cluster ``0`` instead.
    """
    n_segments = len(features)
    if n_segments < 2:
        print("Not enough voice segments for clustering. Assigning all to one cluster.")
        return np.zeros(n_segments, dtype=int)

    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
    clusters = dbscan.fit_predict(features)

    # An all-noise result carries no grouping information, so fall back to a
    # single cluster rather than reporting every segment as an outlier.
    if np.all(clusters == -1):
        print("DBSCAN assigned all to noise. Considering as one cluster.")
        return np.zeros(n_segments, dtype=int)
    return clusters
def get_most_frequent_voice(features, clusters):
    """Return the feature rows that belong to the most populated cluster.

    Args:
        features: Array of feature vectors, one row per segment.
        clusters: Array of integer cluster labels aligned with ``features``;
            ``-1`` is treated as the noise label.

    Returns:
        The rows of ``features`` whose label is the most frequent one. Noise
        (``-1``) is only selected when no other label is present. Ties go to
        the smallest label. An empty selection is returned for empty input.
    """
    features = np.asarray(features)
    clusters = np.asarray(clusters)

    # Nothing to pick from; the old max() over an empty set raised ValueError.
    if clusters.size == 0:
        return features[:0]

    labels, counts = np.unique(clusters, return_counts=True)

    # Noise is not a voice: ignore it whenever a real cluster exists.
    is_real = labels != -1
    if is_real.any():
        labels, counts = labels[is_real], counts[is_real]

    largest_cluster = labels[np.argmax(counts)]
    return features[clusters == largest_cluster]
def process_audio(audio_path, fps, video_duration):
    """Run the voice pipeline: extract features, cluster them, pick the main voice.

    Args:
        audio_path: Path of the audio file to analyse.
        fps: Video frame rate, forwarded to ``extract_voice_features``.
        video_duration: Video length, forwarded to ``extract_voice_features``.

    Returns:
        A tuple ``(most_frequent_voice, features, clusters)``: the rows chosen
        by ``get_most_frequent_voice``, all extracted features, and the
        labels produced by ``cluster_voices``.
    """
    voice_features = extract_voice_features(audio_path, fps, video_duration)
    labels = cluster_voices(voice_features)
    dominant_voice = get_most_frequent_voice(voice_features, labels)
    return dominant_voice, voice_features, labels