import os |
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' |
os.environ["CUDA_VISIBLE_DEVICES"] = "1" |
import numpy as np |
import tensorflow as tf |
import vggish_input |
import vggish_params |
import vggish_slim |
import contextlib |
import wave |
def get_audio_len(audio_file) -> int:
    """Return the duration of a WAV file in whole seconds.

    The duration is the frame count divided by the frame rate, rounded
    down, so a 2.9 s clip reports ``2``.

    Args:
        audio_file: Path (or readable binary file object) of a WAV file,
            as accepted by ``wave.open``.

    Returns:
        The clip length in seconds, truncated to an integer.

    Raises:
        wave.Error: If the file is not a WAV file ``wave`` can parse.
        ZeroDivisionError: If the header declares a frame rate of 0.
    """
    # ``wave.open`` objects are context managers themselves, so no
    # ``contextlib.closing`` wrapper is needed to guarantee the close.
    with wave.open(audio_file, 'rb') as wav:
        frames = wav.getnframes()
        rate = wav.getframerate()
    # Both values are non-negative ints; floor division equals the old
    # int(frames / float(rate)) without the float round trip.
    return frames // rate
# --------------------------------------------------------------------------
# Configuration
# --------------------------------------------------------------------------
checkpoint_path = './vggish_model.ckpt'
pca_params_path = './vggish_pca_params.npz'  # not referenced in this section
freq = 1000  # not referenced in this section
sr = 44100  # not referenced in this section
audio_root = "./datasets/"


def _load_vggish(checkpoint):
    """Build the VGGish graph once and restore its weights from *checkpoint*.

    Returns:
        A ``(session, features_tensor, embedding_tensor)`` tuple. The session
        is bound to its own private graph; the caller must close it.
    """
    graph = tf.Graph()
    with graph.as_default():
        # Created while ``graph`` is the default graph, so it binds to it.
        sess = tf.compat.v1.Session()
        try:
            with sess.as_default():
                vggish_slim.define_vggish_slim()
                vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint)
            features = sess.graph.get_tensor_by_name(
                vggish_params.INPUT_TENSOR_NAME)
            embedding = sess.graph.get_tensor_by_name(
                vggish_params.OUTPUT_TENSOR_NAME)
        except Exception:
            # Do not leak the session if model construction fails.
            sess.close()
            raise
    return sess, features, embedding


# --------------------------------------------------------------------------
# Feature extraction
#
# For every audio file under <audio_root>/<subset>/WAVAudios, run the VGGish
# model and store the embeddings as <audio_root>/<subset>/FEATAudios/<name>.npy.
# Files whose output already exists are skipped, so the script is resumable.
# --------------------------------------------------------------------------

# The model is loaded lazily (only if at least one file needs processing) and
# then reused for every file; rebuilding the graph and re-reading the
# checkpoint per file is loop-invariant work.
_vggish = None
try:
    for subset in ["train", "val", "test"]:
        print("{} ----------> ".format(subset))
        audio_dir = os.path.join(audio_root, subset, "WAVAudios")
        save_dir = os.path.join(audio_root, subset, "FEATAudios")
        os.makedirs(save_dir, exist_ok=True)

        lis = sorted(os.listdir(audio_dir))
        len_data = len(lis)
        print(len_data)

        # ``enumerate`` keeps the 1-based progress counter in step with the
        # file index (it used to be bumped twice per processed file).
        for i, wav_name in enumerate(lis, start=1):
            # Drops the last four characters, i.e. assumes a ".wav"-style
            # three-letter extension.
            stem = wav_name[:-4]
            feat_name = stem + '.npy'
            outfile = os.path.join(save_dir, feat_name)
            if os.path.exists(outfile):
                print("\nProcessing: ", i, " / ", len_data, " ----> ", feat_name, " is already exist! ")
                continue

            # Feature learning by VGG-net trained on AudioSet.
            audio_index = os.path.join(audio_dir, wav_name)
            # One entry in the matching JPEGImages folder per example to
            # extract; presumably one frame per second -- confirm.
            num_secs = len(os.listdir(
                os.path.join(audio_root, subset, "JPEGImages", stem)))

            input_batch = vggish_input.wavfile_to_examples(audio_index, num_secs)
            np.testing.assert_equal(
                input_batch.shape,
                [num_secs, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS])

            if _vggish is None:
                _vggish = _load_vggish(checkpoint_path)
            sess, features_tensor, embedding_tensor = _vggish

            [embedding_batch] = sess.run(
                [embedding_tensor], feed_dict={features_tensor: input_batch})
            np.save(outfile, embedding_batch)
            print(" save info: ", feat_name, " ---> ", embedding_batch.shape)

        # NOTE(review): original indentation was lost; the banner is assumed
        # to close each subset, pairing with the header printed above.
        print("\n---------------------------------- end ----------------------------------\n")
finally:
    if _vggish is not None:
        _vggish[0].close()