# dataset_shuvani / prepare_data.py
# yushan-wiseyak's picture
# Upload folder using huggingface_hub
# b98f4e5 verified
import csv
import os
import shutil

import pandas as pd
# Directory holding the annotated source audio files.
audio_annotation_base_path = "/home/oem/wiseyak/suraj/speech_deployment/tts_deployment/tts_data_annotator/annotated_audio_dataset/"
# Directory the selected audio files are copied into.
out_path = "/home/oem/wiseyak/suraj/TTS/dataset/shuvani/wavs/"


def copy_annotated_audio(metadata_path, source_dir, dest_dir):
    """Copy every audio file listed in a pipe-delimited metadata file.

    The first ``|``-separated field of each row in *metadata_path* is taken
    as a file name relative to *source_dir*; each such file is copied into
    *dest_dir*.

    Args:
        metadata_path: Path of the header-less, ``|``-delimited metadata file.
        source_dir: Directory containing the files named in the metadata.
        dest_dir: Directory to copy into; created if it does not exist.

    Returns:
        The number of files copied.

    Raises:
        ValueError: If a row has an empty first field.
        OSError: If a listed file cannot be read or the copy fails
            (e.g. ``FileNotFoundError`` for a missing source file).
    """
    names = pd.read_csv(
        metadata_path,
        delimiter='|',
        header=None,
        usecols=[0],
        # Keep names such as "001" as text instead of converting them to ints.
        dtype=str,
        # Keep names such as "NA" or "null" as text instead of NaN.
        keep_default_na=False,
        # Treat quote characters as ordinary text so an unbalanced '"' in a
        # later field cannot merge several rows into one.
        quoting=csv.QUOTE_NONE,
    )[0]

    # Without an existing directory shutil.copy would fail, or write every
    # file to one regular file if dest_dir had no trailing slash.
    os.makedirs(dest_dir, exist_ok=True)

    copied = 0
    for position, name in enumerate(names, start=1):
        if not name:
            raise ValueError(
                f"metadata entry {position} in {metadata_path!r} has an empty file name"
            )
        shutil.copy(os.path.join(source_dir, name), dest_dir)
        copied += 1
    return copied


if __name__ == "__main__":
    copy_annotated_audio("metadata.txt", audio_annotation_base_path, out_path)