import argparse |
import multiprocessing as mp |
import os |
from functools import partial |
from time import time as timer |
import ffmpeg |
from tqdm import tqdm |
# Command-line interface. Parsing happens at module level, so `args` is a
# module global that the script entry point reads.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--input_dir',
    type=str,
    required=True,
    help='Dir containing youtube clips.',
)
parser.add_argument(
    '--clip_info_file',
    type=str,
    required=True,
    help='File containing clip information.',
)
parser.add_argument(
    '--output_dir',
    type=str,
    required=True,
    help='Location to dump outputs.',
)
# The only optional flag: size of the multiprocessing pool.
parser.add_argument(
    '--num_workers',
    type=int,
    default=8,
    help='How many multiprocessing workers?',
)
args = parser.parse_args()
def get_h_w(filepath):
    """Return the (height, width) in pixels of the first video stream in a file.

    Args:
        filepath: Path of the media file to inspect with ``ffmpeg.probe``.

    Returns:
        Tuple ``(height, width)`` of ints taken from the first stream whose
        ``codec_type`` is ``'video'``.

    Raises:
        ValueError: If the probe result contains no video stream.
        Any exception raised by ``ffmpeg.probe`` itself is propagated.
    """
    probe = ffmpeg.probe(filepath)
    video_stream = next(
        (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
        None,
    )
    if video_stream is None:
        # Without this check the lookup below fails with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError('No video stream found in %s' % (filepath))
    return int(video_stream['height']), int(video_stream['width'])
def trim_and_crop(input_dir, output_dir, clip_params):
    """Cut one clip out of a source video: trim by frame range, then crop a box.

    Args:
        input_dir: Directory holding the source videos as ``<video_name>.mp4``.
        output_dir: Directory the clip is written to.
        clip_params: One comma-separated record of exactly nine fields,
            ``video_name,H,W,S,E,L,T,R,B``. ``H``/``W`` are the frame height
            and width the box coordinates refer to, ``S``/``E`` are the first
            and last frame to keep, and ``L``/``T``/``R``/``B`` are the left,
            top, right and bottom edges of the crop box. All fields except
            ``video_name`` must parse as integers.

    Returns:
        None. The clip is skipped, with a message printed, when the record is
        malformed, the output file already exists, or the input file is
        missing.

    Raises:
        Whatever ``get_h_w`` or ``ffmpeg.run`` raise. If ``ffmpeg.run`` fails
        or is interrupted, the partially written output file is removed
        before the exception is re-raised.
    """
    # A blank or malformed record must not take down the whole worker pool:
    # report it and move on, like the other skip conditions below.
    try:
        video_name, *numbers = clip_params.strip().split(',')
        H, W, S, E, L, T, R, B = map(int, numbers)
    except ValueError:
        print('Malformed clip info %r, skipping' % (clip_params))
        return

    output_filename = '{}_S{}_E{}_L{}_T{}_R{}_B{}.mp4'.format(video_name, S, E, L, T, R, B)
    output_filepath = os.path.join(output_dir, output_filename)
    if os.path.exists(output_filepath):
        print('Output file %s exists, skipping' % (output_filepath))
        return

    input_filepath = os.path.join(input_dir, video_name + '.mp4')
    if not os.path.exists(input_filepath):
        print('Input file %s does not exist, skipping' % (input_filepath))
        return

    # The box is given relative to an H x W frame; rescale it to the actual
    # resolution of the file on disk.
    h, w = get_h_w(input_filepath)
    top = int(T / H * h)
    bottom = int(B / H * h)
    left = int(L / W * w)
    right = int(R / W * w)

    stream = ffmpeg.input(input_filepath)
    # end_frame is E + 1 so that frame E itself is kept.
    stream = ffmpeg.trim(stream, start_frame=S, end_frame=E+1)
    stream = ffmpeg.crop(stream, left, top, right - left, bottom - top)
    stream = ffmpeg.output(stream, output_filepath)
    try:
        ffmpeg.run(stream)
    except BaseException:
        # A failed or interrupted run can leave a truncated file behind, and
        # the exists-check above would then treat it as a finished clip on
        # the next invocation. Removal is best-effort; the original error is
        # what gets re-raised.
        try:
            os.remove(output_filepath)
        except OSError:
            pass
        raise
if __name__ == '__main__':
    # Read the clip descriptions, one per line. Blank lines carry no clip, so
    # they are dropped here instead of being queued for a worker and counted
    # in the progress total.
    with open(args.clip_info_file) as fin:
        stripped_lines = (line.strip() for line in fin)
        clip_info = [line for line in stripped_lines if line]

    os.makedirs(args.output_dir, exist_ok=True)

    # Bind the directories so each worker only needs the clip description.
    downloader = partial(trim_and_crop, args.input_dir, args.output_dir)

    start = timer()
    pool_size = args.num_workers
    print('Using pool size of %d' % (pool_size))
    with mp.Pool(processes=pool_size) as p:
        # list() drains the lazy iterator so every clip is processed while
        # tqdm reports progress; the results themselves are not used.
        _ = list(tqdm(p.imap_unordered(downloader, clip_info), total=len(clip_info)))
    print('Elapsed time: %.2f' % (timer() - start))