|
|
|
|
|
|
|
|
|
import argparse |
|
import multiprocessing as mp |
|
import os |
|
from functools import partial |
|
from time import time as timer |
|
|
|
import ffmpeg |
|
from tqdm import tqdm |
|
|
|
|
|
def _build_arg_parser():
    """Create the command-line parser for this script.

    Three path options are mandatory; the worker count is optional and
    defaults to 8.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input_dir',
        type=str,
        required=True,
        help='Dir containing youtube clips.',
    )
    cli.add_argument(
        '--clip_info_file',
        type=str,
        required=True,
        help='File containing clip information.',
    )
    cli.add_argument(
        '--output_dir',
        type=str,
        required=True,
        help='Location to dump outputs.',
    )
    cli.add_argument(
        '--num_workers',
        type=int,
        default=8,
        help='How many multiprocessing workers?',
    )
    return cli


# Parsed at import time so that ``args`` is available as a module-level global.
parser = _build_arg_parser()
args = parser.parse_args()
|
|
|
|
|
def get_h_w(filepath):
    """Return ``(height, width)`` of the first video stream in a media file.

    Args:
        filepath: Path of the file passed to ``ffmpeg.probe``.

    Returns:
        A ``(height, width)`` tuple of ints read from the first probed stream
        whose ``codec_type`` is ``'video'``.

    Raises:
        ValueError: If the probe result contains no video stream.
    """
    probe = ffmpeg.probe(filepath)
    for stream in probe['streams']:
        if stream['codec_type'] == 'video':
            return int(stream['height']), int(stream['width'])
    # Without this, a file with no video stream ended in ``None['height']``
    # and an opaque TypeError that did not identify the offending file.
    raise ValueError('No video stream found in %s' % (filepath))
|
|
|
|
|
def _parse_clip_params(clip_params):
    """Split a ``video_name,H,W,S,E,L,T,R,B`` record; return None if malformed."""
    fields = clip_params.strip().split(',')
    try:
        # Exactly eight integers must follow the video name; a wrong field
        # count or a non-integer field both surface as ValueError here.
        H, W, S, E, L, T, R, B = (int(field) for field in fields[1:])
    except ValueError:
        return None
    if H <= 0 or W <= 0:
        # The reference size is used as a divisor when rescaling the box.
        return None
    return fields[0], H, W, S, E, L, T, R, B


def _scale_coord(value, src_size, dst_size):
    """Rescale ``value`` from an axis of ``src_size`` to one of ``dst_size``.

    Truncates toward zero like ``int(value / src_size * dst_size)`` but uses
    integer arithmetic, so float rounding cannot drop a pixel
    (``int(57 / 100 * 100)`` evaluates to 56, not 57).
    """
    scaled = value * dst_size
    if scaled >= 0:
        return scaled // src_size
    return -(-scaled // src_size)


def trim_and_crop(input_dir, output_dir, clip_params):
    """Cut one clip out of a source video and crop it to its box.

    Args:
        input_dir: Directory holding the source ``<video_name>.mp4`` files.
        output_dir: Directory the resulting clip is written into.
        clip_params: One comma-separated record
            ``video_name,H,W,S,E,L,T,R,B``. ``H``/``W`` are the frame size
            the box coordinates refer to, ``S``/``E`` the first and last
            frame to keep (``E`` inclusive), and ``L``, ``T``, ``R``, ``B``
            the left/top/right/bottom edges of the crop box.

    Returns:
        None. Malformed records, already existing outputs, missing inputs and
        clips ffmpeg cannot process are reported on stdout and skipped, so a
        single bad clip does not abort a whole multiprocessing run.
    """
    parsed = _parse_clip_params(clip_params)
    if parsed is None:
        print('Malformed clip record %r, skipping' % (clip_params,))
        return
    video_name, H, W, S, E, L, T, R, B = parsed

    output_filename = '{}_S{}_E{}_L{}_T{}_R{}_B{}.mp4'.format(
        video_name, S, E, L, T, R, B)
    output_filepath = os.path.join(output_dir, output_filename)
    if os.path.exists(output_filepath):
        print('Output file %s exists, skipping' % (output_filepath))
        return

    input_filepath = os.path.join(input_dir, video_name + '.mp4')
    if not os.path.exists(input_filepath):
        print('Input file %s does not exist, skipping' % (input_filepath))
        return

    try:
        h, w = get_h_w(input_filepath)
    except ffmpeg.Error:
        print('Could not probe %s, skipping' % (input_filepath))
        return

    # The box is given relative to an H x W frame; map it onto the actual
    # h x w resolution of the file on disk.
    top = _scale_coord(T, H, h)
    bottom = _scale_coord(B, H, h)
    left = _scale_coord(L, W, w)
    right = _scale_coord(R, W, w)

    stream = ffmpeg.input(input_filepath)
    # end_frame is exclusive, hence E + 1 to keep frame E.
    # NOTE(review): trim keeps the source timestamps; the FFmpeg docs suggest
    # a setpts filter after trim if output should start at zero - confirm
    # whether that is wanted here.
    stream = ffmpeg.trim(stream, start_frame=S, end_frame=E + 1)
    stream = ffmpeg.crop(stream, left, top, right - left, bottom - top)
    stream = ffmpeg.output(stream, output_filepath)
    try:
        ffmpeg.run(stream)
    except ffmpeg.Error:
        print('ffmpeg failed for %s, skipping' % (output_filepath))
        # Remove any partial result so a later run does not mistake it for a
        # finished clip and skip it.
        try:
            os.remove(output_filepath)
        except OSError:
            pass
|
|
|
|
|
if __name__ == '__main__':
    # One clip record per line. Blank lines carry no record and would only
    # make a worker fail while unpacking the fields, so drop them here.
    with open(args.clip_info_file) as fin:
        clip_info = [line.strip() for line in fin if line.strip()]

    os.makedirs(args.output_dir, exist_ok=True)

    # Bind the two directories so each pool task only needs its clip record.
    worker = partial(trim_and_crop, args.input_dir, args.output_dir)

    start = timer()
    pool_size = args.num_workers
    print('Using pool size of %d' % (pool_size))
    with mp.Pool(processes=pool_size) as p:
        # The workers return nothing useful; iterate only to drive the pool
        # and advance the progress bar as clips finish (in any order).
        for _ in tqdm(p.imap_unordered(worker, clip_info), total=len(clip_info)):
            pass
    print('Elapsed time: %.2f' % (timer() - start))
|
|