Spaces:

GF-John
/

sam2

Running on Zero

App Files Files Community

John Ho committed on Jun 2

Commit

e7334c8

1 Parent(s): df7d2e0

init comit

Browse files

Files changed (6) hide show

app.py +60 -0
ffmpeg_extractor.py +242 -0
requirements.txt +7 -0
samv2_handler.py +214 -0
toolbox/mask_encoding.py +43 -0
toolbox/vid_utils.py +351 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import gradio as gr
+import spaces, torch
+from samv2_handler import load_sam_image_model, run_sam_im_inference
+from PIL import Image
+from typing import Union
+@spaces.GPU
+def load_im_model(variant, auto_mask_gen: bool = False):
+    return load_sam_image_model(
+        variant=variant, device="cuda", auto_mask_gen=auto_mask_gen
+    )
+@spaces.GPU
+def detect_image(
+    im: Image.Image,
+    variant: str,
+    bboxes: Union[list, str] = None,
+    points: Union[list, str] = None,
+    point_labels: Union[list, str] = None,
+):
+    """
+    SAM2 Image Segmentation
+    Args:
+        im: Pillow Image
+        object_name: the object you would like to detect
+        mode: point or object_detection
+    Returns:
+        list: a list of masks
+    """
+    bboxes = json.loads(bboxes) if isinstance(bboxes, str) else bboxes
+    model = load_im_model(variant=variant)
+    return run_sam_im_inference(
+        model, image=im, bboxes=bboxes, get_pil_mask=False, b64_encode_mask=True
+    )
+with gr.Blocks() as demo:
+    with gr.Tab("Images"):
+        gr.Interface(
+            fn=detect_image,
+            inputs=[
+                gr.Image(label="Input Image", type="pil"),
+                gr.Dropdown(
+                    label="Model Variant",
+                    choices=["tiny", "small", "base_plus", "large"],
+                ),
+                gr.JSON(
+                    label='Bounding Boxes (JSON list of dicts: [{"x0":..., "y0":..., "x1":..., "y1":...}, ...])',
+                    optional=True,
+                ),
+            ],
+            outputs=gr.JSON(label="Output JSON"),
+            title="SAM2 for Images",
+        )
+demo.launch(
+    mcp_server=True, app_kwargs={"docs_url": "/docs"}  # add FastAPI Swagger API Docs
+)

ffmpeg_extractor.py ADDED Viewed

	@@ -0,0 +1,242 @@

+import ffmpeg, typer, os, sys, json, shutil
+from loguru import logger
+logger.remove()
+logger.add(
+    sys.stderr,
+    format="<d>{time:YYYY-MM-DD ddd HH:mm:ss}</d> | <lvl>{level}</lvl> | <lvl>{message}</lvl>",
+)
+app = typer.Typer(pretty_exceptions_show_locals=False)
+def parse_frame_name(fname: str):
+    """return a tuple of frame_type and frame_index"""
+    fn, fext = os.path.splitext(os.path.basename(fname))
+    frame_type, frame_index = fn.split("_")
+    return frame_type, int(frame_index)
+def get_fps_ffmpeg(video_path: str):
+    probe = ffmpeg.probe(video_path)
+    # Find the first video stream
+    video_stream = next(
+        (stream for stream in probe["streams"] if stream["codec_type"] == "video"), None
+    )
+    if video_stream is None:
+        raise ValueError("No video stream found")
+    # Frame rate is given as a string fraction, e.g., '30000/1001'
+    r_frame_rate = video_stream["r_frame_rate"]
+    num, denom = map(int, r_frame_rate.split("/"))
+    return num / denom
+@app.command()
+def extract_keyframes_greedy(
+    video_path: str,
+    output_dir: str = None,
+    threshold: float = 0.2,
+    overwrite: bool = False,
+):
+    """
+    run i-frames extractions and keyframes extraction and return a list of keyframe's paths
+    """
+    assert (
+        threshold > 0
+    ), f"threshold must be no negative, for i-frame extraction use extract-keyframes instead"
+    iframes = extract_keyframes(
+        video_path,
+        output_dir=output_dir,
+        threshold=0,
+        overwrite=overwrite,
+        append=False,
+    )
+    assert type(iframes) != type(None), f"i-frames extraction failed"
+    kframes = extract_keyframes(
+        video_path,
+        output_dir=output_dir,
+        threshold=threshold,
+        overwrite=False,
+        append=True,
+    )
+    assert type(kframes) != type(None), f"keyframes extraction failed"
+    # remove kframes that are also iframes
+    removed_kframes = []
+    for fn in kframes:
+        fname = os.path.basename(fn)
+        if os.path.isfile(
+            os.path.join(os.path.dirname(fn), fname.replace("kframe_", "iframe_"))
+        ):
+            os.remove(fn)
+            removed_kframes.append(fn)
+    if len(removed_kframes) > 0:
+        logger.warning(f"removed {len(removed_kframes)} redundant kframes")
+        kframes = [kf for kf in kframes if kf not in removed_kframes]
+    frames = iframes + kframes
+    logger.success(f"extracted {len(frames)} total frames")
+    return frames
+@app.command()
+def extract_keyframes(
+    video_path: str,
+    output_dir: str = None,
+    threshold: float = 0.3,
+    overwrite: bool = False,
+    append: bool = False,
+):
+    """extract keyframes as images into output_dir and return a list of keyframe's paths
+    Args:
+        output_dir: if not provided, will be in video_name/keyframes/
+    """
+    # Create output directory if it doesn't exist
+    output_dir = output_dir if output_dir else os.path.dirname(video_path)
+    vname, vext = os.path.splitext(os.path.basename(video_path))
+    output_dir = os.path.join(output_dir, vname, "keyframes")
+    if os.path.isdir(output_dir):
+        if overwrite:
+            shutil.rmtree(output_dir)
+            logger.warning(f"removed existing data: {output_dir}")
+        elif not append:
+            logger.error(f"overwrite is false and data already exists!")
+            return None
+    os.makedirs(output_dir, exist_ok=True)
+    # Construct the ffmpeg-python pipeline
+    stream = ffmpeg.input(video_path)
+    config_dict = {
+        "vsync": "0",
+        "frame_pts": "true",
+    }
+    if threshold:
+        # always add in the first frame by default
+        filter_value = f"eq(n,0)+gt(scene,{threshold})"
+        frame_name = "kframe"
+        logger.info(f"Extracting Scene-changing frames with {filter_value}")
+    else:
+        filter_value = f"eq(pict_type,I)"
+        # config_dict["skip_frame"] = "nokey"
+        frame_name = "iframe"
+        logger.info(f"Extracting I-Frames since no threshold provided: {filter_value}")
+    stream = ffmpeg.filter(stream, "select", filter_value)
+    stream = ffmpeg.output(stream, f"{output_dir}/{frame_name}_%d.jpg", **config_dict)
+    # Execute the ffmpeg command
+    try:
+        ffmpeg.run(stream, capture_stdout=True, capture_stderr=True)
+        frames = [
+            os.path.join(output_dir, f)
+            for f in os.listdir(output_dir)
+            if f.endswith(".jpg") and frame_name in f
+        ]
+        logger.success(f"{len(frames)} {frame_name} extracted to {output_dir}")
+        return frames
+    except ffmpeg.Error as e:
+        logger.error(f"Error executing FFmpeg command: {e.stderr.decode()}")
+    return None
+@app.command()
+def extract_audio(video_path: str, output_dir: str = None, overwrite: bool = False):
+    """extracting audio of a video file into m4a without re-encoding
+    ref: https://www.baeldung.com/linux/ffmpeg-audio-from-video#1-extracting-audio-without-re-encoding
+    """
+    # Create output directory if it doesn't exist
+    output_dir = output_dir if output_dir else os.path.dirname(video_path)
+    vname, vext = os.path.splitext(os.path.basename(video_path))
+    output_dir = os.path.join(output_dir, vname)
+    output_fname = os.path.join(output_dir, vname + ".m4a")
+    if os.path.isfile(output_fname):
+        if overwrite:
+            os.remove(output_fname)
+            logger.warning(f"removed existing data: {output_fname}")
+        else:
+            logger.error(f"overwrite is false and data already exists!")
+            return None
+    os.makedirs(output_dir, exist_ok=True)
+    # Construct the ffmpeg-python pipeline
+    stream = ffmpeg.input(video_path)
+    config_dict = {"map": "0:a", "acodec": "copy"}
+    stream = ffmpeg.output(stream, output_fname, **config_dict)
+    # Execute the ffmpeg command
+    try:
+        ffmpeg.run(stream, capture_stdout=True, capture_stderr=True)
+        logger.success(f"audio extracted to {output_fname}")
+        return output_fname
+    except ffmpeg.Error as e:
+        logger.error(f"Error executing FFmpeg command: {e.stderr.decode()}")
+    return None
+@app.command()
+def extract_frames(
+    video_path: str,
+    output_dir: str = None,
+    fps: int = None,
+    every_x: int = None,
+    overwrite: bool = False,
+    append: bool = False,
+    im_name_pattern: str = "frame_%05d.jpg",
+):
+    """extract frames as images into output_dir and return the list of frames' paths
+    Args:
+        output_dir: if not provided, will be in video_name/keyframes/
+    """
+    # Create output directory if it doesn't exist
+    vname, vext = os.path.splitext(os.path.basename(video_path))
+    output_dir = output_dir if output_dir else os.path.dirname(video_path)
+    output_dir = os.path.join(output_dir, vname, "keyframes")
+    if os.path.isdir(output_dir):
+        if overwrite:
+            shutil.rmtree(output_dir)
+            logger.warning(f"removed existing data: {output_dir}")
+        elif not append:
+            logger.error(f"overwrite is false and data already exists in {output_dir}!")
+            return None
+    os.makedirs(output_dir, exist_ok=True)
+    # Construct the ffmpeg-python pipeline
+    stream = ffmpeg.input(video_path)
+    config_dict = {
+        "vsync": 0,  # preserves the original timestamps
+        "frame_pts": 1,  # set output file's %d to the frame's PTS
+    }
+    if fps:
+        # check FPS
+        vid_fps = get_fps_ffmpeg(video_path)
+        fps = min(vid_fps, fps)
+        logger.info(f"{vname}{vext} FPS: {vid_fps}, extraction FPS: {fps}")
+        config_dict["vf"] = f"fps={fps}"
+    elif every_x:
+        config_dict["vf"] = f"select=not(mod(n\,{every_x}))"
+    logger.info(
+        f"Extracting Frames into {output_dir} with these configs: \n{config_dict}"
+    )
+    stream = ffmpeg.output(stream, f"{output_dir}/{im_name_pattern}", **config_dict)
+    # Execute the ffmpeg command
+    try:
+        ffmpeg.run(stream, capture_stdout=True, capture_stderr=True)
+        frames = [
+            os.path.join(output_dir, f)
+            for f in os.listdir(output_dir)
+            if f.endswith(".jpg")
+        ]
+        logger.success(f"{len(frames)} frames extracted to {output_dir}")
+        return frames
+    except ffmpeg.Error as e:
+        logger.error(f"Error executing FFmpeg command: {e.stderr.decode()}")
+    return None
+# run the Typer command line app when this file is executed as a script
+if __name__ == "__main__":
+    app()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+ffmpeg-python>=0.2.0
+imageio[ffmpeg]>=2.37.0
+loguru>=0.7.3
+pydantic
+retrying>=1.3.4
+samv2==0.0.4
+validators>=0.35.0

samv2_handler.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import os, shutil
+import numpy as np
+from PIL import Image
+from typing import Literal, Any, Union, Generic, List
+from pydantic import BaseModel
+from sam2.build_sam import build_sam2, build_sam2_video_predictor
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
+from sam2.utils.misc import variant_to_config_mapping
+from sam2.utils.visualization import show_masks
+from ffmpeg_extractor import extract_frames, logger
+from toolbox.vid_utils import VidInfo
+from toolbox.mask_encoding import b64_mask_encode
+variant_checkpoints_mapping = {
+    "tiny": "checkpoints/sam2_hiera_tiny.pt",
+    "small": "checkpoints/sam2_hiera_small.pt",
+    "base_plus": "checkpoints/sam2_hiera_base_plus.pt",
+    "large": "checkpoints/sam2_hiera_large.pt",
+}
+class bbox_xyxy(BaseModel):
+    """Bounding box prompt described by two corner points (x0, y0) and (x1, y1).
+
+    NOTE(review): presumably (x0, y0) is the top-left and (x1, y1) the
+    bottom-right corner in image pixel coordinates -- confirm with callers.
+    """
+    # x / y of the first corner
+    x0: Union[int, float]
+    y0: Union[int, float]
+    # x / y of the second corner
+    x1: Union[int, float]
+    y1: Union[int, float]
+class point_xy(BaseModel):
+    """Point prompt given by its x and y coordinates."""
+    # NOTE(review): presumably image pixel coordinates -- confirm with callers
+    x: Union[int, float]
+    y: Union[int, float]
+def mask_to_xyxy(mask: np.ndarray) -> tuple:
+    """Convert a binary mask of shape (h, w) to
+    xyxy bounding box format (top-left and bottom-right coordinates).
+    """
+    ys, xs = np.where(mask)
+    if len(xs) == 0 or len(ys) == 0:
+        logger.warning("mask_to_xyxy: No object found in the mask")
+        return None
+    x_min = np.min(xs)
+    y_min = np.min(ys)
+    x_max = np.max(xs)
+    y_max = np.max(ys)
+    xyxy = (x_min, y_min, x_max, y_max)
+    xyxy = tuple([int(i) for i in xyxy])
+    return xyxy
+def load_sam_image_model(
+    # variant: Literal[*variant_checkpoints_mapping.keys()],
+    variant: Literal["tiny", "small", "base_plus", "large"],
+    device: str = "cpu",
+    auto_mask_gen: bool = False,
+) -> SAM2ImagePredictor:
+    model = build_sam2(
+        config_file=variant_to_config_mapping[variant],
+        ckpt_path=variant_checkpoints_mapping[variant],
+        device=device,
+    )
+    return (
+        SAM2AutomaticMaskGenerator(model)
+        if auto_mask_gen
+        else SAM2ImagePredictor(sam_model=model)
+    )
+def load_sam_video_model(
+    variant: Literal["tiny", "small", "base_plus", "large"] = "small",
+    device: str = "cpu",
+) -> Any:
+    return build_sam2_video_predictor(
+        config_file=variant_to_config_mapping[variant],
+        ckpt_path=variant_checkpoints_mapping[variant],
+        device=device,
+    )
+def run_sam_im_inference(
+    model: Any,
+    image: Image.Image,
+    points: Union[List[point_xy], List[dict]] = [],
+    point_labels: List[int] = [],
+    bboxes: Union[List[bbox_xyxy], List[dict]] = [],
+    get_pil_mask: bool = False,
+    b64_encode_mask: bool = False,
+):
+    """returns a list of np masks, each with the shape (h,w) and dtype uint8"""
+    assert (
+        points or bboxes
+    ), f"SAM2 Image Inference must have either bounding boxes or points. Neither were provided."
+    if points:
+        assert len(points) == len(
+            point_labels
+        ), f"{len(points)} points provided but {len(point_labels)} labels given."
+    # determine multimask_output
+    has_multi = False
+    if points and bboxes:
+        has_multi = True
+    elif points and len(list(set(point_labels))) > 1:
+        has_multi = True
+    elif bboxes and len(bboxes) > 1:
+        has_multi = True
+    # parse provided bboxes
+    bboxes = (
+        [bbox_xyxy(**bbox) if isinstance(bbox, dict) else bbox for bbox in bboxes]
+        if bboxes
+        else []
+    )
+    points = (
+        [point_xy(**p) if isinstance(p, dict) else p for p in points] if points else []
+    )
+    # setup inference
+    image = np.array(image.convert("RGB"))
+    model.set_image(image)
+    box_coords = (
+        np.array([[b.x0, b.y0, b.x1, b.y1] for b in bboxes]) if bboxes else None
+    )
+    point_coords = np.array([[p.x, p.y] for p in points]) if points else None
+    point_labels = np.array(point_labels) if point_labels else None
+    masks, scores, _ = model.predict(
+        box=box_coords,
+        point_coords=point_coords,
+        point_labels=point_labels,
+        multimask_output=has_multi,
+    )
+    # mask here is of shape (X, h, w) of np array, X = number of masks
+    if get_pil_mask:
+        return show_masks(image, masks, scores=None, display_image=False)
+    else:
+        output_masks = []
+        for i, mask in enumerate(masks):
+            if mask.ndim > 2:  # shape (3, h, w)
+                mask = np.transpose(mask, (1, 2, 0))  # shape (h,w,3)
+                mask = Image.fromarray((mask * 255).astype(np.uint8)).convert("L")
+                output_masks.append(np.array(mask))
+            else:
+                output_masks.append(mask.squeeze().astype(np.uint8))
+        return (
+            [b64_mask_encode(m) for m in output_masks]
+            if b64_encode_mask
+            else output_masks
+        )
+def run_sam_video_inference(
+    model: Any,
+    video_path: str,
+    masks: np.ndarray,
+    device: str = "cpu",
+    sample_fps: int = None,
+    every_x: int = None,
+    do_tidy_up: bool = False,
+    drop_mask: bool = True,
+):
+    # put video frames into directory
+    # TODO:
+    # change frame size
+    # async frame load
+    l_frames_fp = extract_frames(
+        video_path,
+        fps=sample_fps,
+        every_x=every_x,
+        overwrite=True,
+        im_name_pattern="%05d.jpg",
+    )
+    vframes_dir = os.path.dirname(l_frames_fp[0])
+    vinfo = VidInfo(video_path)
+    w = vinfo["frame_width"]
+    h = vinfo["frame_height"]
+    inference_state = model.init_state(video_path=vframes_dir, device=device)
+    for i, mask in enumerate(masks):
+        model.add_new_mask(
+            inference_state=inference_state, frame_idx=0, obj_id=i, mask=mask
+        )
+    masks_generator = model.propagate_in_video(inference_state)
+    detections = []
+    for i, tracker_ids, mask_logits in masks_generator:
+        masks = (mask_logits > 0.0).cpu().numpy().astype(np.uint8)
+        for id, mask in zip(tracker_ids, masks):
+            mask = mask.squeeze().astype(np.uint8)
+            xyxy = mask_to_xyxy(mask)
+            if not xyxy:  # mask is empty
+                logger.debug(f"track_id {id} is missing mask at frame {i}")
+                continue
+            x0, y0, x1, y1 = xyxy
+            det = {  # miro's detections format for videos
+                "frame": i,
+                "track_id": id,
+                "x": x0 / w,
+                "y": y0 / h,
+                "w": (x1 - x0) / w,
+                "h": (y1 - y0) / h,
+                "conf": 1,
+            }
+            if not drop_mask:
+                det["mask_b64"] = b64_mask_encode(mask)
+            detections.append(det)
+    if do_tidy_up:
+        # remove vframes_dir
+        shutil.rmtree(vframes_dir)
+    return detections

toolbox/mask_encoding.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import base64, os, io, random, time
+from PIL import Image
+import numpy as np
+def b64_mask_encode(mask_np_arr, tmp_dir = '/tmp/miro/mask_encoding/'):
+    '''
+    turn a binary mask in numpy into a base64 string
+    '''
+    mask_im = Image.fromarray(np.array(mask_np_arr).astype(np.uint8)*255)
+    mask_im = mask_im.convert(mode = '1') # convert to 1bit image
+    if not os.path.isdir(tmp_dir):
+        print(f'b64_mask_encode: making tmp dir for mask encoding...')
+        os.makedirs(tmp_dir)
+    timestr = time.strftime("%Y%m%d-%H%M%S")
+    hash_str = random.getrandbits(128)
+    tmp_fname = tmp_dir + f'{timestr}_{hash_str}_mask.png'
+    mask_im.save(tmp_fname)
+    return base64.b64encode(open(tmp_fname, 'rb').read())
+def b64_mask_decode(b64_string):
+    '''
+    decode a base64 string back to a binary mask numpy array
+    '''
+    im_bytes = base64.b64decode(b64_string)
+    im_decode = Image.open(io.BytesIO(im_bytes))
+    return np.array(im_decode)
+def get_true_mask(mask_arr, im_w_h:tuple, x0, y0, x1, y1):
+    '''
+    decode the mask of CM output to get a mask that's the same size as source im
+    '''
+    if x0 > im_w_h[0] or x1 > im_w_h[0] or y0 > im_w_h[1] or y1 > im_w_h[1]:
+        raise ValueError(f'get_true_mask: Xs and Ys exceeded im_w_h bound: {im_w_h}')
+    if mask_arr.shape != (y1 - y0, x1 - x0):
+        raise ValueError(f'get_true_mask: Bounding Box h: {y1-y0} w: {x1-x0} does not match mask shape: {mask_arr.shape}')
+    w, h = im_w_h
+    mask = np.zeros((h,w), dtype = np.uint8)
+    mask[y0:y1, x0:x1] = mask_arr
+    return mask

toolbox/vid_utils.py ADDED Viewed

	@@ -0,0 +1,351 @@

+import numpy as np
+from tqdm import tqdm
+import cv2, imageio, ffmpeg, os, time, shutil
+def VidInfo(vid_path):
+	'''
+	returns a dictonary of 'duration', 'fps', 'frame_count', 'frame_height', 'frame_width',
+							'format', 'fourcc'
+	'''
+	vcap = cv2.VideoCapture(vid_path)
+	if not vcap.isOpened():
+		# cannot read video
+		if vid_path.startswith('https://'):
+			# likely a ffmpeg without open-ssl support issue
+			# https://github.com/opencv/opencv-python/issues/204
+			return VidInfo(vid_path.replace('https://','http://'))
+		else:
+			return None
+	info_dict = {
+		'fps' : round(vcap.get(cv2.CAP_PROP_FPS),2), #int(vcap.get(cv2.CAP_PROP_FPS)),
+		'frame_count': int(vcap.get(cv2.CAP_PROP_FRAME_COUNT)), # number of frames should integars
+		'duration': round(
+			int(vcap.get(cv2.CAP_PROP_FRAME_COUNT)) / vcap.get(cv2.CAP_PROP_FPS),
+			2), # round number of seconds to 2 decimals
+		'frame_height': vcap.get(cv2.CAP_PROP_FRAME_HEIGHT),
+		'frame_width': vcap.get(cv2.CAP_PROP_FRAME_WIDTH),
+		'format': vcap.get(cv2.CAP_PROP_FORMAT),
+		'fourcc': vcap.get(cv2.CAP_PROP_FOURCC)
+	}
+	vcap.release()
+	return info_dict
+def VidReader(vid_path, verbose = False, use_imageio = True):
+	'''
+	given a video file path, returns a list of images
+	Args:
+		vid_path: a MP4 file path
+		use_imageio: if true, function returns a ImageIO reader object (RGB);
+					otherwise, a list of CV2 array will be returned
+	'''
+	if use_imageio:
+		vid = imageio.get_reader(vid_path, 'ffmpeg')
+		return vid
+	vcap = cv2.VideoCapture(vid_path)
+	s_time = time.time()
+	# try to determine the total number of frames in Vid
+	frame_count = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))
+	frame_rate = int(vcap.get(cv2.CAP_PROP_FPS))
+	if verbose:
+		print(f'\t{frame_count} total frames in video {vid_path}')
+		print(f'\t\t FPS: {frame_rate}')
+		print(f'\t\t Video Duration: {frame_count/ frame_rate}s')
+	# loop over frames
+	results = []
+	for i in tqdm(range(frame_count)):
+		grabbed, frame = vcap.read()
+		if grabbed:
+			results.append(frame)
+	# Output
+	r_time = "{:.2f}".format(time.time() - s_time)
+	if verbose:
+		print(f'\t{vid_path} loaded in {r_time} ({frame_count/float(r_time)} fps)')
+	vcap.release()
+	return results
+def get_vid_frame(n, vid_path):
+	'''
+	return frame(s) in np.array specified by i
+	Args:
+		n: list of int
+	'''
+	vreader = VidReader(vid_path, verbose = False, use_imageio = True)
+	fcount = VidInfo(vid_path)['frame_count']
+	if type(n) == list:
+		return [vreader.get_data(i) if i in range(fcount) else None for i in n]
+	elif type(n) == int:
+		return vreader.get_data(n) if n in range(fcount) else None
+	else:
+		raise ValueError(f'n must be either int or list, {type(n)} detected.')
+def vid_slicer(vid_path, output_path, start_frame, end_frame, keep_audio = False, overwrite = False):
+	'''
+	ref https://github.com/kkroening/ffmpeg-python/issues/184#issuecomment-493847192
+	'''
+	if not( os.path.isdir(os.path.dirname(output_path))):
+		raise ValueError(f'output_path directory does not exists: {os.path.dirname(output_path)}')
+	if os.path.isfile(output_path) and not overwrite:
+		warnings.warn(f'{output_path} already exists but overwrite switch is False, nothing done.')
+		return None
+	input_vid = ffmpeg.input(vid_path)
+	vid_info = VidInfo(vid_path)
+	end_frame += 1
+	if keep_audio:
+		vid = (
+			input_vid
+			.trim(start_frame = start_frame, end_frame = end_frame)
+			.setpts('PTS-STARTPTS')
+		)
+		aud = (
+			input_vid
+			.filter_('atrim', start = start_frame / vid_info['fps'], end = end_frame / vid_info['fps'])
+			.filter_('asetpts', 'PTS-STARTPTS')
+		)
+		joined = ffmpeg.concat(vid, aud, v = 1, a =1).node
+		output = ffmpeg.output(joined[0], joined[1], f'{output_path}').overwrite_output()
+		output.run()
+	else:
+		(
+			input_vid
+			.trim   (start_frame = start_frame, end_frame = end_frame )
+			.setpts ('PTS-STARTPTS')
+			.output (f'{output_path}')
+			.overwrite_output()
+			.run()
+		)
+	return output_path
+def vid_resize(vid_path, output_path, width, overwrite = False):
+	'''
+	use ffmpeg to resize the input video to the width given, keeping aspect ratio
+	'''
+	if not( os.path.isdir(os.path.dirname(output_path))):
+		raise ValueError(f'output_path directory does not exists: {os.path.dirname(output_path)}')
+	if os.path.isfile(output_path) and not overwrite:
+		warnings.warn(f'{output_path} already exists but overwrite switch is False, nothing done.')
+		return None
+	input_vid = ffmpeg.input(vid_path)
+	vid = (
+		input_vid
+		.filter('scale', width, -1)
+		.output(output_path)
+		.overwrite_output()
+		.run()
+	)
+	return output_path
+def vid_reduce_framerate(vid_path, output_path, new_fps, overwrite = False):
+	'''
+	use ffmpeg to resize the input video to the width given, keeping aspect ratio
+	'''
+	if not( os.path.isdir(os.path.dirname(output_path))):
+		raise ValueError(f'output_path directory does not exists: {os.path.dirname(output_path)}')
+	if os.path.isfile(output_path) and not overwrite:
+		warnings.warn(f'{output_path} already exists but overwrite switch is False, nothing done.')
+		return None
+	input_vid = ffmpeg.input(vid_path)
+	vid = (
+		input_vid
+		.filter('fps', fps = new_fps, round = 'up')
+		.output(output_path)
+		.overwrite_output()
+		.run()
+	)
+	return output_path
+def seek_frame_count(VidReader, cv2_frame_count, guess_within = 0.1,
+	seek_rate = 1, bDebug = False):
+	'''
+	imageio/ffmpeg frame count could be different than cv2. this function
+	returns the true frame count in the given vid reader. Returns None if frame
+	count can't be determined
+	The search probes frame indices from cv2_frame_count - 1 downwards and
+	recurses with a narrower window once a readable frame is found.
+	Args:
+		VidReader: ImageIO video reader object with method .get_data()
+		cv2_frame_count: frame count from cv2
+		guess_within: look for actual frame count within X% of cv2_frame_count
+		seek_rate: step between probed indices (values below 1 are treated as 1)
+		bDebug: show a progress bar and print the probed range and misses
+	'''
+	# despite its name, max_guess is the LOWEST index that gets probed
+	max_guess = int(cv2_frame_count * (1-guess_within))
+	seek_rate = max(seek_rate, 1)
+	# probe from the highest candidate index downwards
+	pbar = reversed(range(max_guess, cv2_frame_count, seek_rate))
+	if bDebug:
+		pbar = tqdm(pbar, desc = f'seeking frame')
+		print(f'seeking from {max_guess} to {cv2_frame_count} with seek_rate of {seek_rate}')
+	for i in pbar:
+		try:
+			im = VidReader.get_data(i)
+		except IndexError:
+			# frame i is not readable, try the next lower candidate
+			if bDebug:
+				print(f'{i} not found.')
+			continue
+		# Frame Found
+		if i+1 == cv2_frame_count:
+			# the highest possible index is readable, so the count is confirmed
+			print(f'seek_frame_count: found frame count at {i+1}')
+			return i + 1
+		else:
+			# frame i is readable but a higher probe was not: search again
+			# between i and i + seek_rate with half the step
+			return seek_frame_count(VidReader, cv2_frame_count = i + seek_rate,
+				guess_within= seek_rate / (i + seek_rate),
+				seek_rate= int(seek_rate/2),
+				bDebug = bDebug)
+	# no readable frame inside the probed window
+	return None
+def VidWriter(lFrames, output_path, strFourcc = 'MP4V', verbose = False, intFPS = 20, crf = None,
+				use_imageio = False):
+	'''
+	Given a list of images in numpy array format, it outputs a MP4 file
+	Args:
+		lFrames: list of numpy arrays or filename
+		output_path: a MP4 file path
+		strFourcc: four letter video codec; XVID is more preferable. MJPG results in high size video. X264 gives very small size video; see https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html
+		crf: Constant Rate Factor for ffmpeg video compression
+	'''
+	s_time = time.time()
+	if not output_path.endswith('.mp4'):
+		raise ValueError(f'VidWriter: only mp4 video output supported.')
+	if crf:
+		crf = int(crf)
+		if crf > 24 or crf < 18:
+			raise ValueError(f'VidWriter: crf must be between 18 and 24')
+	if not os.path.exists(os.path.dirname(output_path)):
+		output_dir = os.path.dirname(output_path)
+		print(f'\t{output_dir} does not exist.\n\tCreating video file output directory: {output_dir}')
+		os.makedirs(output_dir)
+	if use_imageio:
+		writer = imageio.get_writer(output_path, fps = intFPS)
+		for frame in tqdm(lFrames, desc = "Writing video using ImageIO"):
+			if not type(frame) == np.ndarray:
+				# read from filename
+				if not os.path.isfile(frame):
+					raise ValueError(f'VidWriter: lFrames must be list of images (np.array) or filenames')
+				frame = imageio.imread(frame)
+			writer.append_data(frame)
+		writer.close()
+	else:
+		#init OpenCV Vid Writer:
+		H , W = lFrames[0].shape[:2]
+		#fourcc = cv2.VideoWriter_fourcc(*'MP4V')
+		fourcc = cv2.VideoWriter_fourcc(*strFourcc)
+		if verbose:
+			print(f'\tEncoding using fourcc: {strFourcc}')
+		writer = cv2.VideoWriter(output_path, fourcc, fps = intFPS, frameSize = (W, H), isColor = True)
+		for frame in tqdm(lFrames, desc = "Writing video using OpenCV"):
+			writer.write(frame)
+		writer.release()
+	# Output
+	r_time = "{:.2f}".format( max(time.time() - s_time, 0.01))
+	if verbose:
+		print(f'\t{output_path} written in {r_time} ({len(lFrames)/float(r_time)} fps)')
+	if crf:
+		if verbose:
+			print(f'\tCompressing {output_path} with FFmpeg using crf: {crf}')
+		isCompressed = VidCompress(output_path, crf = crf, use_ffmpy = False)
+		if verbose:
+			print(f'\tCompressed: {isCompressed}')
+	return output_path
+def im_dir_to_video(im_dir, output_path, fps, tup_im_extension = ('.jpg'),
+		max_long_edge = 600, filename_len = 6, pixel_format = 'yuv420p',
+		tqdm_func = tqdm):
+	'''turn a directory of images into video using ffmpeg
+		ref: https://github.com/kkroening/ffmpeg-python/issues/95#issuecomment-401428324
+	Args:
+		pixel_format: for list of supported formats see https://en.wikipedia.org/wiki/FFmpeg#Pixel_formats
+		filename_len: ensure frame number are zero padded; 0 will skip this step
+	'''
+	if filename_len:
+		# Ensure Filenames are Zero padded
+		l_im_fp = [f for f in os.listdir(im_dir) if f.endswith(tup_im_extension)]
+		l_im_fp = sorted(l_im_fp, key = lambda f: int(f.split('.')[0]))
+		for f in tqdm_func(l_im_fp, desc = 'ensuring image filenames are zero padded'):
+			fname, fext = os.path.splitext(f)
+			padded_f = fname.zfill(filename_len) + fext
+			if not os.path.isfile(os.path.join(im_dir,padded_f)):
+				shutil.move(os.path.join(im_dir, f), os.path.join(im_dir, padded_f))
+				# removed symlink to f as it will duplicate the frames in video generation
+				# os.symlink(src = os.path.join(im_dir, padded_f), dst = os.path.join(im_dir, f))
+			#TODO: ensure image size are divisible by 2
+	im_dir += '' if im_dir.endswith('/') else '/'
+	im_stream_string = f'{im_dir}*.jpg'
+	# we need to escape special characters
+	im_stream_string = im_stream_string.translate(
+							str.maketrans(
+								{'[': r'\[',
+								']': r'\]'})
+						)
+	r = (
+		ffmpeg
+		.input(im_stream_string, pattern_type = 'glob', framerate=fps)
+		.filter('format', pixel_format)
+		# .filter('pad', 'ceil(iw/2)*2:ceil(ih/2)*2')
+		.output(output_path)
+		.run()
+	)
+	return output_path
+#
+# def VidCompress(input_path, output_path = None, crf = 24, use_ffmpy = False):
+# 	'''
+# 	Compress input_path video (mp4 only) using ffmpy
+# 	crf: Constant Rate Factor for ffmpeg video compression, must be between 18 and 24
+# 	use_ffmpy: use ffmpy instead of commandline call to ffmpeg
+# 	'''
+# 	if not input_path.endswith('.mp4'):
+# 		print(f'\tFATAL: only mp4 videos supported.')
+# 		return None
+#
+# 	output_fname = output_path if output_path else input_path
+# 	tmp_fname = input_path.replace(".mp4","_tmp.mp4")
+# 	os.rename(input_path, tmp_fname)
+#
+# 	try:
+# 		if not use_ffmpy:
+# 			#os.popen(f'ffmpeg -i {tmp_fname} -vcodec libx264 -crf {crf} {output_fname}')
+#
+# 			cmdOut = subprocess.Popen(['ffmpeg', '-i', tmp_fname, '-vcodec', 'libx264', '-crf', str(crf), output_fname],
+# 										stdout = subprocess.PIPE,
+# 										stderr = subprocess.STDOUT)
+# 			stdout, stderr = cmdOut.communicate()
+# 			if not stderr:
+# 				os.remove(tmp_fname)
+# 				return True
+# 			else:
+# 				return False
+# 		else:
+# 			ff = FFmpeg(
+# 					inputs = {tmp_fname : None},
+# 					outputs = {output_fname : f'-vcodec libx264 -crf {crf}'}
+# 					)
+# 			ff.run()
+#
+# 			os.remove(tmp_fname)
+# 			return True
+#
+# 	except OSError as e:
+# 		print(f'\tWARNING: Compression Failed; OSError\n\tLikely out of RAM\n\tError Msg: {e}')
+# 		os.rename(tmp_fname, output_fname)
+# 		return False