# model1/llava/eval/masp_eval/utils/video_utils.py
# multitensor's picture
# Upload folder using huggingface_hub
# bbfa6f6 verified
import os
import copy
import random
import numpy as np
from PIL import Image
def get_image(image_path):
    """Open the image file at ``image_path`` and return it converted to RGB mode."""
    return Image.open(image_path).convert('RGB')
def load_frames(frames_dir):
    """Load the frames stored in ``frames_dir`` in ascending numeric order.

    Only directory entries whose name ends with ``jpg`` are considered. The
    part of each name before the extension is parsed with ``int`` and used as
    the sort key, so ``2.jpg`` is loaded before ``10.jpg``.

    Args:
        frames_dir: Directory containing the frame files.

    Returns:
        list: One image per matching file, as returned by ``get_image``.

    Raises:
        ValueError: If a matching file name (minus extension) is not an
            integer.
    """
    numbered_files = []
    for file_name in os.listdir(frames_dir):
        if not file_name.endswith('jpg'):
            continue
        stem = os.path.splitext(file_name)[0]
        numbered_files.append((int(stem), file_name))

    # Sort on the numeric index only; ties keep their listing order.
    numbered_files.sort(key=lambda entry: entry[0])

    return [get_image(f"{frames_dir}/{file_name}") for _, file_name in numbered_files]
def uniform_sample(frames, num_segments):
    """Uniformly sample ``num_segments`` frames from a list of frames.

    Sample positions are ``num_segments`` evenly spaced values between the
    first and the last index (both included), truncated to integers. When
    ``num_segments`` exceeds ``len(frames)`` some frames are repeated.

    Args:
        frames (list): A list of frames.
        num_segments (int): Number of frames to return.

    Returns:
        list: A list containing ``num_segments`` sampled frames, or an empty
        list when ``frames`` is empty.
    """
    if len(frames) == 0:
        # linspace(0, -1, n) would yield index 0 and raise IndexError.
        return []
    indices = np.linspace(start=0, stop=len(frames) - 1, num=num_segments).astype(int)
    return [frames[ind] for ind in indices]
def downsample_frames(frames, interval, keep_first_last=True):
    """Keep every ``interval``-th frame of ``frames``.

    Args:
        frames (list): A list of frames.
        interval (int): Sampling stride; must be at least 1.
        keep_first_last (bool): When True, the first and last frames are
            always kept and only the frames between them are strided. When
            False, the whole list is strided, which may yield an empty list
            if ``interval`` is larger than ``len(frames)``.

    Returns:
        list: The downsampled frames.

    Raises:
        ValueError: If ``interval`` is smaller than 1.
    """
    if interval < 1:
        raise ValueError(f"interval must be >= 1, got {interval}")

    if not keep_first_last:
        # May output an empty list; keeping first and last is recommended.
        return frames[interval - 1::interval]

    if len(frames) <= 1:
        # With zero or one frame, first and last are the same frame (or do
        # not exist); return a copy instead of duplicating it or failing.
        return list(frames)

    first, last, mid = frames[0], frames[-1], frames[1:-1]
    sampled_frames = mid[interval - 1::interval]
    return [first] + sampled_frames + [last]
def sample_frames(frames, num_segments):
    """Pick one frame from each of ``num_segments`` consecutive segments.

    The index range ``[0, len(frames)]`` is cut into ``num_segments`` segments
    with evenly spaced boundaries truncated to integers, and the first frame
    of each segment is returned. When ``num_segments`` exceeds
    ``len(frames)`` some frames are repeated.

    Args:
        frames (list): A list of frames.
        num_segments (int): Number of segments, i.e. number of frames to
            return.

    Returns:
        list: ``num_segments`` sampled frames, or an empty list when
        ``frames`` is empty.
    """
    if len(frames) == 0:
        # Every boundary would be 0, so indexing frames[0] would fail.
        return []
    boundaries = np.linspace(start=0, stop=len(frames), num=num_segments + 1).astype(int)
    # The last boundary equals len(frames) and only closes the final segment,
    # so it is never used as a start index.
    return [frames[start] for start in boundaries[:-1]]