import sys
import argparse
import copy
import os, shutil
import imageio
import cv2
from PIL import Image, ImageDraw
import os.path as osp
import random
import numpy as np
import torch.multiprocessing as mp
from multiprocessing import set_start_method
import math, time, gc
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from segment_anything import SamAutomaticMaskGenerator, SamPredictor, sam_model_registry

# Import files from the local path
root_path = os.path.abspath('.')
sys.path.append(root_path)
from config.flowformer_config import get_cfg
from flowformer_code.utils import flow_viz, frame_utils
from flowformer_code.utils.utils import InputPadder
from flowformer_code.FlowFormer import build_flowformer

TRAIN_SIZE = [432, 960]

def show_anns(anns):
    if len(anns) == 0:
        return
    sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(True)
    img = np.ones((sorted_anns[0]['segmentation'].shape[0], sorted_anns[0]['segmentation'].shape[1], 4))
    img[:, :, 3] = 0
    for ann in sorted_anns:
        m = ann['segmentation']
        color_mask = np.concatenate([np.random.random(3), [0.35]])
        img[m] = color_mask
    return img * 255

def show_mask(mask, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    return mask_image * 255
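# Usage note: for a boolean mask of shape (h, w), show_mask returns an
# (h, w, 4) RGBA array scaled to the 0..255 range, ready to alpha-blend over
# a frame or to write out with cv2.imwrite for inspection.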

def compute_grid_indices(image_shape, patch_size=TRAIN_SIZE, min_overlap=20):
    if min_overlap >= TRAIN_SIZE[0] or min_overlap >= TRAIN_SIZE[1]:
        raise ValueError(
            f"Overlap should be less than the patch size (got {min_overlap} "
            f"for patch size {patch_size}).")
    if image_shape[0] == TRAIN_SIZE[0]:
        hs = list(range(0, image_shape[0], TRAIN_SIZE[0]))
    else:
        hs = list(range(0, image_shape[0], TRAIN_SIZE[0] - min_overlap))
    if image_shape[1] == TRAIN_SIZE[1]:
        ws = list(range(0, image_shape[1], TRAIN_SIZE[1]))
    else:
        ws = list(range(0, image_shape[1], TRAIN_SIZE[1] - min_overlap))
    # Make sure the final patch is flush with the image boundary
    hs[-1] = image_shape[0] - patch_size[0]
    ws[-1] = image_shape[1] - patch_size[1]
    return [(h, w) for h in hs for w in ws]
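
# Worked example (hypothetical 540x1280 frame): with TRAIN_SIZE = [432, 960]
# and min_overlap = 20, hs = [0, 412] and ws = [0, 940] before snapping; the
# last entries are snapped to 108 (= 540 - 432) and 320 (= 1280 - 960), so the
# patch origins are [(0, 0), (0, 320), (108, 0), (108, 320)].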

def compute_flow(model, image1, image2, weights=None):
    print("computing flow...")
    image_size = image1.shape[1:]
    image1, image2 = image1[None].cuda(), image2[None].cuda()
    hws = compute_grid_indices(image_size)
    if weights is None:  # no tile
        padder = InputPadder(image1.shape)
        image1, image2 = padder.pad(image1, image2)
        flow_pre, _ = model(image1, image2)
        flow_pre = padder.unpad(flow_pre)
        flow = flow_pre[0].permute(1, 2, 0).cpu().numpy()
    else:  # tile
        flows = 0
        flow_count = 0
        for idx, (h, w) in enumerate(hws):
            image1_tile = image1[:, :, h:h+TRAIN_SIZE[0], w:w+TRAIN_SIZE[1]]
            image2_tile = image2[:, :, h:h+TRAIN_SIZE[0], w:w+TRAIN_SIZE[1]]
            flow_pre, _ = model(image1_tile, image2_tile)
            padding = (w, image_size[1]-w-TRAIN_SIZE[1], h, image_size[0]-h-TRAIN_SIZE[0], 0, 0)
            flows += F.pad(flow_pre * weights[idx], padding)
            flow_count += F.pad(weights[idx], padding)
        flow_pre = flows / flow_count
        flow = flow_pre[0].permute(1, 2, 0).cpu().numpy()
    return flow
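
# A minimal sketch of how the per-tile weights for the tiled branch above
# could be built. This helper is an assumption, not part of the original
# script (the upstream FlowFormer demo derives Gaussian weights instead):
# each entry is a [1, 1, H, W] tensor that broadcasts against the
# [1, 2, H, W] tile flow and that F.pad places at the tile's offset.
def compute_uniform_weights(hws, device="cuda"):
    return [torch.ones(1, 1, TRAIN_SIZE[0], TRAIN_SIZE[1], device=device)
            for _ in hws]
# Usage sketch: weights = compute_uniform_weights(compute_grid_indices(image_size))
#               flow = compute_flow(model, image1, image2, weights)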

def compute_adaptive_image_size(image_size):
    # Scale so that both sides end up at least as large as TRAIN_SIZE, then
    # return the result in (width, height) order for cv2.resize.
    target_size = TRAIN_SIZE
    scale0 = target_size[0] / image_size[0]
    scale1 = target_size[1] / image_size[1]
    if scale0 > scale1:
        scale = scale0
    else:
        scale = scale1
    image_size = (int(image_size[1] * scale), int(image_size[0] * scale))
    return image_size
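
# Worked example: for a 720x1280 input, scale0 = 432/720 = 0.6 and
# scale1 = 960/1280 = 0.75, so scale = 0.75 and the function returns
# (960, 540). Note the axis swap: the result is in (width, height) order,
# which is exactly what cv2.resize expects for dsize.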

def prepare_image(viz_root_dir, fn1, fn2, keep_size):
    print("preparing image...")
    image1 = frame_utils.read_gen(fn1)
    image2 = frame_utils.read_gen(fn2)
    image1 = np.array(image1).astype(np.uint8)[..., :3]
    image2 = np.array(image2).astype(np.uint8)[..., :3]
    if not keep_size:
        dsize = compute_adaptive_image_size(image1.shape[0:2])
        image1 = cv2.resize(image1, dsize=dsize, interpolation=cv2.INTER_CUBIC)
        image2 = cv2.resize(image2, dsize=dsize, interpolation=cv2.INTER_CUBIC)
    image1 = torch.from_numpy(image1).permute(2, 0, 1).float()
    image2 = torch.from_numpy(image2).permute(2, 0, 1).float()

    dirname = osp.dirname(fn1)
    filename = osp.splitext(osp.basename(fn1))[0]
    viz_dir = osp.join(viz_root_dir, dirname)
    # if not osp.exists(viz_dir):
    #     os.makedirs(viz_dir)
    viz_fn = osp.join(viz_dir, filename + '.png')
    return image1, image2, viz_fn

def build_model():
    print("building model...")
    cfg = get_cfg()
    model = torch.nn.DataParallel(build_flowformer(cfg))
    model.load_state_dict(torch.load(cfg.model))
    model.cuda()
    model.eval()
    return model
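
# Note: since the model is wrapped in torch.nn.DataParallel before
# load_state_dict is called, the checkpoint at cfg.model is expected to hold
# the "module."-prefixed keys that DataParallel itself produces when saving.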

def filter_uv(flow, threshold_factor=0.2):
    # Zero out every flow vector whose magnitude is below
    # threshold_factor * (maximum magnitude in the frame).
    u = flow[:, :, 0]
    v = flow[:, :, 1]
    rad = np.sqrt(np.square(u) + np.square(v))
    rad_max = np.max(rad)
    threshold = threshold_factor * rad_max
    flow[:, :, 0][rad < threshold] = 0
    flow[:, :, 1][rad < threshold] = 0
    return flow
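
# Worked example: if the strongest motion in a frame pair is 10 px, the
# threshold is 0.2 * 10 = 2 px, and every pixel moving less than 2 px gets
# both of its u and v components zeroed, leaving only the dominant mover.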

def visualize_traj(base_img, traj_path, connect_points=True):
    target_vertical, target_horizontal = traj_path[-1]
    if connect_points and len(traj_path) > 1:
        # Draw a line connecting the last two points to show the motion direction
        start_coordinate = (traj_path[-2][1], traj_path[-2][0])
        end_coordinate = (traj_path[-1][1], traj_path[-1][0])
        pil_img = Image.fromarray(base_img)
        # Draw the line
        color = 'red'
        draw = ImageDraw.Draw(pil_img)
        draw.line([start_coordinate, end_coordinate], fill=color, width=3)
        base_img = np.array(pil_img)
    # Draw a green dot only for the start point
    if len(traj_path) == 1:
        dot_range = 3
        for i in range(-1*dot_range, dot_range+1):
            for j in range(-1*dot_range, dot_range+1):
                dil_vertical, dil_horizontal = target_vertical + i, target_horizontal + j
                if (0 <= dil_vertical < base_img.shape[0]) and (0 <= dil_horizontal < base_img.shape[1]):
                    base_img[dil_vertical][dil_horizontal] = [0, 128, 0]
                else:
                    print("The trajectory point is out of bounds, so we won't consider it")
                    return (False, base_img)
    return (True, base_img)

def calculate_flow(viz_root_dir, store_dir, img_pairs, optical_flow_model, sam_predictor, SAM_positive_sample_num, SAM_negative_sample_num, mask_generator, traj_visualization, keep_size, verbose=False):
    # Trajectory state
    traj_path = []  # Collects all traversed points in temporal order
    is_hard_to_track = False  # True means at least one frame made it hard to find a dx/dy movement; such tracks are not recommended
    hard_track_idxs = set()
    traj_image_lists = []

    # Iterate over all image pairs
    for idx, img_pair in enumerate(img_pairs):
        fn1, fn2 = img_pair
        print(f"processing {fn1}, {fn2}...")
        image1, image2, viz_fn = prepare_image(viz_root_dir, fn1, fn2, keep_size)  # Careful: image1 and image2 may have different resolutions if keep_size is False

        # Generate the optical flow and zero out small motions
        flow_uv = filter_uv(compute_flow(optical_flow_model, image1, image2, None))
        # if verbose:
        #     # Store the visualization of flow_uv
        #     flow_img = flow_viz.flow_to_image(flow_uv)
        #     cv2.imwrite("optical_flow_" + str(idx+1) + ".png", flow_img[:, :, [2,1,0]])

        if idx == 0:
            # Keep the first frame in memory for later visualization.
            # Base img:
            # base_img = np.uint8(np.transpose(image1.numpy(), (1,2,0)))
            # SAM figure:
            # sam_all = mask_generator.generate(image1)
            # base_img = show_anns(sam_all)
            # base_img = np.transpose(base_img, (1,2,0))
            # Plain white image:
            base_img = np.zeros(np.transpose(image1.numpy(), (1, 2, 0)).shape, dtype=np.uint8)
            base_img.fill(255)

        # Extract moving points (positive points)
        positive_point_cords = []
        nonzeros = np.nonzero(flow_uv)  # [(vertical), (horizontal)]
        if len(nonzeros[0]) < SAM_positive_sample_num:
            # We require at least SAM_positive_sample_num moving points
            return False
        positive_orders = np.random.choice(len(nonzeros[0]), SAM_positive_sample_num, replace=False)  # Randomly sample instead of feeding every point to sam_predictor
        for i in range(len(nonzeros[0])):
            if i in positive_orders:
                positive_point_cords.append([nonzeros[1][i], nonzeros[0][i]])  # Per the SAM docs, coordinates are given horizontal-first, i.e., (x, y) order
        positive_point_cords = np.array(positive_point_cords)
        positive_point_labels = np.ones(len(positive_point_cords))

        # Define negative samples (points outside the optical-flow region)
        if SAM_negative_sample_num != 0:
            skip_prob = 2 * SAM_negative_sample_num / (flow_uv.shape[0]*flow_uv.shape[1] - len(nonzeros[0]))
            negative_point_cords = []
            for i in range(flow_uv.shape[0]):
                for j in range(flow_uv.shape[1]):
                    if flow_uv[i][j][0] == 0 and flow_uv[i][j][1] == 0:  # Zero means no motion; low motion was already zeroed out by filter_uv
                        if random.random() < skip_prob:
                            negative_point_cords.append([j, i])  # Again (x, y) order: horizontal first, then vertical
            negative_point_cords = np.array(negative_point_cords)  # [:SAM_negative_sample_num] would cap the count exactly
            negative_point_labels = np.zeros(len(negative_point_cords))
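            # Sampling-rate check: each zero-flow pixel is kept with
            # probability skip_prob, so the expected number of negatives is
            # roughly skip_prob * (#zero-flow pixels) = 2 * SAM_negative_sample_num,
            # i.e., about twice the requested count, which leaves slack for
            # the randomness of the draw.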

        ################## Use SAM to segment the moving object (optionally with negative points) ##################
        if idx == 0:  # Only the first frame is considered for now.
            # Prompt SAM with the sampled coordinates
            sam_predictor.set_image(np.uint8(np.transpose(image1.numpy(), (1, 2, 0))))
            if SAM_negative_sample_num != 0 and len(negative_point_cords) != 0:
                all_point_cords = np.concatenate((positive_point_cords, negative_point_cords), axis=0)
                all_point_labels = np.concatenate((positive_point_labels, negative_point_labels), axis=0)
            else:
                all_point_cords = positive_point_cords
                all_point_labels = positive_point_labels
            masks, scores, logits = sam_predictor.predict(
                point_coords=all_point_cords,
                point_labels=all_point_labels,
                multimask_output=False,
            )
            mask = masks[0]  # TODO: make sure we pick the largest mask here; we currently assume masks[0] is the largest and ignore the second-largest and the rest, which may be a bug
            # if verbose:
            #     cv2.imwrite("mask_"+str(idx+1)+".png", (np.uint8(mask)*255))
            #     annotated_img = show_mask(mask)
            #     cv2.imwrite("annotated.png", annotated_img)

            ################## Choose a reference point for future tracking ##################
            # Choose a random point in the mask
            target_zone = np.nonzero(mask)  # [(vertical), (horizontal)]
            target_zone = [(target_zone[0][i], target_zone[1][i]) for i in range(len(target_zone[0]))]  # Now the structure is [(vertical, horizontal), ...]
            repeat_time = 0
            loop2find = True
            while loop2find:
                loop2find = False
                start_point = target_zone[np.random.choice(len(target_zone), 1, replace=False)[0]]
                start_vertical, start_horizontal = start_point
                repeat_time += 1
                if repeat_time == 100:
                    # In some rare cases this may loop forever, so we break manually
                    print("Still unable to find an optimal first point, but we cannot let it loop forever")
                    break
                # Try to choose a start_point that is centralized (not close to the border)
                fast_break = False
                for i in range(-15, 15):
                    for j in range(-15, 15):
                        dil_vertical, dil_horizontal = start_vertical + i, start_horizontal + j
                        if (0 <= dil_vertical < mask.shape[0]) and (0 <= dil_horizontal < mask.shape[1]):
                            if mask[dil_vertical][dil_horizontal] == 0:
                                print("We need to switch to a new start point since this one is close to the border of the object")
                                loop2find = True
                                fast_break = True
                                break
                        else:
                            # Skip points that are close to the image boundary
                            print("We need to switch to a new start point since this one is close to the border of the image")
                            loop2find = True
                            fast_break = True
                            break
                    if fast_break:
                        break
            traj_path.append(start_point)
            status, base_img = visualize_traj(base_img, traj_path)
            if not status:  # If the trajectory is invalid, we discard this sample
                with open("log.txt", "a") as file:
                    file.write("Invalid start point\n")
                return False

        # Read the last point in the trajectory
        ref_vertical, ref_horizontal = traj_path[-1][0], traj_path[-1][1]

        # Average the motion vectors of the points surrounding (8+1 directions)
        # the reference point, since that local statistic is the most reliable
        horizon_lists, vertical_lists = [], []
        start_range, end_range = -5, 5
        # Calculate the average motion based on the surrounding motion
        search_times = 0
        while len(horizon_lists) == 0:  # If we cannot find any motion nearby, widen the window, but flag the track
            search_times += 1
            if search_times > 1:
                print("This is hard to track, and we have tracked " + str(search_times) + " times")
                # TODO: if the point goes out of bounds and search_times reaches 8-10, cut the remaining frames since they are very inaccurate; re-tracking a new point is possible but probably not worthwhile — decide based on the overall sample count
                is_hard_to_track = True
                hard_track_idxs.add(idx)
            if abs(start_range) >= flow_uv.shape[0]//2:
                with open("log.txt", "a") as file:
                    file.write("This folder has searched the whole frame but didn't find any place to track optical flow\n")
                return False  # If we searched the whole frame and found nothing to track, discard this sample
            # Search a larger nearby window; widening the search range should be the most stable choice
            for i in range(start_range, end_range):
                for j in range(start_range, end_range):
                    target_vertical, target_horizontal = ref_vertical + i, ref_horizontal + j
                    if 0 <= target_vertical < flow_uv.shape[0] and 0 <= target_horizontal < flow_uv.shape[1]:
                        if flow_uv[target_vertical, target_horizontal, 0] == 0 or flow_uv[target_vertical, target_horizontal, 1] == 0:
                            continue  # Ignore zero vectors so that only moving positions contribute
                        horizon_lists.append(flow_uv[target_vertical, target_horizontal, 0])  # Horizontal motion strength
                        vertical_lists.append(flow_uv[target_vertical, target_horizontal, 1])  # Vertical motion strength
            # If nothing was found, keep searching over a larger window
            start_range -= 10
            end_range += 10
        average_dx = sum(horizon_lists)/len(horizon_lists)
        average_dy = sum(vertical_lists)/len(vertical_lists)
        print("average movement is ", (average_dx, average_dy))
        traj_path.append((int(traj_path[-1][0] + average_dy), int(traj_path[-1][1] + average_dx)))  # Append the new position in (vertical, horizontal) order
        print(traj_path)

        ##################### Visualize the trajectory path (debug purpose) #####################
        status, base_img = visualize_traj(base_img, traj_path)
        if not status:  # If the trajectory is invalid, we discard this sample
            return False
        cv2.imwrite(os.path.join(store_dir, "traj_path.png"), cv2.cvtColor(base_img, cv2.COLOR_BGR2RGB))
        if traj_visualization:
            status, single_traj_img = visualize_traj(np.uint8(np.transpose(image1.numpy(), (1, 2, 0))), traj_path[:-1], connect_points=False)
            if not status:  # If the trajectory is invalid, we discard this sample
                return False
            traj_write_path = os.path.join(store_dir, "traj_"+str(idx)+".png")
            # cv2.imwrite(traj_write_path, cv2.cvtColor(single_traj_img, cv2.COLOR_BGR2RGB))
            traj_image_lists.append(traj_write_path)

    # if traj_visualization:
    #     images = []
    #     for filename in traj_image_lists:
    #         images.append(imageio.imread(filename))
    #         # os.remove(filename)  # Remove once used
    #     imageio.mimsave(os.path.join(store_dir, 'traj_motion.gif'), images, duration=0.05)

    # TODO: if tracking is hard, we could aggressively retry several times, roughly rank the runs by the size of hard_track_idxs, and keep the best of three
    if is_hard_to_track:
        if len(hard_track_idxs) >= len(img_pairs)//3:  # If more than a third of the trajectory is hard to track, consider discarding this sample
            with open("log.txt", "a") as file:
                file.write("Many frames made it hard to find the dx and dy movement; under this circumstance the track is not recommended\n")
            return False

    # Write all positions to a file for further use
    txt_path = os.path.join(store_dir, "traj_data.txt")
    if os.path.exists(txt_path):
        os.remove(txt_path)
    with open(txt_path, "a") as file:
        for traj in traj_path:
            file.write(str(traj[0]) + " " + str(traj[1]) + "\n")
    # Save the flow as numpy data
    # with open(os.path.join(store_dir, 'traj_data.npy'), 'wb') as f:
    #     np.save(f, flow_uv)
    print("We write ", traj_path)
    return True

def manage_seq_range(input_dir, store_dir, total_frame_needed):
    lists = os.listdir(input_dir)
    lists = lists[2:-2]  # Skip the first and last two frames (only the count is used below; copying starts at im_2)
    num_frames_input = len(lists)
    if num_frames_input < total_frame_needed:
        print("The number of frames is too small to construct the needed sequence length")
        return False

    # Spread the surplus frames as +1 gaps at the front of the sequence
    division_factor = num_frames_input // total_frame_needed
    remain_frame = num_frames_input % total_frame_needed
    gaps = [division_factor for _ in range(total_frame_needed)]
    for idx in range(remain_frame):
        gaps[idx] += 1

    cur_idx = 2
    for global_idx, gap in enumerate(gaps):
        source_path = os.path.join(input_dir, "im_"+str(cur_idx)+".jpg")
        destination_path = os.path.join(store_dir, "im_"+str(global_idx)+".jpg")
        shutil.copyfile(source_path, destination_path)
        cur_idx += gap
    return True
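
# Worked example: with 18 usable frames and total_frame_needed = 14,
# division_factor = 1 and remain_frame = 4, so gaps = [2, 2, 2, 2, 1, ..., 1];
# the copied sources are im_2, im_4, im_6, im_8, im_10, im_11, ..., im_19,
# renamed to im_0 .. im_13 in the store directory.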

def generate_pairs(dirname, start_idx, end_idx):
    img_pairs = []
    for idx in range(start_idx, end_idx):
        img1 = osp.join(dirname, f'im_{idx}.jpg')
        img2 = osp.join(dirname, f'im_{idx+1}.jpg')
        # img1 = f'{idx:06}.png'
        # img2 = f'{idx+1:06}.png'
        img_pairs.append((img1, img2))
    return img_pairs
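
# Example: generate_pairs("clip", 0, 3) yields
# [('clip/im_0.jpg', 'clip/im_1.jpg'),
#  ('clip/im_1.jpg', 'clip/im_2.jpg'),
#  ('clip/im_2.jpg', 'clip/im_3.jpg')],
# i.e., one pair per consecutive frame.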

def process_partial_request(request_list, num_frames, traj_visualization, viz_root_dir):
    # Init the optical flow model
    optical_flow_model = build_model()

    # Init SAM for the segmentation task
    model_type = "vit_h"
    weight_path = "pretrained/sam_vit_h_4b8939.pth"
    SAM_positive_sample_num = 20  # How many positive sample points we use
    SAM_negative_sample_num = 0   # How many negative sample points we use
    print("In multiprocessing, we build an instance of mask_generator independently")
    sam = sam_model_registry[model_type](checkpoint=weight_path).to(device="cuda")
    mask_generator = SamAutomaticMaskGenerator(sam)
    print("In multiprocessing, we build an instance of sam_predictor independently")
    sam_predictor = SamPredictor(sam)

    counter = 0
    while True:
        counter += 1
        if counter == 10:
            counter = 0
            gc.collect()
            print("We will sleep here to let memory get reclaimed")
            time.sleep(5)
        info = request_list[0]
        request_list = request_list[1:]
        if info is None:
            print("This queue ends")
            break

        # Process each sub_input_dir and store the information there
        sub_input_dir = info
        img_pairs = generate_pairs(sub_input_dir, 0, num_frames-1)
        print(img_pairs)
        with torch.no_grad():
            # Calculate the optical flow and return a status saying whether the generated flow is usable
            status = calculate_flow(viz_root_dir, sub_input_dir, img_pairs, optical_flow_model, sam_predictor, SAM_positive_sample_num, SAM_negative_sample_num,
                                    mask_generator, traj_visualization, keep_size=True)
        # file = open("log.txt", "a")
        print("The status for folder " + sub_input_dir + " is " + str(status) + "\n")
        if not status:
            # If the status is failed, we won't keep this folder afterwards
            print("The status is Failed, so we won't store this one as promising data")
        else:
            print("We have successfully processed one!")

if __name__ == '__main__':
    # Manage the parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_dir', default='../validation_flow14/')
    parser.add_argument('--num_workers', type=int, default=1)  # Number of worker processes
    parser.add_argument('--viz_root_dir', default='viz_results')
    parser.add_argument('--traj_visualization', default=True)  # If True, store per-frame trajectory visualizations
    # list_start = 0
    # list_end = 25000
    num_frames = 14

    args = parser.parse_args()
    input_dir = args.input_dir
    num_workers = args.num_workers
    viz_root_dir = args.viz_root_dir
    traj_visualization = args.traj_visualization

    store_idx = 0
    dir_list = []
    for sub_input_name in sorted(os.listdir(input_dir)):
        sub_input_dir = os.path.join(input_dir, sub_input_name)
        # sub_store_dir = os.path.join(store_dir, "0"*(7-len(str(store_idx)))+str(store_idx))
        store_idx += 1
        dir_list.append(sub_input_dir)
    # Truncate the list to the target range
    # dir_list = dir_list[list_start:]

    # Use multiprocessing to speed things up
    num = math.ceil(len(dir_list) / num_workers)
    for idx in range(num_workers):
        # set_start_method('spawn', force=True)
        request_list = dir_list[:num]
        request_list.append(None)
        dir_list = dir_list[num:]
        process_partial_request(request_list, num_frames, traj_visualization, viz_root_dir)  # Sequential call for debug purposes
        # p = mp.Process(target=process_partial_request, args=(request_list, num_frames, traj_visualization, viz_root_dir, ))
        # p.start()
    print("Submitted all jobs!")
    # p.join()  # Without join(), the multiprocessing run seems to exit prematurely on its own
    print("All tasks finished!")