Commit: c6e1104
Parent(s): 8eab689
Update app
app.py CHANGED

@@ -647,6 +647,7 @@ def process_video(video_path, num_avg_frames):
     wav_file, fps, vid_path_processed, status = preprocess_video(video_path, result_folder_input)
     if status != "success":
         return status, None
+    print("Successfully preprocessed the video")
 
     # Resample the video to 25 fps if it is not already 25 fps
     print("FPS of video: ", fps)
@@ -666,7 +667,7 @@ def process_video(video_path, num_avg_frames):
     frames, status = load_video_frames(vid_path)
     if status != "success":
         return status, None
-
+    print("Successfully extracted the video frames")
 
     if len(frames) < num_avg_frames:
         return "Error: The input video is too short. Please use a longer input video.", None
@@ -675,6 +676,7 @@ def process_video(video_path, num_avg_frames):
     kp_dict, status = get_keypoints(frames)
     if status != "success":
         return status, None
+    print("Successfully extracted the keypoints")
 
     status = check_visible_gestures(kp_dict)
     if status != "success":
@@ -689,12 +691,14 @@ def process_video(video_path, num_avg_frames):
     rgb_frames = np.transpose(rgb_frames, (4, 0, 1, 2, 3))
     rgb_frames = torch.FloatTensor(np.array(rgb_frames)).unsqueeze(0)
     B = rgb_frames.size(0)
+    print("Successfully converted the frames to tensor")
 
     # Load spectrograms
     spec, orig_spec, status = load_spectrograms(wav_file, num_frames, window_frames=25)
     if status != "success":
         return status, None
     spec = torch.FloatTensor(spec).unsqueeze(0).unsqueeze(0).permute(0, 1, 2, 4, 3)
+    print("Successfully loaded the spectrograms")
 
     # Create input windows
     video_sequences = torch.cat([rgb_frames[:, :, i] for i in range(rgb_frames.size(2))], dim=0)
@@ -703,6 +707,7 @@ def process_video(video_path, num_avg_frames):
     # Load the trained model
     model = Transformer_RGB()
     model = load_checkpoint(CHECKPOINT_PATH, model)
+    print("Successfully loaded the model")
 
     # Process in batches
     batch_size = 12
@@ -737,6 +742,7 @@ def process_video(video_path, num_avg_frames):
     video_emb = torch.split(video_emb, B, dim=0)
     video_emb = torch.stack(video_emb, dim=2)
     video_emb = video_emb.squeeze(3)
+    print("Successfully extracted GestSync embeddings")
 
     # Calculate sync offset
     pred_offset, status = calc_optimal_av_offset(video_emb, audio_emb, num_avg_frames, model)
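For reference, every addition in this commit follows the same pattern inside process_video: once a stage's status check passes, a progress message is printed. A minimal, self-contained sketch of that pattern is shown below; the run_stage helper and fake_preprocess function are illustrative stand-ins and are not part of app.py.

def run_stage(name, stage_fn, *args):
    # Mirror the status-check pattern used in process_video: run the stage,
    # bail out on failure, and print a progress message on success.
    result, status = stage_fn(*args)
    if status != "success":
        return None, status
    print(f"Successfully {name}")
    return result, status


# Hypothetical stand-in for a real stage such as preprocess_video.
def fake_preprocess(video_path):
    return {"wav_file": video_path + ".wav"}, "success"


if __name__ == "__main__":
    out, status = run_stage("preprocessed the video", fake_preprocess, "input.mp4")
    print(status, out)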
|