sindhuhegde committed
Commit c6e1104 · Parent(s): 8eab689

Update app

Files changed (1): app.py (+7 -1)
app.py CHANGED
@@ -647,6 +647,7 @@ def process_video(video_path, num_avg_frames):
     wav_file, fps, vid_path_processed, status = preprocess_video(video_path, result_folder_input)
     if status != "success":
         return status, None
+    print("Successfully preprocessed the video")
 
     # Resample the video to 25 fps if it is not already 25 fps
     print("FPS of video: ", fps)
@@ -666,7 +667,7 @@ def process_video(video_path, num_avg_frames):
     frames, status = load_video_frames(vid_path)
     if status != "success":
         return status, None
-
+    print("Successfully extracted the video frames")
 
     if len(frames) < num_avg_frames:
         return "Error: The input video is too short. Please use a longer input video.", None
@@ -675,6 +676,7 @@ def process_video(video_path, num_avg_frames):
     kp_dict, status = get_keypoints(frames)
     if status != "success":
         return status, None
+    print("Successfully extracted the keypoints")
 
     status = check_visible_gestures(kp_dict)
     if status != "success":
@@ -689,12 +691,14 @@ def process_video(video_path, num_avg_frames):
     rgb_frames = np.transpose(rgb_frames, (4, 0, 1, 2, 3))
     rgb_frames = torch.FloatTensor(np.array(rgb_frames)).unsqueeze(0)
     B = rgb_frames.size(0)
+    print("Successfully converted the frames to tensor")
 
     # Load spectrograms
     spec, orig_spec, status = load_spectrograms(wav_file, num_frames, window_frames=25)
     if status != "success":
         return status, None
     spec = torch.FloatTensor(spec).unsqueeze(0).unsqueeze(0).permute(0, 1, 2, 4, 3)
+    print("Successfully loaded the spectrograms")
 
     # Create input windows
     video_sequences = torch.cat([rgb_frames[:, :, i] for i in range(rgb_frames.size(2))], dim=0)
@@ -703,6 +707,7 @@ def process_video(video_path, num_avg_frames):
     # Load the trained model
     model = Transformer_RGB()
     model = load_checkpoint(CHECKPOINT_PATH, model)
+    print("Successfully loaded the model")
 
     # Process in batches
     batch_size = 12
@@ -737,6 +742,7 @@ def process_video(video_path, num_avg_frames):
     video_emb = torch.split(video_emb, B, dim=0)
     video_emb = torch.stack(video_emb, dim=2)
     video_emb = video_emb.squeeze(3)
+    print("Successfully extracted GestSync embeddings")
 
     # Calculate sync offset
     pred_offset, status = calc_optimal_av_offset(video_emb, audio_emb, num_avg_frames, model)
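For context, the prints added in this commit bracket a batched-inference pattern visible in the surrounding hunks: the input windows are pushed through the model in chunks of batch_size = 12, and the per-batch embeddings are reassembled (torch.split / torch.stack) before the sync offset is computed. A minimal sketch of that pattern, assuming PyTorch; embed_in_batches and model_forward are hypothetical names standing in for the loop and the Transformer_RGB forward pass in app.py:

import torch

def embed_in_batches(windows, model_forward, batch_size=12):
    # windows: (N, ...) tensor of input windows; model_forward is a
    # hypothetical stand-in for the checkpointed model's forward pass.
    chunks = []
    with torch.no_grad():  # inference only, no gradients needed
        for start in range(0, windows.size(0), batch_size):
            chunks.append(model_forward(windows[start:start + batch_size]))
    # Reassemble the per-batch outputs into one (N, emb_dim) tensor,
    # analogous to the torch.split / torch.stack bookkeeping in app.py.
    return torch.cat(chunks, dim=0)

Chunked inference like this keeps peak GPU memory bounded by the batch size rather than by the total number of windows, which is why the embeddings must be stitched back together afterwards.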
 
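A possible follow-up, not part of this commit: the new messages are plain print calls, so they always go to stdout. If the app ever needs quieter or redirectable progress output, the same checkpoints could go through Python's standard logging module. A sketch under that assumption (the logger name gestsync.app is hypothetical):

import logging

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("gestsync.app")

# Same progress checkpoints as the prints added in this commit,
# but filterable by level and redirectable via handlers:
logger.info("Successfully preprocessed the video")
logger.info("Successfully extracted GestSync embeddings")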