Spaces: Running on Zero

Commit · 4b11292
1 Parent(s): b63403f

Update app

- app.py +4 -2
- preprocess/inference_preprocess.py +3 -0
- requirements.txt +2 -2
app.py CHANGED

@@ -20,6 +20,7 @@ from scipy.io.wavfile import write
import mediapipe as mp
from protobuf_to_dict import protobuf_to_dict
import warnings
+import spaces

mp_holistic = mp.solutions.holistic
warnings.filterwarnings("ignore", category=DeprecationWarning)

@@ -36,7 +37,7 @@ n_negative_samples = 100
# Initialize the mediapipe holistic keypoint detection model
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

-
+@spaces.GPU(duration=300)
def preprocess_video(path, result_folder, apply_preprocess, padding=20):

    '''

@@ -743,7 +744,7 @@ def extract_audio(video, result_folder):

    return wav_file, "success"

-
+@spaces.GPU(duration=200)
def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):

    '''

@@ -878,6 +879,7 @@ def save_video(output_tracks, input_frames, wav_file, result_folder):

    return video_output, "success"

+@spaces.GPU(duration=200)
def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):

    try:
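The change follows the usual Hugging Face ZeroGPU pattern: the `spaces` package is imported and each GPU-bound entry point is wrapped with `@spaces.GPU(duration=...)`, which requests a GPU for at most that many seconds while the decorated call runs and releases it afterwards. A minimal sketch of the same pattern, using a placeholder model and function rather than this repo's own code:

import spaces   # Hugging Face ZeroGPU helper package
import torch

# Placeholder model for illustration; on ZeroGPU, build models on CPU at import time.
model = torch.nn.Linear(512, 512)

@spaces.GPU(duration=120)  # request a GPU allocation of up to 120 seconds per call
def embed(batch: torch.Tensor) -> torch.Tensor:
    # CUDA becomes available inside the decorated function on ZeroGPU Spaces.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    with torch.no_grad():
        return model(batch.to(device)).cpu()

The duration values chosen in the commit (300 s for preprocessing, 200 s for embedding extraction and sync-offset computation) cap how long each allocation may be held, so the slower step gets the larger window.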
preprocess/inference_preprocess.py CHANGED

@@ -18,6 +18,8 @@ from ultralytics import YOLO

from decord import VideoReader

+import spaces
+
parser = argparse.ArgumentParser(description="FaceTracker")
parser.add_argument('--data_dir', type=str, help='directory to save intermediate temp results')
parser.add_argument('--facedet_scale', type=float, default=0.25, help='Scale factor for face detection')

@@ -162,6 +164,7 @@ def crop_video(opt, track, cropfile, tight_scale=1):

    return {'track': track, 'proc_track': dets}

+@spaces.GPU(duration=200)
def inference_video(opt, padding=0):
    videofile = os.path.join(opt.avi_dir, 'video.avi')
    vidObj = cv2.VideoCapture(videofile)
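The preprocessing script gets the same treatment: `import spaces` at module level and `@spaces.GPU(duration=200)` on `inference_video`, the FaceTracker entry point (presumably the GPU-heavy detection loop, given the `from ultralytics import YOLO` visible in the hunk header), so it only holds a ZeroGPU allocation while that call runs.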
requirements.txt CHANGED

@@ -10,8 +10,8 @@ protobuf3-to-dict==0.1.5
python_speech_features==0.6
scenedetect==0.6.4
scikit-learn==1.5.1
-torch==
-torchvision==0.
+torch==2.0.0
+torchvision==0.15.1
tqdm==4.66.4
ultralytics==8.2.70
ultralytics-thop==2.0.0
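torch 2.0.0 and torchvision 0.15.1 are a matched release pair, so pinning them together avoids the torch/torchvision build-mismatch errors that can occur when the two packages are resolved independently.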