Spaces: Running on Zero

Commit · 4b11292
1 Parent(s): b63403f

Update app

- app.py +4 -2
- preprocess/inference_preprocess.py +3 -0
- requirements.txt +2 -2
app.py CHANGED

@@ -20,6 +20,7 @@ from scipy.io.wavfile import write
import mediapipe as mp
from protobuf_to_dict import protobuf_to_dict
import warnings
+import spaces

mp_holistic = mp.solutions.holistic
warnings.filterwarnings("ignore", category=DeprecationWarning)

@@ -36,7 +37,7 @@ n_negative_samples = 100
# Initialize the mediapipe holistic keypoint detection model
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

-
+@spaces.GPU(duration=300)
def preprocess_video(path, result_folder, apply_preprocess, padding=20):

    '''

@@ -743,7 +744,7 @@ def extract_audio(video, result_folder):

    return wav_file, "success"

-
+@spaces.GPU(duration=200)
def get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True):

    '''

@@ -878,6 +879,7 @@ def save_video(output_tracks, input_frames, wav_file, result_folder):

    return video_output, "success"

+@spaces.GPU(duration=200)
def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):

    try:
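The change follows the usual Hugging Face ZeroGPU pattern: the `spaces` package is imported and each GPU-bound entry point is wrapped with `@spaces.GPU(duration=...)`, which requests a GPU for at most that many seconds while the decorated call runs and releases it afterwards. A minimal sketch of the same pattern, using a placeholder model and function rather than this repo's own code:

import spaces   # Hugging Face ZeroGPU helper package
import torch

# Placeholder model for illustration; on ZeroGPU, build models on CPU at import time.
model = torch.nn.Linear(512, 512)

@spaces.GPU(duration=120)  # request a GPU allocation of up to 120 seconds per call
def embed(batch: torch.Tensor) -> torch.Tensor:
    # CUDA becomes available inside the decorated function on ZeroGPU Spaces.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    with torch.no_grad():
        return model(batch.to(device)).cpu()

The duration values chosen in the commit (300 s for preprocessing, 200 s for embedding extraction and sync-offset computation) cap how long each allocation may be held, so the slower step gets the larger window.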
preprocess/inference_preprocess.py CHANGED

@@ -18,6 +18,8 @@ from ultralytics import YOLO

from decord import VideoReader

+import spaces
+
parser = argparse.ArgumentParser(description="FaceTracker")
parser.add_argument('--data_dir', type=str, help='directory to save intermediate temp results')
parser.add_argument('--facedet_scale', type=float, default=0.25, help='Scale factor for face detection')

@@ -162,6 +164,7 @@ def crop_video(opt, track, cropfile, tight_scale=1):

    return {'track': track, 'proc_track': dets}

+@spaces.GPU(duration=200)
def inference_video(opt, padding=0):
    videofile = os.path.join(opt.avi_dir, 'video.avi')
    vidObj = cv2.VideoCapture(videofile)
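The preprocessing script gets the same treatment: `import spaces` at module level and `@spaces.GPU(duration=200)` on `inference_video`, the FaceTracker entry point (presumably the GPU-heavy detection loop, given the `from ultralytics import YOLO` visible in the hunk header), so it only holds a ZeroGPU allocation while that call runs.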
requirements.txt CHANGED

@@ -10,8 +10,8 @@ protobuf3-to-dict==0.1.5
python_speech_features==0.6
scenedetect==0.6.4
scikit-learn==1.5.1
-torch==
-torchvision==0.
+torch==2.0.0
+torchvision==0.15.1
tqdm==4.66.4
ultralytics==8.2.70
ultralytics-thop==2.0.0
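torch 2.0.0 and torchvision 0.15.1 are a matched release pair, so pinning them together avoids the torch/torchvision build-mismatch errors that can occur when the two packages are resolved independently.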