Commit 6272c46 · Update app
Parent: b015d09

app.py CHANGED
@@ -554,7 +554,7 @@ def load_checkpoint(path, model):
     model.load_state_dict(new_s)
 
     if use_cuda:
-        model.
+        model.to(device)
 
     print("Loaded checkpoint from: {}".format(path))
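In `load_checkpoint`, the old line `model.` was left dangling — a syntax error — and the fix completes it as `model.to(device)`, matching the module-level `device` the rest of app.py already uses (e.g. `video_inp.to(device)` in the deleted comments below). A minimal sketch of the device-agnostic pattern the fixed code follows; the `use_cuda` / `device` names mirror the diff, while the checkpoint key remapping (`new_s`) is simplified away:

    import torch

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    def load_checkpoint(path, model):
        # Map tensors to CPU first so a GPU-saved checkpoint
        # still loads on a CPU-only host.
        state = torch.load(path, map_location="cpu")
        model.load_state_dict(state)
        if use_cuda:
            model.to(device)  # move parameters and buffers onto the GPU
        print("Loaded checkpoint from: {}".format(path))
        return model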
@@ -1323,28 +1323,9 @@ def process_video_syncoffset(video_path, num_avg_frames, apply_preprocess):
     model = load_checkpoint(CHECKPOINT_PATH, model)
     print("Successfully loaded the model")
 
+    # Extract embeddings
     video_emb, audio_emb = get_embeddings(video_sequences, audio_sequences, model, calc_aud_emb=True)
 
-    # Process in batches
-    # batch_size = 12
-    # video_emb = []
-    # audio_emb = []
-
-    # for i in tqdm(range(0, len(video_sequences), batch_size)):
-    #     video_inp = video_sequences[i:i+batch_size, ]
-    #     audio_inp = audio_sequences[i:i+batch_size, ]
-
-    #     vid_emb = model.forward_vid(video_inp.to(device))
-    #     vid_emb = torch.mean(vid_emb, axis=-1).unsqueeze(-1)
-    #     aud_emb = model.forward_aud(audio_inp.to(device))
-
-    #     video_emb.append(vid_emb.detach())
-    #     audio_emb.append(aud_emb.detach())
-
-    #     torch.cuda.empty_cache()
-
-    # audio_emb = torch.cat(audio_emb, dim=0)
-    # video_emb = torch.cat(video_emb, dim=0)
 
     # L2 normalize embeddings
     video_emb = torch.nn.functional.normalize(video_emb, p=2, dim=1)
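The deleted block was a commented-out fallback that computed the embeddings in fixed-size batches instead of one `get_embeddings` call, bounding peak GPU memory. A sketch reconstructing that pattern from the deleted comments — `model.forward_vid` / `model.forward_aud` and the module-level `device` come from the diff, everything else is assumed:

    import torch
    from tqdm import tqdm

    def get_embeddings_batched(video_sequences, audio_sequences, model,
                               batch_size=12):
        video_emb, audio_emb = [], []
        for i in tqdm(range(0, len(video_sequences), batch_size)):
            video_inp = video_sequences[i:i + batch_size]
            audio_inp = audio_sequences[i:i + batch_size]

            vid_emb = model.forward_vid(video_inp.to(device))
            vid_emb = torch.mean(vid_emb, axis=-1).unsqueeze(-1)  # pool over time
            aud_emb = model.forward_aud(audio_inp.to(device))

            # Detach so the autograd graph is freed batch by batch.
            video_emb.append(vid_emb.detach())
            audio_emb.append(aud_emb.detach())
            torch.cuda.empty_cache()

        return torch.cat(video_emb, dim=0), torch.cat(audio_emb, dim=0)

The retained code then L2-normalizes both embeddings (`p=2, dim=1`), so downstream similarity scores reduce to cosine similarity computed with plain dot products.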
@@ -1429,11 +1410,6 @@ def process_video_activespeaker(video_path, global_speaker, num_avg_frames):
         return None, status
 
     # Pre-process and extract per-speaker tracks in each scene
-    print("Pre-processing the input video...")
-    # status = subprocess.call("python preprocess/inference_preprocess.py --data_dir={}/temp --sd_root={}/crops --work_root={}/metadata --data_root={}".format(result_folder_input, result_folder_input, result_folder_input, video_path), shell=True)
-    # if status != 0:
-    #     msg = "Error in pre-processing the input video, please check the input video and try again..."
-    #     return None, msg
     status = preprocess_asd(video_path, result_folder_input)
    if status != "success":
         return None, status
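The deleted lines shelled out to `preprocess/inference_preprocess.py` via `subprocess.call` and checked its exit code; the retained `preprocess_asd` call runs in-process and reports errors as strings, with `"success"` as the happy-path sentinel. A hypothetical sketch of that contract — `preprocess_asd`'s internals are not shown in the diff, so `run_asd_pipeline` below is an assumed placeholder:

    def preprocess_asd(video_path, result_folder):
        # Return "success" or a human-readable error message so the
        # caller can surface it directly in the UI.
        try:
            run_asd_pipeline(video_path, result_folder)  # assumed helper
        except Exception as e:
            return "Error in pre-processing the input video: {}".format(e)
        return "success"

This keeps the error text in one place: the caller simply does `if status != "success": return None, status`.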