Spaces: Build error

Iskaj committed · Commit 0afaddb · Parent(s): 6715214

add docs to videomatch.py

Browse files
videomatch.py +104 -41

videomatch.py CHANGED
@@ -15,14 +15,17 @@ import pandas as pd
 from videohash import compute_hashes, filepath_from_url
 from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
 
-# def get_target_urls(json_file='apb2022.json'):
-#     """ Obtain target urls for the target videos of a json file containing .mp4 files """
-#     with open('apb2022.json', "r") as json_file:
-#         target_videos = json.load(json_file)
-#     return [video['mp4'] for video in target_videos]
-
 def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
-    """ Compute hashes of a video and index the video using faiss indices and return the index.
+    """ Compute hashes of a video and index the video using faiss indices and return the index.
+
+    Args:
+        url (str): url to compute hashes for and index.
+
+    Returns:
+        index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
+
+    """
+    # If the url already had indices created, fetch those.
     filepath = filepath_from_url(url)
     if os.path.exists(f'{filepath}.index'):
         logging.info(f"Loading indexed hashes from {filepath}.index")
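For readers new to FAISS binary indices, the caching pattern documented above boils down to the following read/write round-trip; a minimal sketch with made-up 64-bit hashes and a hypothetical cache path, not code from this commit:

import os

import faiss
import numpy as np

# Made-up 64-bit binary hashes for 100 frames: 8 uint8 bytes per vector.
hashes = np.random.randint(0, 256, size=(100, 8), dtype=np.uint8)

path = "hashes.index"  # hypothetical cache path
if os.path.exists(path):
    index = faiss.read_index_binary(path)  # reuse the cached index
else:
    index = faiss.IndexBinaryFlat(64)      # dimension in bits: 8 bytes * 8
    index.add(hashes)
    faiss.write_index_binary(index, path)  # cache for the next run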
@@ -30,59 +33,90 @@ def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
         logging.info(f"Index {filepath}.index has in total {binary_index.ntotal} frames")
         return binary_index
 
+    # Create hash vectors for url by looping over hashes from the video.
     hash_vectors = np.array([x['hash'] for x in compute_hashes(url)])
     logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
 
     # Initializing the quantizer.
     quantizer = faiss.IndexBinaryFlat(hash_vectors.shape[1]*8)
+
     # Initializing index.
     index = faiss.IndexBinaryIVF(quantizer, hash_vectors.shape[1]*8, min(16, hash_vectors.shape[0]))
-    index.nprobe = 1 #
-
+    index.nprobe = 1 # Nr of nearest clusters to be searched per query.
+
+    # Train and write the quantizer.
    index.train(hash_vectors)
-    #index = faiss.IndexBinaryFlat(64)
     index.add(hash_vectors)
     faiss.write_index_binary(index, f'{filepath}.index')
     logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
+
     return index
 
 def get_video_index(url: str):
     """ Builds up a FAISS index for a video.
-
-
+
+    Args:
+        url (str): Location of the source video (video that is to be indexed)
+
+    Returns:
+        video_index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
+        hash_vectors (ndarray): vector of the indexed frames that can be searched
+
     """
-    # Url (short video)
     video_index = index_hashes_for_video(url)
-    video_index.make_direct_map() # Make sure the index is indexable
-    hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
 
+    # Make sure the index is indexable
+    video_index.make_direct_map()
+
+    # Retrieve original indices
+    hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)])
     return video_index, hash_vectors
 
 def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
     """ The comparison between the target and the original video will be plotted based
     on the matches between the target and the original video over time. The matches are determined
     based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
+
+    The results are returned as a triplet of 1D arrays:
+    lims, D, I, where the result for query i is in I[lims[i]:lims[i+1]]
+    (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
+    (See: https://github.com/facebookresearch/faiss/wiki/Special-operations-on-indexes)
+
+    Args:
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+        target_index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
+        MIN_DISTANCE (int): minimum distance for a match
+
+    Returns:
+        lims (ndarray): from where to where in I and D the result for query i is
+        D (ndarray): distances of the vectors within a radius around the query point
+        I (ndarray): indices of the neighbours
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+
     """
-    # The results are returned as a triplet of 1D arrays
-    # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
-    # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
     lims, D, I = target_index.range_search(hash_vectors, MIN_DISTANCE)
     return lims, D, I, hash_vectors
 
 def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE):
     """ To get a decent heuristic for a base distance, check every distance from MIN_DISTANCE to MAX_DISTANCE
-    until the number of matches found is equal to or higher than the number of frames in the source video
+    until the number of matches found is equal to or higher than the number of frames in the source video.
+    If the number of matches with a certain video is larger than the number of frames, we set the distance heuristic.
+    This was empirically determined to be a decent heuristic.
 
-
-
-
-
-
+    Args:
+        video_index (IndexBinaryIVF): The index of the source video
+        hash_vectors (ndarray): The hash vectors of the target video
+        target_index (IndexBinaryIVF): The index of the target video
+        MIN_DISTANCE (int): Minimum distance between vectors to be considered a match.
+        MAX_DISTANCE (int): Maximum distance between vectors to prevent bad matches.
+
+    Returns:
+        None if no distance is found, otherwise an integer representing the heuristic distance value.
+
+    """
+    # Go over every distance with a step size of 2, since the distance increases/decreases with that step size
     for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
-        distance = int(distance)
-        # --- Previously ---
-        # video_index, hash_vectors = get_video_index(filepath)
-        # target_index, _ = get_video_index(target)
+        distance = int(distance) # Cast for safety
         _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
         nr_source_frames = video_index.ntotal
         nr_matches = len(D)
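The range-search triplet documented in compare_videos is easiest to see on a toy example; the slicing below mirrors the docstring (all values made up):

import numpy as np

# Toy range_search output: query 0 has 2 neighbours, query 1 has none, query 2 has 1.
lims = np.array([0, 2, 2, 3])
D = np.array([1, 3, 2])     # Hamming distances
I = np.array([10, 11, 42])  # frame indices in the target index

for i in range(len(lims) - 1):
    neighbours = I[lims[i]:lims[i + 1]]
    distances = D[lims[i]:lims[i + 1]]
    print(f"query frame {i}: neighbours={neighbours}, distances={distances}")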
@@ -91,36 +125,64 @@ def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, M
         if nr_matches >= nr_source_frames:
             return distance
     logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
+
     return None
 
 def get_change_points(df, smoothing_window_size=10, method='ROBUST', metric="ROLL_OFFSET_MODE"):
+    """Using https://github.com/facebookresearch/Kats to analyze the data to find points where the metric
+    changes.
+
+    Args:
+        df (DataFrame): Dataframe holding the information between the matching of two videos
+        smoothing_window_size (int): Smoothing window for the timeseries analysis. Defaults to 10.
+        method (str): Method for the timeseries analysis. Defaults to 'ROBUST'.
+        metric (str): Main reporting metric for the timeseries analysis. Defaults to "ROLL_OFFSET_MODE".
+
+    Returns:
+        change_points [TimeSeriesChangePoint]: Array of time series change point objects.
+
+    """
+    # Convert the df to how kats wants it
     tsd = TimeSeriesData(df.loc[:,['time', metric]])
+
+    # Depending on the method get the change points
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
     elif method.upper() == "ROBUST":
         detector = RobustStatDetector(tsd)
     change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)
 
-    #
+    # Log some statistics
     if method.upper() == "CUSUM" and change_points != []:
         mean_offset_prechange = change_points[0].mu0
         mean_offset_postchange = change_points[0].mu1
         jump_s = mean_offset_postchange - mean_offset_prechange
-
+        logging.info(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
+
     return change_points
 
-def get_videomatch_df(lims, D, I, hash_vectors, distance,
-
-
-
-
-
+def get_videomatch_df(lims, D, I, hash_vectors, distance, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
+    """Get the dataframe holding all information of the comparison between two videos.
+
+    Args:
+        lims (ndarray): from where to where in I and D the result for query i is
+        D (ndarray): distances of the vectors within a radius around the query point
+        I (ndarray): indices of the neighbours
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+        distance (int): heuristic distance to use for the search for most accurate matches.
+        window_size (int): Rolling window size that is used when calculating the mode. Defaults to ROLLING_WINDOW_SIZE.
+        vanilla_df: Toggle for returning other baseline dataframe. Defaults to False.
 
+    Returns:
+        df (DataFrame): Dataframe with extra information added about decision making regarding the match between videos.
+
+    """
+    # Get match locations in seconds
     target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
     target_s = [i/FPS for j in target for i in j]
     source_s = [i/FPS for i in I]
 
-    # Make
+    # Make dataframe
     df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
     if vanilla_df:
         return df
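For context on the Kats calls above, a minimal change-point sketch; detector signatures differ between Kats releases and the column values here are invented, so treat this as illustrative only:

import pandas as pd
from kats.consts import TimeSeriesData
from kats.detectors.cusum_detection import CUSUMDetector

# Toy series: the offset jumps from ~0 to ~5 halfway through.
df = pd.DataFrame({
    "time": pd.date_range("2022-01-01", periods=20, freq="s"),
    "OFFSET": [0.0] * 10 + [5.0] * 10,
})

tsd = TimeSeriesData(df)              # requires a 'time' column plus one value column
change_points = CUSUMDetector(tsd).detector()
for cp in change_points:
    print(cp)                         # one change point expected near the jump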
@@ -129,7 +191,7 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTA
     df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
     df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
 
-    # Group by X so for every second/x there will be 1 value
+    # Group by X so for every second/x there will be 1 source value in the end
     grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
     grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']
 
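The grouping above is a distance-weighted mean of the source seconds per target second; a small self-contained illustration of the same aggregation, with made-up values:

import pandas as pd

distance = 4
df = pd.DataFrame({
    "TARGET_S": [0.1, 0.1, 0.2],
    "SOURCE_S": [10.0, 12.0, 10.1],
    "DISTANCE": [1, 3, 1],
})
df["TARGET_WEIGHT"] = 1 - df["DISTANCE"] / distance                 # 0.75, 0.25, 0.75
df["SOURCE_WEIGHTED_VALUE"] = df["SOURCE_S"] * df["TARGET_WEIGHT"]

grouped = df.groupby("TARGET_S").agg({"SOURCE_WEIGHTED_VALUE": "sum", "TARGET_WEIGHT": "sum"})
grouped["FINAL_SOURCE_VALUE"] = grouped["SOURCE_WEIGHTED_VALUE"] / grouped["TARGET_WEIGHT"]
print(grouped["FINAL_SOURCE_VALUE"])  # 0.1 -> 10.5 (pulled towards the closer match), 0.2 -> 10.1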
@@ -138,7 +200,7 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTA
     df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
     df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')
 
-    # Add NAN to "missing" x values
+    # Add NAN to "missing" x values
     step_size = 1/FPS
     x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
     df['TARGET_S'] = np.round(df['TARGET_S'], 1)
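The lines elided from this hunk presumably align the dataframe against x_complete; a generic sketch of that padding step, under the assumption that a set_index/reindex is used (not the commit's exact code):

import numpy as np
import pandas as pd

FPS = 5
step_size = 1 / FPS
df = pd.DataFrame({"TARGET_S": [0.0, 0.4], "SOURCE_S": [3.0, 3.4]})

# Build the complete, rounded time axis and reindex so missing steps become NaN.
x_complete = np.round(np.arange(0.0, df["TARGET_S"].max() + step_size, step_size), 1)
df = df.set_index("TARGET_S").reindex(x_complete)
print(df)  # the 0.2 row (the gap) now holds NaN, ready for interpolation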
@@ -150,11 +212,11 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTA
     # Interpolate between frames since there are missing values
     df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
 
-    # Add timeshift col and timeshift col with Linearly Interpolated Values
+    # Add timeshift col and timeshift col with Linearly Interpolated Values (LIP)
     df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
     df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']
 
-    # Add
+    # Add offset col that assumes the video is played at the same speed as the other to do a "timeshift"
     df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
     df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
 
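To make the new OFFSET comment concrete: for a source clip that simply starts 3 s later but plays at the same speed, the offset column is constant (toy values, not from the commit):

import numpy as np
import pandas as pd

df = pd.DataFrame({"TARGET_S": [0.0, 0.2, 0.4], "SOURCE_S": [3.0, 3.2, 3.4]})
df["OFFSET"] = df["SOURCE_S"] - df["TARGET_S"] - np.min(df["SOURCE_S"])
print(np.round(df["OFFSET"], 6).tolist())  # [0.0, 0.0, 0.0] -- constant when speeds match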
@@ -163,4 +225,5 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTA
 
     # Add time column for plotting
     df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
+
     return df
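Taken together, the newly documented functions suggest a pipeline; a hedged end-to-end usage sketch, assuming videomatch.py is importable and with placeholder URLs:

from videomatch import (compare_videos, get_change_points, get_decent_distance,
                        get_video_index, get_videomatch_df)
from config import MIN_DISTANCE, MAX_DISTANCE

source_url = "https://example.com/source.mp4"  # placeholder
target_url = "https://example.com/target.mp4"  # placeholder

# Index both videos (cached to .index files on first run).
video_index, hash_vectors = get_video_index(source_url)
target_index, _ = get_video_index(target_url)

# Find a workable match distance, then compare at that distance.
distance = get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
if distance is not None:
    lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE=distance)
    df = get_videomatch_df(lims, D, I, hash_vectors, distance)
    # The default metric column is presumably built inside get_videomatch_df's elided lines.
    change_points = get_change_points(df)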