add docstring

- Dockerfile +13 -9
- src/display_gloss.py +196 -71
- src/{local_dataset → enhanced_dataset} +0 -0
- src/main.py +24 -4
- src/synonyms_preprocess.py +87 -10
Dockerfile
CHANGED
@@ -5,24 +5,28 @@ RUN apt-get update && \
     apt-get install ffmpeg libsm6 libxext6 -y && \
     apt-get clean
 
-# Install the dependancies
+# ---- Install the dependencies
+#
 COPY requirements.txt /
 RUN pip install --no-cache-dir -r requirements.txt
 
-#
-#
-#
-
+# ---- RUN python
+# ---- Will execute nltk.download('wordnet')
+#
 RUN [ "python", "-c", "import nltk; nltk.download('wordnet', download_dir='/usr/local/nltk_data')" ]
 
-# Copy the code files
+# ---- Copy the code files
+#
 COPY src /
 
-# Listen to port
+# ---- Listen to port 5000
+#
 EXPOSE 5000
 
-# Define the working dir in the contener
+# ---- Define the working dir in the container
+#
 WORKDIR /
 
-# Commande to start the app
+# ---- Command to start the app
+# ----
 CMD ["gunicorn", "--bind", "0.0.0.0:5000", "main:app"]
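The RUN python step bakes WordNet into the image at /usr/local/nltk_data at build time, so the app never downloads corpora at runtime. A minimal sanity check, not part of this commit, that can be run inside the container:

import nltk

# /usr/local/nltk_data is normally on nltk's default search path in the official
# Python images; appending it explicitly removes any dependency on sys.prefix.
nltk.data.path.append('/usr/local/nltk_data')

from nltk.corpus import wordnet
print(wordnet.synsets('book')[:3])  # resolves locally, no download prompt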
src/display_gloss.py
CHANGED
@@ -2,36 +2,44 @@ import cv2
 import json
 import numpy as np
 import pandas as pd
-import os
 import time
 
+
 def draw_hands_connections(frame, hand_landmarks):
     '''
-    Draw white lines between relevant
-
+    Draw white lines on the given frame between relevant hand keypoints.
+
     Parameters
     ----------
-    frame: numpy array
-
+    frame: numpy array
+        The frame on which we want to draw.
+    hand_landmarks: dict
+        Dictionary mapping keypoint IDs (integers) to hand landmarks
+        (lists of two floats corresponding to the coordinates) for both hands.
 
-
-
-    frame: numpy array
+    Returns
+    -------
+    frame: numpy array
+        The frame with the newly drawn hand connections.
     '''
-
+
+    # ---- Define hand_connections between keypoints to draw
+    #
     hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
                         [5, 6], [6, 7], [7, 8],
                         [9, 10], [10, 11], [11, 12],
                         [13, 14], [14, 15], [15, 16],
                         [17, 18], [18, 19], [19, 20]] #[5, 2], [0, 17]]
 
-    # loop to draw left hand
+    # ---- loop to draw left hand connections
+    #
     for connection in hand_connections:
         landmark_start = hand_landmarks['left_hand'].get(str(connection[0]))
         landmark_end = hand_landmarks['left_hand'].get(str(connection[1]))
         cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
 
-    # loop to to draw right hand
+    # ---- loop to draw right hand connections
+    #
     for connection in hand_connections:
         landmark_start = hand_landmarks['right_hand'].get(str(connection[0]))
         landmark_end = hand_landmarks['right_hand'].get(str(connection[1]))
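A hedged usage sketch for the function above, with invented coordinates. Note that the integer keypoint IDs mentioned in the docstring are looked up as strings, so the JSON-loaded dictionaries must be keyed '0' through '20' per hand:

import numpy as np
from display_gloss import draw_hands_connections

frame = np.zeros((384, 576, 3), dtype=np.uint8)
hand_landmarks = {
    'left_hand':  {str(i): (100 + 4 * i, 200) for i in range(21)},
    'right_hand': {str(i): (380 + 4 * i, 200) for i in range(21)},
}
draw_hands_connections(frame, hand_landmarks)  # draws white segments in place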
@@ -41,20 +49,28 @@ def draw_hands_connections(frame, hand_landmarks):
 
 def draw_pose_connections(frame, pose_landmarks):
     '''
-    Draw white lines between relevant
-
+    Draw white lines on the given frame between relevant posture keypoints.
+
     Parameters
     ----------
-    frame: numpy array
-
-
-
-
-
+    frame: numpy array
+        The frame on which we want to draw.
+    pose_landmarks: dict
+        Dictionary mapping keypoint IDs (integers) to posture landmarks
+        (lists of two floats corresponding to the coordinates).
+
+    Returns
+    -------
+    frame: numpy array
+        The frame with the newly drawn posture connections.
     '''
+
+    # ---- define posture connections between keypoints to draw
+    #
     pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]
 
+    # ---- loop to draw posture connections
+    #
     for connection in pose_connections:
         landmark_start = pose_landmarks.get(str(connection[0]))
         landmark_end = pose_landmarks.get(str(connection[1]))
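The indices in pose_connections are consistent with MediaPipe Pose numbering (an assumption; the diff does not name the landmark model). Under that reading the five segments are the shoulder line and both arms:

# Assumed MediaPipe Pose indices for the pairs in pose_connections.
POSE_LANDMARK_NAMES = {
    11: 'left_shoulder', 12: 'right_shoulder',
    13: 'left_elbow', 14: 'right_elbow',
    15: 'left_wrist', 16: 'right_wrist',
}
# [11, 12] -> shoulders; [11, 13], [13, 15] -> left arm; [12, 14], [14, 16] -> right arm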
@@ -64,31 +80,38 @@ def draw_pose_connections(frame, pose_landmarks):
 
 def draw_face_connections(frame, face_landmarks):
     '''
-    Draw white lines between relevant
-
+    Draw white lines on the given frame between relevant face keypoints.
+
     Parameters
     ----------
-    frame: numpy array
-
-
-
-
-
+    frame: numpy array
+        The frame on which we want to draw.
+    face_landmarks: dict
+        Dictionary mapping keypoint IDs (integers) to face landmarks
+        (lists of two floats corresponding to the coordinates).
+
+    Returns
+    -------
+    frame: numpy array
+        The frame with the newly drawn face connections.
     '''
-    # define pose connections
+    # ---- define face connections
+    #
     connections_dict = {'lipsUpperInner_connections' : [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],\
-                        'lipsLowerInner_connections' : [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],\
-                        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],\
-                        'rightEyeLower0' : [33, 7, 163, 144, 145, 153, 154, 155, 133],\
-                        'rightEyebrowLower' : [35, 124, 46, 53, 52, 65],\
-                        'leftEyeUpper0' : [466, 388, 387, 386, 385, 384, 398],\
-                        'leftEyeLower0' : [263, 249, 390, 373, 374, 380, 381, 382, 362],\
-                        'leftEyebrowLower' : [265, 353, 276, 283, 282, 295],\
-                        'noseTip_midwayBetweenEye' : [1, 168],\
-                        'noseTip_noseRightCorner' : [1, 98],\
-                        'noseTip_LeftCorner' : [1, 327]\
-                        }
+                        'lipsLowerInner_connections' : [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],\
+                        'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],\
+                        'rightEyeLower0' : [33, 7, 163, 144, 145, 153, 154, 155, 133],\
+                        'rightEyebrowLower' : [35, 124, 46, 53, 52, 65],\
+                        'leftEyeUpper0' : [466, 388, 387, 386, 385, 384, 398],\
+                        'leftEyeLower0' : [263, 249, 390, 373, 374, 380, 381, 382, 362],\
+                        'leftEyebrowLower' : [265, 353, 276, 283, 282, 295],\
+                        'noseTip_midwayBetweenEye' : [1, 168],\
+                        'noseTip_noseRightCorner' : [1, 98],\
+                        'noseTip_LeftCorner' : [1, 327]\
+                        }
 
+    # ---- loop to draw face connections
+    #
     for keypoints_list in connections_dict.values():
         for index in range(len(keypoints_list)):
             if index + 1 < len(keypoints_list):
@@ -98,20 +121,78 @@ def draw_face_connections(frame, face_landmarks):
     return frame
 
 def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
+    '''
+    Resize landmark coordinates by applying specific scaling factors
+    to both the width and height of the frame.
+
+    Parameters
+    ----------
+    landmarks: dict
+        Dictionary mapping keypoint IDs (integers) to landmarks
+        (lists of two floats corresponding to the coordinates).
+    resize_rate_width: float
+        Scaling factor applied to the x-coordinate (width).
+    resize_rate_height: float
+        Scaling factor applied to the y-coordinate (height).
+
+    Returns
+    -------
+    landmarks: dict
+        Dictionary mapping keypoint IDs (integers) to the newly resized landmarks
+        (lists of two integers corresponding to the coordinates).
+    '''
+
     for keypoint in landmarks.keys():
         landmark_x, landmark_y = landmarks[keypoint]
         landmarks[keypoint] = [int(resize_rate_width * landmark_x), int(resize_rate_height*landmark_y)]
+
     return landmarks
 
 def generate_video(gloss_list, dataset, vocabulary_list):
-
-
+    '''
+    Generate a video stream from a list of glosses.
+
+    Parameters
+    ----------
+    gloss_list: list of str
+        List of glosses from which the signing video will be generated.
+    dataset: pandas.DataFrame
+        Dataset containing information about each gloss, including paths to landmark data.
+    vocabulary_list: list of str
+        List of tokens that have associated landmarks collected.
+
+    Yields
+    ------
+    frame: bytes
+        JPEG-encoded frame for streaming.
+    '''
+    # ---- Fix size of the frame to the most common size of video we have in the dataset
+    # (corresponding to signer ID 11 who has the maximum number of videos).
+    #
     FIXED_WIDTH, FIXED_HEIGHT = 576, 384
-    fps = 25
 
+    # ---- Fix the Frames Per Second (FPS) to match the videos collected in the dataset.
+    #
+    FPS = 25
+
+    # ---- Define characteristics for text display.
+    #
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 1
+    font_color = (0, 255, 0)
+    thickness = 2
+    line_type = cv2.LINE_AA
+
+    # ---- Loop over each gloss
+    #
     for gloss in gloss_list:
+        # ---- Skip if gloss not in the vocabulary_list.
+        #
         if not check_gloss_in_vocabulary(gloss, vocabulary_list):
             continue
+
+        # ---- Get landmarks of all the frames in the dataset corresponding to the appropriate gloss.
+        #
         video_id = select_video_id_from_gloss(gloss, dataset)
        video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
         with open(video_landmarks_path, 'r') as f:
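A worked example of the resize_landmarks scaling above, with invented numbers: a 720x480 source video mapped onto the fixed 576x384 canvas gives a factor of 0.8 on both axes. Note that the dict is modified in place:

from display_gloss import resize_landmarks

landmarks = {'0': [100.0, 250.0], '1': [360.0, 120.0]}
resized = resize_landmarks(landmarks, 576 / 720, 384 / 480)
print(resized)  # {'0': [80, 200], '1': [288, 96]}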
@@ -119,76 +200,120 @@ def generate_video(gloss_list, dataset, vocabulary_list):
             width = video_landmarks[-1].get('width')
             height = video_landmarks[-1].get('height')
 
-        #
+        # ---- Calculate resize rate for future landmark rescaling.
+        #
         resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT/height
 
-
-
-        font_scale = 1
-        font_color = (0, 255, 0)
-        thickness = 2
-        line_type = cv2.LINE_AA
-
+        # ---- Loop over each frame
+        #
         for frame_landmarks in video_landmarks[:-1]:
+            # ---- Initialize blank image and get all landmarks of the given frame.
+            #
             blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
             frame_hands_landmarks = frame_landmarks['hands_landmarks']
             frame_pose_landmarks = frame_landmarks['pose_landmarks']
             frame_face_landmarks = frame_landmarks['face_landmarks']
 
-            #
-            #
-
-            #for x, y in left_hand_landmarks_xy:
-            #    cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
-            #for x, y in right_hand_landmarks_xy:
-            #    cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
-
-            # pose_landmarks_xy = [(x, y) for x, y in frame_pose_landmarks.values()]
-            # for x, y in pose_landmarks_xy:
-            #    cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
-
-            # face_landmarks_xy = [(x, y) for x, y in frame_face_landmarks.values()]
-            # for x, y in face_landmarks_xy:
-            #    cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
+            # ---- Resize landmarks.
+            #
             frame_hands_landmarks_rs = {
                 'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
                 'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
             }
             frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
             frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
+
+            # ---- Draw relevant connections between keypoints on the frame.
+            #
             draw_hands_connections(blank_image, frame_hands_landmarks_rs)
             draw_pose_connections(blank_image, frame_pose_landmarks_rs)
             draw_face_connections(blank_image, frame_face_landmarks_rs)
 
-
+            # ---- Display text corresponding to the gloss on the frame.
+            #
+            text_size, _ = cv2.getTextSize(gloss, font, font_scale, thickness)
             text_x = (FIXED_WIDTH - text_size[0]) // 2
             text_y = FIXED_HEIGHT - 10
-            cv2.putText(blank_image,
+            cv2.putText(blank_image, gloss, (text_x, text_y), font, font_scale, font_color, thickness, line_type)
 
-
+            # ---- JPEG-encode the frame for streaming.
+            #
             _, buffer = cv2.imencode('.jpg', blank_image)
             frame = buffer.tobytes()
 
             yield (b'--frame\r\n'
                    b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
 
-            time.sleep(1 / fps)
+            time.sleep(1 / FPS)
+
 
-def load_data(dataset_path='local_dataset'):
+def load_data(dataset_path='enhanced_dataset'):
+    '''
+    Load the dataset that contains all information about glosses.
+
+    Parameters
+    ----------
+    dataset_path: str
+        Local path to the dataset.
+
+    Returns
+    -------
+    data_df: pandas.DataFrame
+        DataFrame containing the dataset with information about each gloss.
+    vocabulary_list: list of str
+        List of glosses (tokens) that have associated landmarks collected.
+    '''
 
     filepath = dataset_path
     data_df = pd.read_csv(filepath, dtype={'video_id': str})
     vocabulary_list = data_df['gloss'].tolist()
+
     return data_df, vocabulary_list
 
 
 def check_gloss_in_vocabulary(gloss, vocabulary_list):
+    '''
+    Check if the given gloss is in the vocabulary list.
+
+    Parameters
+    ----------
+    gloss: str
+        The gloss to check.
+    vocabulary_list: list of str
+        List of glosses (tokens) that have associated landmarks collected.
+
+    Returns
+    -------
+    bool
+        True if the gloss is in the vocabulary list, False otherwise.
+    '''
+
     return gloss in vocabulary_list
 
+
 def select_video_id_from_gloss(gloss, dataset):
+    '''
+    Select a video ID corresponding to the given gloss from the dataset.
+
+    Parameters
+    ----------
+    gloss : str
+        The gloss for which to retrieve the video ID.
+    dataset : pandas.DataFrame
+        A DataFrame containing information about each gloss, including 'signer_id', 'gloss', and 'video_id'.
+
+    Returns
+    -------
+    str
+        The video ID corresponding to the given gloss. If the gloss is found for 'signer_id' 11,
+        the video ID for that signer is returned; otherwise, the video ID for the gloss from the
+        entire dataset is returned.
+    '''
+    # ---- Prefer signer ID 11 because this signer recorded the most videos
+    #
     filtered_data_id_11 = dataset.loc[dataset['signer_id'] == 11]
+
     if gloss in filtered_data_id_11['gloss'].tolist():
         video_id = filtered_data_id_11.loc[filtered_data_id_11['gloss'] == gloss, 'video_id'].values
     else:
         video_id = dataset.loc[dataset['gloss'] == gloss, 'video_id'].values
+
     return video_id[0]
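A hedged sketch of how the generator can be consumed directly, assuming the enhanced_dataset CSV and its landmark files are present and that 'book' is a gloss in the vocabulary:

import display_gloss as dg

dataset, vocabulary_list = dg.load_data()
stream = dg.generate_video(['book'], dataset, vocabulary_list)
for _ in range(3):
    chunk = next(stream)  # one multipart JPEG frame per iteration
    assert chunk.startswith(b'--frame\r\nContent-Type: image/jpeg\r\n\r\n')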
src/{local_dataset → enhanced_dataset}
RENAMED
File without changes
src/main.py
CHANGED
@@ -1,40 +1,60 @@
 import display_gloss as dg
-import numpy as np
 import synonyms_preprocess as sp
 from NLP_Spacy_base_translator import NlpSpacyBaseTranslator
-from flask import Flask,
-
+from flask import Flask, render_template, Response, request
 
+# ---- Initialise Flask App
+#
 app = Flask(__name__)
 
-
+# ---- Render the homepage template
+#
 @app.route('/')
 def index():
+
     return render_template('index.html')
 
+# ---- Translate English input sentence into gloss sentence
+#
 @app.route('/translate/', methods=['POST'])
 def result():
+
+    # ---- Load NLP models and data
+    #
     nlp, dict_docs_spacy = sp.load_spacy_values()
     _, list_2000_tokens = dg.load_data()
 
     if request.method == 'POST':
+
+        # ---- Get the raw sentence and translate it to gloss
+        #
         sentence = request.form['inputSentence']
         eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=sentence)
         generated_gloss = eng_to_asl_translator.translate_to_gloss()
         gloss_list_lower = [gloss.lower() for gloss in generated_gloss.split() if gloss.isalnum() ]
         gloss_sentence_before_synonym = " ".join(gloss_list_lower)
+
+        # ---- Substitute gloss tokens with synonyms if not in the common token list
+        #
         gloss_list = [sp.find_synonyms(gloss, nlp, dict_docs_spacy, list_2000_tokens) for gloss in gloss_list_lower]
         gloss_sentence_after_synonym = " ".join(gloss_list)
+
+        # ---- Render the result template with both versions of the gloss sentence
+        #
         return render_template('translate.html',\
                                sentence=sentence,\
                                gloss_sentence_before_synonym=gloss_sentence_before_synonym,\
                                gloss_sentence_after_synonym=gloss_sentence_after_synonym)
 
+# ---- Generate video streaming from gloss_sentence
+#
 @app.route('/video_feed')
 def video_feed():
+
     dataset, list_2000_tokens = dg.load_data()
     sentence = request.args.get('gloss_sentence_to_display', '')
     gloss_list = sentence.split()
+
     return Response(dg.generate_video(gloss_list, dataset, list_2000_tokens), mimetype='multipart/x-mixed-replace; boundary=frame')
 
 if __name__ == "__main__":
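A hedged sketch for exercising the /translate/ route with Flask's built-in test client, no gunicorn needed; 'inputSentence' is the form field the route reads:

from main import app

with app.test_client() as client:
    response = client.post('/translate/', data={'inputSentence': 'I like books'})
    print(response.status_code)  # 200 if the templates and models load correctly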
src/synonyms_preprocess.py
CHANGED
@@ -4,10 +4,34 @@ from nltk.corpus import wordnet
 
 
 def load_spacy_values(filepath_model_spacy='model_spacy_synonyms', filepath_docs_spacy = 'dict_spacy_object.pkl'):
+    '''
+    Loads a spaCy model and a dictionary of spaCy Doc objects from a pickle file.
 
-    nlp = spacy.load(filepath_model_spacy)
+    Parameters
+    ----------
+    filepath_model_spacy : str
+        The local path to the spaCy model used for synonym detection.
+
+    filepath_docs_spacy : str
+        The local path to the pickle file containing a dictionary where the keys are tokens
+        and the values are the corresponding spaCy Doc objects serialized as bytes.
+
+    Returns
+    -------
+    nlp : spacy.language.Language
+        The loaded spaCy language model.
 
+    dict_docs_spacy : dict
+        A dictionary where the keys are tokens (str) and the values are spaCy Doc objects,
+        reconstructed from the serialized bytes.
+    '''
 
+    # ---- Load the spaCy NLP model
+    #
+    nlp = spacy.load(filepath_model_spacy)
+
+    # ---- Load pickle file and reconstruct the dictionary with tokens as keys and spaCy Doc objects as values
+    #
     with open(filepath_docs_spacy, 'rb') as file:
         dict_docs_spacy_bytes = pickle.load(file)
 
@@ -15,33 +39,86 @@ def load_spacy_values(filepath_model_spacy='model_spacy_synonyms', filepath_docs
 
     return nlp, dict_docs_spacy
 
+
 def find_antonyms(word):
+    '''
+    Generate a set of all the antonyms of a given word
+
+    Parameters
+    ----------
+    word : str
+        The word for which we want to find antonyms
+
+    Returns
+    -------
+    antonyms : set of str
+        A set of all the antonyms detected using nltk and WordNet
+    '''
+
     antonyms = set()
+
+    # ---- Load all the sets of synonyms of the word recorded from wordnet
+    #
     syn_set = wordnet.synsets(word)
+
+    # ---- Loop over each set of synonyms
+    #
     for syn in syn_set:
+        # ---- Loop over each synonym
+        #
         for lemma in syn.lemmas():
+            # ---- Add antonyms of the synonyms to the antonyms set
+            #
             if lemma.antonyms():
                 antonyms.add(lemma.antonyms()[0].name())
+
     return antonyms
 
-def find_synonyms(word, model, dict_embedding, dict_2000_tokens): #cluster_to_words, dbscan_model):
-    """
-    This function finds the most similar word in the same cluster, and excludes antonyms
-    """
 
-
+def find_synonyms(word, model, dict_embedding, list_2000_tokens):
+    '''
+    Find the most similar token to a given word.
+
+    Parameters
+    ----------
+    word : str
+        The word for which we want to find the most similar token
+
+    model : spacy.language.Language
+        spaCy language model to use for the detection of the synonym
+
+    dict_embedding: dict
+        A dictionary where the keys are tokens (str) and the values are spaCy Doc objects
+
+    list_2000_tokens : list of str
+        A list of 2000 tokens against which the gloss will be checked.
+
+    Returns
+    -------
+    most_similar_token : str
+        The most similar token to the given word
+    '''
+
-    if word in dict_2000_tokens:
+    # ---- Skip synonym detection if the word is already in the list_2000_tokens
+    #
+    if word in list_2000_tokens:
         return word
     else:
+        # ---- Remove antonyms of the given word from the list_2000_tokens (a word and an antonym might be similar in embedding representation)
+        #
         antonyms = find_antonyms(word)
-
+        list_2000_tokens_less_antonyms = [token for token in list_2000_tokens if token not in antonyms]
 
+        # ---- Generate a list of tuples (token, similarity value between the embedding of the given word and the embedding of each token of the list_2000_tokens)
+        #
         word_embedding = model(word)
-
         similarities=[]
 
-        for token in
+        for token in list_2000_tokens_less_antonyms:
             similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
-
+
+        # ---- Extract the most similar token of the list
+        #
         most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
 
         return most_similar_token
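A hedged usage sketch for the two helpers: find_antonyms needs only WordNet, while find_synonyms additionally assumes the candidate tokens have entries in the pickled spaCy Doc dictionary:

from synonyms_preprocess import load_spacy_values, find_antonyms, find_synonyms

print(find_antonyms('good'))  # typically includes 'bad' and 'evil'

nlp, dict_docs_spacy = load_spacy_values()
toy_vocabulary = ['happy', 'sad', 'angry']  # stand-in for the real 2000-token list
# Works only if these tokens exist as keys in dict_docs_spacy.
print(find_synonyms('joyful', nlp, dict_docs_spacy, toy_vocabulary))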