aiface committed on
Commit
327b68f
Β·
1 Parent(s): 907b7f3

Upload 12 files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ preprocessing/shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
preprocessing/20words_mean_face.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbf68b2044171e1160716df7c53e8bbfaa0ee8c61fb41171d04cb6092bb81422
3
+ size 1168
preprocessing/30word - Copy.csv ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ο»ΏthΓ΄ng,1916,,,,thΓ΄ng,tin,cα»§a,vΓ ,cΓ‘c,cΓ³,trong,lΓ ,ngΓ y,Δ‘Γ£,Δ‘αΊ§u,theo,cΓ΄ng,tΖ°,quΓ½
2
+ tin,1740,,,,1916,1740,1687,1640,1566,1513,1512,1344,1330,1284,1202,1197,1165,1148,1119
3
+ cα»§a,1687,,,,nhα»―ng,thΓ nh,cho,vα»‹,tαΊΏ,về,phα»‘,thΓ‘ng,Δ‘α»™ng,sαΊ£n,vα»›i,được,chΓ­nh,sα»‘,dΓ΅i
4
+ vΓ ,1640,,,,1076,1065,1050,971,937,925,919,903,902,883,873,869,797,760,750
5
+ cΓ‘c,1566,,,,,,,,,,,,,,,,,,
6
+ cΓ³,1513,,,,,,,,,,,,,,,,,,
7
+ trong,1512,,,,,,,,,,,,,,,,,,
8
+ lΓ ,1344,,,,,,,,,,,,,,,,,,
9
+ ngΓ y,1330,,,,,,,,,,,,,,,,,,
10
+ Δ‘Γ£,1284,,,,,,,,,,,,,,,,,,
11
+ Δ‘αΊ§u,1202,,,,,,,,,,,,,,,,,,
12
+ theo,1197,,,,,,,,,,,,,,,,,,
13
+ cΓ΄ng,1165,,,,,,,,,,,,,,,,,,
14
+ tΖ°,1148,,,,,,,,,,,,,,,,,,
15
+ quΓ½,1119,,,,,,,,,,,,,,,,,,
16
+ nhα»―ng,1076,,,,,,,,,,,,,,,,,,
17
+ thΓ nh,1065,,,,,,,,,,,,,,,,,,
18
+ cho,1050,,,,,,,,,,,,,,,,,,
19
+ vα»‹,971,,,,,,,,,,,,,,,,,,
20
+ tαΊΏ,937,,,,,,,,,,,,,,,,,,
21
+ về,925,,,,,,,,,,,,,,,,,,
22
+ phα»‘,919,,,,,,,,,,,,,,,,,,
23
+ thΓ‘ng,903,,,,,,,,,,,,,,,,,,
24
+ Δ‘α»™ng,902,,,,,,,,,,,,,,,,,,
25
+ sαΊ£n,883,,,,,,,,,,,,,,,,,,
26
+ vα»›i,873,,,,,,,,,,,,,,,,,,
27
+ được,869,,,,,,,,,,,,,,,,,,
28
+ chΓ­nh,797,,,,,,,,,,,,,,,,,,
29
+ sα»‘,760,,,,,,,,,,,,,,,,,,
30
+ dΓ΅i,750,,,,,,,,,,,,,,,,,,
preprocessing/30word.csv ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ thΓ΄ng,1916
2
+ tin,1740
3
+ cα»§a,1687
4
+ vΓ ,1640
5
+ cΓ‘c,1566
6
+ cΓ³,1513
7
+ trong,1512
8
+ lΓ ,1344
9
+ ngΓ y,1330
10
+ Δ‘Γ£,1284
11
+ Δ‘αΊ§u,1202
12
+ theo,1197
13
+ cΓ΄ng,1165
14
+ tΖ°,1148
15
+ quΓ½,1119
16
+ nhα»―ng,1076
17
+ thΓ nh,1065
18
+ cho,1050
19
+ vα»‹,971
20
+ tαΊΏ,937
21
+ về,925
22
+ phα»‘,919
23
+ thΓ‘ng,903
24
+ Δ‘α»™ng,902
25
+ sαΊ£n,883
26
+ vα»›i,873
27
+ được,869
28
+ chΓ­nh,797
29
+ sα»‘,760
30
+ dΓ΅i,750
preprocessing/README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Pre-processing
2
+
3
+ * To get mouth ROIs
4
+
5
+ Run mouth cropping script to save grayscale mouth ROIs. We assume you save cropped mouths to *`$TCN_LIPREADING_ROOT/datasets/visual_data/`*. You can choose `--testset-only` to produce testing set.
6
+
7
+ ```Shell
8
+ python crop_mouth_from_video.py --video-direc <LRW-DIREC> \
9
+ --landmark-direc <LANDMARK-DIREC> \
10
+ --save-direc <MOUTH-ROIS-DIRECTORY> \
11
+ --convert-gray \
12
+ --testset-only
13
+ ```
14
+
15
+ * To get audio waveforms
16
+
17
+ Run format conversion script to extract audio waveforms (.npz) from raw videos. We assume you save audio waveforms to *`$TCN_LIPREADING_ROOT/datasets/audio_data/`*. You can choose `--testset-only` to produce testing set.
18
+
19
+ ```Shell
20
+ python extract_audio_from_video.py --video-direc <LRW-DIREC> \
21
+ --save-direc <AUDIO-WAVEFORMS-DIRECTORY> \
22
+ --testset-only
23
+ ```
preprocessing/anhtrasn.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transcript": "Xin kΓ­nh chΓ o quΓ½ vα»‹ KΓ­nh mời quΓ½ vα»‹",
3
+ "words": [
4
+ {
5
+ "end_time": 7.6,
6
+ "start_time": 0.0,
7
+ "word": "Xin"
8
+ },
9
+ {
10
+ "end_time": 7.8,
11
+ "start_time": 7.6,
12
+ "word": "kΓ­nh"
13
+ },
14
+ {
15
+ "end_time": 8.0,
16
+ "start_time": 7.8,
17
+ "word": "chΓ o"
18
+ },
19
+ {
20
+ "end_time": 8.0,
21
+ "start_time": 8.0,
22
+ "word": "quΓ½"
23
+ },
24
+ {
25
+ "end_time": 8.2,
26
+ "start_time": 8.0,
27
+ "word": "vα»‹"
28
+ }
29
+ ]
30
+ }
preprocessing/crop_mouth_from_video.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright 2020 Imperial College London (Pingchuan Ma)
5
+ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
6
+
7
+ """ Crop Mouth ROIs from videos for lipreading"""
8
+
9
+ # from msilib.schema import File
10
+ from ast import Pass
11
+ import os
12
+ import cv2 # OpenCV 라이브러리
13
+ import glob # λ¦¬λˆ…μŠ€μ‹ 경둜 ν‘œκΈ°λ²•μ„ μ‚¬μš©ν•˜μ—¬ μ›ν•˜λŠ” 폴더/파일 리슀트 μ–»μŒ
14
+ import argparse # λͺ…λ Ήν–‰ 인자λ₯Ό νŒŒμ‹±ν•΄μ£ΌλŠ” λͺ¨λ“ˆ
15
+ import numpy as np
16
+ from collections import deque # collections λͺ¨λ“ˆμ— μžˆλŠ” 데크 뢈러였기 # 데크: μŠ€νƒκ³Ό 큐λ₯Ό ν•©μΉœ 자료ꡬ쑰
17
+
18
+ from utils import * # utils.py λͺ¨λ“ˆμ— μžˆλŠ” λͺ¨λ“  ν•¨μˆ˜ 뢈러였기
19
+ from transform import * # transform.py λͺ¨λ“ˆμ— μžˆλŠ” λͺ¨λ“  ν•¨μˆ˜ 뢈러였기
20
+
21
+ import dlib # face landmark μ°ΎλŠ” 라이브러리
22
+ import face_alignment # face landmark μ°ΎλŠ” 라이브러리
23
+ from PIL import Image
24
+
25
+
26
+ # μΈμžκ°’μ„ λ°›μ•„μ„œ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
27
+ def load_args(default_config=None):
28
+ # μΈμžκ°’μ„ λ°›μ•„μ„œ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
29
+ parser = argparse.ArgumentParser(description='Lipreading Pre-processing')
30
+
31
+ # μž…λ ₯받을 μΈμžκ°’ 등둝
32
+ # -- utils
33
+ parser.add_argument('--video-direc', default=None, help='raw video directory')
34
+ parser.add_argument('--video-format', default='.mp4', help='raw video format')
35
+ parser.add_argument('--landmark-direc', default=None, help='landmark directory')
36
+ parser.add_argument('--filename-path', default='./vietnamese_detected_face_30.csv', help='list of detected video and its subject ID')
37
+ parser.add_argument('--save-direc', default=None, help='the directory of saving mouth ROIs')
38
+ # -- mean face utils
39
+ parser.add_argument('--mean-face', default='./20words_mean_face.npy', help='mean face pathname')
40
+ # -- mouthROIs utils
41
+ parser.add_argument('--crop-width', default=96, type=int, help='the width of mouth ROIs')
42
+ parser.add_argument('--crop-height', default=96, type=int, help='the height of mouth ROIs')
43
+ parser.add_argument('--start-idx', default=48, type=int, help='the start of landmark index')
44
+ parser.add_argument('--stop-idx', default=68, type=int, help='the end of landmark index')
45
+ parser.add_argument('--window-margin', default=12, type=int, help='window margin for smoothed_landmarks')
46
+ # -- convert to gray scale
47
+ parser.add_argument('--convert-gray', default=False, action='store_true', help='convert2grayscale')
48
+ # -- test set only
49
+ parser.add_argument('--testset-only', default=False, action='store_true', help='process testing set only')
50
+
51
+ # μž…λ ₯받은 μΈμžκ°’μ„ args에 μ €μž₯ (type: namespace)
52
+ args = parser.parse_args()
53
+ return args
54
+
55
+ args = load_args() # args νŒŒμ‹± 및 λ‘œλ“œ
56
+
57
+ # -- mean face utils
58
+ STD_SIZE = (256, 256)
59
+ mean_face_landmarks = np.load(args.mean_face) # 20words_mean_face.npy
60
+ stablePntsIDs = [33, 36, 39, 42, 45]
61
+
62
+
63
+ # μ˜μƒμ—μ„œ λžœλ“œλ§ˆν¬ λ°›μ•„μ„œ μž…μˆ  μž˜λΌλ‚΄κΈ°
64
+ def crop_patch( video_pathname, landmarks):
65
+
66
+ """Crop mouth patch
67
+ :param str video_pathname: pathname for the video_dieo # μ˜μƒ μœ„μΉ˜
68
+ :param list landmarks: interpolated landmarks # λ³΄κ°„λœ λžœλ“œλ§ˆν¬
69
+ """
70
+
71
+ frame_idx = 0 # ν”„λ ˆμž„ 인덱슀 번호 0 으둜 μ΄ˆκΈ°ν™”
72
+ frame_gen = read_video(video_pathname) # λΉ„λ””μ˜€ 뢈러였기
73
+
74
+ # λ¬΄ν•œ 반볡
75
+ while True:
76
+ try:
77
+ frame = frame_gen.__next__() ## -- BGR # 이미지 ν”„λ ˆμž„ ν•˜λ‚˜μ”© 뢈러였기
78
+ except StopIteration: # 더 이상 next μš”μ†Œκ°€ μ—†μœΌλ©΄ StopIterraion Exception λ°œμƒ
79
+ break # while λΉ μ Έλ‚˜κ°€κΈ°
80
+ if frame_idx == 0: # ν”„λ ˆμž„ 인덱슀 λ²ˆν˜Έκ°€ 0일 경우
81
+ q_frame, q_landmarks = deque(), deque() # 데크 생성
82
+ sequence = []
83
+
84
+ q_landmarks.append(landmarks[frame_idx]) # ν”„λ ˆμž„ 인덱슀 λ²ˆν˜Έμ— λ§žλŠ” λžœλ“œλ§ˆν¬ 정보 μΆ”κ°€
85
+ q_frame.append(frame) # ν”„λ ˆμž„ 정보 μΆ”κ°€
86
+ if len(q_frame) == args.window_margin:
87
+ smoothed_landmarks = np.mean(q_landmarks, axis=0) # 각 그룹의 같은 μ›μ†ŒλΌλ¦¬ 평균
88
+ cur_landmarks = q_landmarks.popleft() # 데크 제일 μ™Όμͺ½ κ°’ κΊΌλ‚΄κΈ°
89
+ cur_frame = q_frame.popleft() # 데크 제일 μ™Όμͺ½ κ°’ κΊΌλ‚΄κΈ°
90
+ # -- affine transformation # μ•„ν•€ λ³€ν™˜
91
+ trans_frame, trans = warp_img( smoothed_landmarks[stablePntsIDs, :],
92
+ mean_face_landmarks[stablePntsIDs, :],
93
+ cur_frame,
94
+ STD_SIZE)
95
+ trans_landmarks = trans(cur_landmarks)
96
+ # -- crop mouth patch # μž…μˆ  μž˜λΌλ‚΄κΈ°
97
+ sequence.append( cut_patch( trans_frame,
98
+ trans_landmarks[args.start_idx:args.stop_idx],
99
+ args.crop_height//2,
100
+ args.crop_width//2,))
101
+ if frame_idx == len(landmarks)-1:
102
+ while q_frame:
103
+ cur_frame = q_frame.popleft() # 데크 제일 μ™Όμͺ½ κ°’ κΊΌλ‚΄κΈ°
104
+ # -- transform frame # ν”„λ ˆμž„ λ³€ν™˜
105
+ trans_frame = apply_transform( trans, cur_frame, STD_SIZE)
106
+ # -- transform landmarks # λžœλ“œλ§ˆν¬ λ³€ν™˜
107
+ trans_landmarks = trans(q_landmarks.popleft())
108
+ # -- crop mouth patch # μž…μˆ  μž˜λΌλ‚΄κΈ°
109
+ sequence.append( cut_patch( trans_frame,
110
+ trans_landmarks[args.start_idx:args.stop_idx],
111
+ args.crop_height//2,
112
+ args.crop_width//2,))
113
+ return np.array(sequence) # μž…μˆ  numpy λ°˜ν™˜
114
+ frame_idx += 1 # ν”„λ ˆμž„ 인덱슀 번호 증가
115
+ return None
116
+
117
+
118
+ # λžœλ“œλ§ˆν¬ 보간
119
+ def landmarks_interpolate(landmarks):
120
+
121
+ """Interpolate landmarks
122
+ param list landmarks: landmarks detected in raw videos # 원본 μ˜μƒ λ°μ΄ν„°μ—μ„œ κ²€μΆœν•œ λžœλ“œλ§ˆν¬
123
+ """
124
+
125
+ valid_frames_idx = [idx for idx, _ in enumerate(landmarks) if _ is not None] # λžœλ“œλ§ˆν¬ 번호 list 생성
126
+
127
+ # λžœλ“œλ§ˆν¬ 번호 list κ°€ λΉ„μ–΄μžˆλ‹€λ©΄
128
+ if not valid_frames_idx:
129
+ return None
130
+
131
+ # 1λΆ€ν„° (λžœλ“œλ§ˆν¬ 번호 list 개수-1)만큼 for λ¬Έ 반볡
132
+ for idx in range(1, len(valid_frames_idx)):
133
+ if valid_frames_idx[idx] - valid_frames_idx[idx-1] == 1: # ν˜„μž¬ λžœλ“œλ§ˆν¬ 번호 - 이전 λžœλ“œλ§ˆν¬ 번호 == 1 일 경우
134
+ continue # μ½”λ“œ μ‹€ν–‰ κ±΄λ„ˆλ›°κΈ°
135
+ else: # μ•„λ‹ˆλΌλ©΄
136
+ landmarks = linear_interpolate(landmarks, valid_frames_idx[idx-1], valid_frames_idx[idx]) # λžœλ“œλ§ˆν¬ μ—…λ°μ΄νŠΈ(보간)
137
+
138
+ valid_frames_idx = [idx for idx, _ in enumerate(landmarks) if _ is not None] # λžœλ“œλ§ˆν¬ 번호 list 생성
139
+ # -- Corner case: keep frames at the beginning or at the end failed to be detected. # μ‹œμž‘ λ˜λŠ” 끝 ν”„λ ˆμž„μ„ λ³΄κ΄€ν•˜μ§€ λͺ»ν•¨
140
+ if valid_frames_idx:
141
+ landmarks[:valid_frames_idx[0]] = [landmarks[valid_frames_idx[0]]] * valid_frames_idx[0] # λžœλ“œλ§ˆν¬ 첫번째 ν”„λ ˆμž„ 정보 μ €μž₯
142
+ landmarks[valid_frames_idx[-1]:] = [landmarks[valid_frames_idx[-1]]] * (len(landmarks) - valid_frames_idx[-1]) # λžœλ“œλ§ˆν¬ λ§ˆμ§€λ§‰ ν”„λ ˆμž„ 정보 μ €μž₯
143
+
144
+ valid_frames_idx = [idx for idx, _ in enumerate(landmarks) if _ is not None] # λžœλ“œλ§ˆν¬ 번호 list 생성
145
+ # λžœλ“œλ§ˆν¬ 번호 list 개수 == λ³΄κ°„ν•œ λžœλ“œλ§ˆν¬ 개수 확인, μ•„λ‹ˆλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
146
+ assert len(valid_frames_idx) == len(landmarks), "not every frame has landmark" # μ›ν•˜λŠ” 쑰건의 λ³€μˆ˜κ°’μ„ λ³΄μ¦ν•˜κΈ° μœ„ν•΄ μ‚¬μš©
147
+
148
+ return landmarks # λžœλ“œλ§ˆν¬ λ°˜ν™˜
149
+
150
+
151
+ def get_yield(output_video):
152
+ for frame in output_video:
153
+ yield frame
154
+
155
+
156
+ lines = open(args.filename_path).read().splitlines() # λ¬Έμžμ—΄μ„ '\n' κΈ°μ€€μœΌλ‘œ μͺΌκ°  ν›„ list 생성
157
+ lines = list(filter(lambda x: 'test' == x.split('/')[-2], lines)) if args.testset_only else lines # args.testset_only 값이 μžˆλ‹€λ©΄ test 폴더 속 파일λͺ…λ§Œ λΆˆλŸ¬μ™€μ„œ list 생성, μ•„λ‹ˆλΌλ©΄ μ›λž˜ lines κ·ΈλŒ€λ‘œ κ°’ μœ μ§€
158
+
159
+ # lines 개수만큼 반볡문 μ‹€ν–‰
160
+ for filename_idx, line in enumerate(lines):
161
+
162
+ # 파일λͺ…, μ‚¬λžŒid
163
+ filename, person_id = line.split(',')
164
+ print('idx: {} \tProcessing.\t{}'.format(filename_idx, filename)) # 파일 인덱슀번호, 파일λͺ… 좜λ ₯
165
+
166
+ video_pathname = os.path.join(args.video_direc, filename+args.video_format) # μ˜μƒλ””λ ‰ν† λ¦¬ + 파일λͺ….λΉ„λ””μ˜€ν¬λ§·/
167
+ landmarks_pathname = os.path.join(args.landmark_direc, filename+'.npz') # μ €μž₯디렉토리 + λžœλ“œλ§ˆν¬ 파일λͺ….npz
168
+ dst_pathname = os.path.join( args.save_direc, filename+'.npz') # μ €μž₯디렉토리 + κ²°κ³Όμ˜μƒ 파일λͺ….npz
169
+
170
+ # 파일이 μžˆλŠ”μ§€ 확인, μ—†μœΌλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
171
+ assert os.path.isfile(video_pathname), "File does not exist. Path input: {}".format(video_pathname) # μ›ν•˜λŠ” 쑰건의 λ³€μˆ˜κ°’μ„ λ³΄μ¦ν•˜κΈ° μœ„ν•΄ μ‚¬μš©
172
+
173
+ # video 에 λŒ€ν•œ face landmark npz 파일이 μ—†κ³  μ˜μƒ ν™•μž₯자 avi 인 경우 dlib 으둜 직접 npz 파일 생성
174
+ if not os.path.exists(landmarks_pathname) and video_pathname.split('.')[-1] == 'mp4':
175
+
176
+ # dlib μ‚¬μš©ν•΄μ„œ face landmark μ°ΎκΈ°
177
+ def get_face_landmark(img):
178
+ detector_hog = dlib.get_frontal_face_detector()
179
+ dlib_rects = detector_hog(img, 1)
180
+ model_path = os.path.dirname(os.path.abspath(__file__)) + '/shape_predictor_68_face_landmarks.dat'
181
+ landmark_predictor = dlib.shape_predictor(model_path)
182
+
183
+ # dlib 으둜 face landmark 찾기
184
+ list_landmarks = []
185
+ for dlib_rect in dlib_rects:
186
+ points = landmark_predictor(img, dlib_rect)
187
+ list_points = list(map(lambda p: (p.x, p.y), points.parts()))
188
+ list_landmarks.append(list_points)
189
+
190
+ input_width, input_height = img.shape
191
+ output_width, output_height = (256, 256)
192
+ width_rate = input_width / output_width
193
+ height_rate = input_height / output_height
194
+ img_rate = [(width_rate, height_rate)]*68
195
+ face_rate = np.array(img_rate)
196
+ eye_rate = np.array(img_rate[36:48])
197
+
198
+ # face landmark list κ°€ λΉ„μ–΄μžˆμ§€ μ•Šμ€ 경우
199
+ if list_landmarks:
200
+ for dlib_rect, landmark in zip(dlib_rects, list_landmarks):
201
+ face_landmark = np.array(landmark) # face landmark
202
+ eye_landmark = np.array(landmark[36:48]) # eye landmark
203
+
204
+ return face_landmark, eye_landmark
205
+ # face landmark list κ°€ λΉ„μ–΄μžˆλŠ” 경우
206
+ else:
207
+ landmark = [(0.0, 0.0)] * 68
208
+ face_landmark = np.array(landmark) # face landmark
209
+ eye_landmark = np.array(landmark[36:48]) # eye landmark
210
+ return face_landmark, eye_landmark
211
+
212
+
213
+ target_frames = 29 # μ›ν•˜λŠ” ν”„λ ˆμž„ 개수
214
+ video = videoToArray(video_pathname, is_gray=args.convert_gray) # μ˜μƒ 정보 μ•žμ— μ˜μƒ ν”„λ ˆμž„ 개수λ₯Ό μΆ”κ°€ν•œ numpy
215
+ output_video = frameAdjust(video, target_frames) # frame sampling (ν”„λ ˆμž„ 개수 λ§žμΆ”κΈ°)
216
+
217
+ multi_sub_landmarks = []
218
+ person_landmarks = []
219
+ frame_landmarks = []
220
+ for frame_idx, frame in enumerate(get_yield(output_video)):
221
+ print(f'\n ------------frame {frame_idx}------------ ')
222
+
223
+ facial_landmarks, eye_landmarks = get_face_landmark(frame) # dlib μ‚¬μš©ν•΄μ„œ face landmark μ°ΎκΈ°
224
+
225
+ person_landmarks = {
226
+ 'id': 0,
227
+ 'most_recent_fitting_scores': np.array([2.0,2.0,2.0]),
228
+ 'facial_landmarks': facial_landmarks,
229
+ 'roll': 7,
230
+ 'yaw': 3.5,
231
+ 'eye_landmarks': eye_landmarks,
232
+ 'fitting_scores_updated': True,
233
+ 'pitch': -0.05
234
+ }
235
+ frame_landmarks.append(person_landmarks)
236
+ multi_sub_landmarks.append(np.array(frame_landmarks.copy(), dtype=object))
237
+
238
+ multi_sub_landmarks = np.array(multi_sub_landmarks) # list to numpy
239
+ save2npz(landmarks_pathname, data=multi_sub_landmarks) # face landmark npz μ €μž₯
240
+ print('\n ------------ save npz ------------ \n')
241
+
242
+ # video 에 λŒ€ν•œ face landmark npz 파일이 μžˆλŠ” 경우
243
+ else:
244
+
245
+ # 파일이 μžˆλŠ”μ§€ 확인, μ—†μœΌλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
246
+ assert os.path.isfile(landmarks_pathname), "File does not exist. Path input: {}".format(landmarks_pathname) # μ›ν•˜λŠ” 쑰건의 λ³€μˆ˜κ°’μ„ λ³΄μ¦ν•˜κΈ° μœ„ν•΄ μ‚¬μš©
247
+
248
+ # 파일이 μ‘΄μž¬ν•  경우
249
+ if os.path.exists(dst_pathname):
250
+ continue # μ½”λ“œ μ‹€ν–‰ κ±΄λ„ˆλ›°κΈ°
251
+
252
+ multi_sub_landmarks = np.load( landmarks_pathname, allow_pickle=True)['data'] # numpy 파일 μ—΄κΈ°
253
+ landmarks = [None] * len( multi_sub_landmarks) # λžœλ“œλ§ˆν¬ λ³€μˆ˜ μ΄ˆκΈ°ν™”
254
+ for frame_idx in range(len(landmarks)):
255
+ try:
256
+ landmarks[frame_idx] = multi_sub_landmarks[frame_idx][int(person_id)]['facial_landmarks'].astype(np.float64) # ν”„λ ˆμž„ 인덱슀 λ²ˆν˜Έμ—μ„œ μ‚¬λžŒid의 μ–Όκ΅΄ λžœλ“œλ§ˆν¬ 정보 κ°€μ Έμ˜€κΈ°
257
+ except IndexError: # ν•΄λ‹Ή 인덱슀 λ²ˆν˜Έμ— 깂이 μ—†μœΌλ©΄ IndexError λ°œμƒ
258
+ continue # μ½”λ“œ μ‹€ν–‰ κ±΄λ„ˆλ›°κΈ°
259
+
260
+ # face landmark κ°€ [(0,0)]*68 이 μ•„λ‹ˆλ©΄ λžœλ“œλ§ˆν¬ 보간 ν›„ npz 파일 생성
261
+ landmarks_empty_list = []
262
+ landmarks_empty = [(0, 0)]*68
263
+ landmarks_empty = np.array(landmarks_empty, dtype=object)
264
+ for i in range(len(landmarks_empty)):
265
+ landmarks_empty_list.append(landmarks_empty.copy())
266
+ condition = landmarks != landmarks_empty_list
267
+ if condition:
268
+ # -- pre-process landmarks: interpolate frames not being detected.
269
+ preprocessed_landmarks = landmarks_interpolate(landmarks) # λžœλ“œλ§ˆν¬ 보간
270
+ # λ³€μˆ˜κ°€ λΉ„μ–΄μžˆμ§€ μ•Šλ‹€λ©΄
271
+ if not preprocessed_landmarks:
272
+ continue # μ½”λ“œ μ‹€ν–‰ κ±΄λ„ˆλ›°κΈ°
273
+
274
+ # -- crop
275
+ sequence = crop_patch(video_pathname, preprocessed_landmarks) # μ˜μƒμ—μ„œ λžœλ“œλ§ˆν¬ λ°›μ•„μ„œ μž…μˆ  μž˜λΌλ‚΄κΈ°
276
+ # sequenceκ°€ λΉ„μ–΄μžˆλŠ”μ§€ 확인, λΉ„μ–΄μžˆμœΌλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
277
+ assert sequence is not None, "cannot crop from {}.".format(filename) # μ›ν•˜λŠ” 쑰건의 λ³€μˆ˜κ°’μ„ λ³΄μ¦ν•˜κΈ° μœ„ν•΄ μ‚¬μš©
278
+
279
+ # -- save
280
+ data = convert_bgr2gray(sequence) if args.convert_gray else sequence[...,::-1] # gray λ³€ν™˜
281
+ save2npz(dst_pathname, data=data) # 데이터λ₯Ό npz ν˜•μ‹μœΌλ‘œ μ €μž₯
282
+
283
+ print('Done.')
preprocessing/extract_audio_from_video.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Copyright 2020 Imperial College London (Pingchuan Ma)
5
+ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
6
+
7
+
8
+ """Transforms mp4 audio to npz. Code has strong assumptions on the dataset organization!"""
9
+
10
+ import os
11
+ import librosa # μŒμ› 데이터 뢄석 라이브러리
12
+ import argparse # λͺ…λ Ήν–‰ 인자λ₯Ό νŒŒμ‹±ν•΄μ£ΌλŠ” λͺ¨λ“ˆ
13
+
14
+ from utils import * # utils.py λͺ¨λ“ˆμ— μžˆλŠ” λͺ¨λ“  ν•¨μˆ˜(read_txt_lines(), save2npz(), read_video()) 뢈러였기
15
+
16
+
17
+ # μΈμžκ°’μ„ λ°›μ•„μ„œ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
18
+ def load_args(default_config=None):
19
+ # μΈμžκ°’μ„ 받을 수 μžˆλŠ” μΈμŠ€ν„΄μŠ€ 생성
20
+ parser = argparse.ArgumentParser(description='Extract Audio Waveforms')
21
+
22
+ # μž…λ ₯받을 μΈμžκ°’ 등둝
23
+ # -- utils
24
+ parser.add_argument('--video-direc', default=None, help='raw video directory')
25
+ parser.add_argument('--filename-path', default='./vietnamese_detected_face_30.csv', help='list of detected video and its subject ID')
26
+ parser.add_argument('--save-direc', default=None, help='the directory of saving audio waveforms (.npz)')
27
+ # -- test set only
28
+ parser.add_argument('--testset-only', default=False, action='store_true', help='process testing set only')
29
+
30
+ # μž…λ ₯받은 μΈμžκ°’μ„ args에 μ €μž₯ (type: namespace)
31
+ args = parser.parse_args()
32
+ return args
33
+
34
+ args = load_args() # args νŒŒμ‹± 및 λ‘œλ“œ
35
+
36
+ lines = open(args.filename_path).read().splitlines() # λ¬Έμžμ—΄μ„ '\m' κΈ°μ€€μœΌλ‘œ μͺΌκ°  ν›„ list 생성
37
+ lines = list(filter(lambda x: 'test' == x.split('/')[-2], lines)) if args.testset_only else lines # args.testset_only 값이 μžˆλ‹€λ©΄ test 폴더 속 파일λͺ…λ§Œ λΆˆλŸ¬μ™€μ„œ list 생성, μ•„λ‹ˆλΌλ©΄ μ›λž˜ lines κ·ΈλŒ€λ‘œ κ°’ μœ μ§€
38
+
39
+ # lines 개수만큼 반볡문 μ‹€ν–‰
40
+ for filename_idx, line in enumerate(lines):
41
+
42
+ # 파일λͺ…, μ‚¬λžŒid
43
+ filename, person_id = line.split(',')
44
+ print('idx: {} \tProcessing.\t{}'.format(filename_idx, filename)) # 파일 인덱슀번호, 파일λͺ… 좜λ ₯
45
+
46
+ video_pathname = os.path.join(args.video_direc, filename+'.mp4') # μ˜μƒλ””λ ‰ν† λ¦¬ + 파일λͺ….mp4
47
+ dst_pathname = os.path.join( args.save_direc, filename+'.npz') # μ €μž₯디렉토리 + 파일λͺ….npz
48
+
49
+ # 파일이 μžˆλŠ”μ§€ 확인, μ—†μœΌλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
50
+ assert os.path.isfile(video_pathname), "File does not exist. Path input: {}".format(video_pathname) # μ›ν•˜λŠ” 쑰건의 λ³€μˆ˜κ°’μ„ λ³΄μ¦ν•˜κΈ° μœ„ν•΄ μ‚¬μš©
51
+
52
+ # wav 파일 μ½λŠ” 라이브러리: librosa
53
+ # librosa 둜 데이터λ₯Ό 읽으면 데이터 λ²”μœ„κ°€ [-1,1]둜 μ •κ·œν™”λ¨
54
+ # librosa μž…λ ₯μ—μ„œ sr=None 으둜 μ§€μ •ν•˜μ§€ μ•Šκ³  μž„μ˜μ˜ sample_rateλ₯Ό μ„€μ •ν•˜λ©΄ loadν•  λ•Œ resampling μˆ˜ν–‰ν•¨
55
+ data = librosa.load(video_pathname, sr=16000)[0][-19456:]
56
+ save2npz(dst_pathname, data=data) # librosa 둜 읽은 데이터λ₯Ό npz ν˜•μ‹μœΌλ‘œ μ €μž₯
preprocessing/shape_predictor_68_face_landmarks.dat ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
3
+ size 99693937
preprocessing/transform.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2 # OpenCV 라이브러리
2
+ import numpy as np
3
+ from skimage import transform as tf # 이미지 λ³€ν™˜ λͺ¨λ“ˆ
4
+
5
+ # -- Landmark interpolation:
6
+ def linear_interpolate(landmarks, start_idx, stop_idx):
7
+ start_landmarks = landmarks[start_idx] # λžœλ“œλ§ˆν¬ μ‹œμž‘
8
+ stop_landmarks = landmarks[stop_idx] # λžœλ“œλ§ˆν¬ 끝
9
+ delta = stop_landmarks - start_landmarks # λžœλ“œλ§ˆν¬ κ°’ 차이
10
+ for idx in range(1, stop_idx-start_idx):
11
+ landmarks[start_idx+idx] = start_landmarks + idx/float(stop_idx-start_idx) * delta # λžœλ“œλ§ˆν¬ μ—…λ°μ΄νŠΈ(보간)
12
+ return landmarks
13
+
14
+ # -- Face Transformation
15
+ # src: μž…λ ₯ μ˜μƒ, dst: 좜λ ₯/κ²°κ³Ό μ˜μƒ
16
+ def warp_img(src, dst, img, std_size):
17
+ tform = tf.estimate_transform('similarity', src, dst) # find the transformation matrix # λ³€ν™˜ ν–‰λ ¬ κ΅¬ν•˜κΈ°
18
+ warped = tf.warp(img, inverse_map=tform.inverse, output_shape=std_size) # wrap the frame image # μ£Όμ–΄μ§„ μ’Œν‘œ λ³€ν™˜μ— 따라 ν”„λ ˆμž„ 이미지 μ™œκ³‘
19
+ warped = warped * 255 # note output from wrap is double image (value range [0,1])
20
+ warped = warped.astype('uint8') # numpy 데이터 νƒ€μž… uint8 으둜 λ³€κ²½
21
+ return warped, tform
22
+
23
+ def apply_transform(transform, img, std_size):
24
+ warped = tf.warp(img, inverse_map=transform.inverse, output_shape=std_size) # wrap the frame image # μ£Όμ–΄μ§„ μ’Œν‘œ λ³€ν™˜μ— 따라 ν”„λ ˆμž„ 이미지 μ™œκ³‘
25
+ warped = warped * 255 # note output from wrap is double image (value range [0,1])
26
+ warped = warped.astype('uint8') # numpy 데이터 νƒ€μž… uint8 으둜 λ³€κ²½
27
+ return warped
28
+
29
+ # -- Crop
30
+ def cut_patch(img, landmarks, height, width, threshold=5):
31
+
32
+ center_x, center_y = np.mean(landmarks, axis=0) # 각 그룹의 같은 μ›μ†ŒλΌλ¦¬ 평균
33
+
34
+ # μ’Œν‘œ 처리
35
+ if center_y - height < 0:
36
+ center_y = height
37
+ if center_y - height < 0 - threshold:
38
+ raise Exception('too much bias in height')
39
+ if center_x - width < 0:
40
+ center_x = width
41
+ if center_x - width < 0 - threshold:
42
+ raise Exception('too much bias in width')
43
+
44
+ if center_y + height > img.shape[0]:
45
+ center_y = img.shape[0] - height
46
+ if center_y + height > img.shape[0] + threshold:
47
+ raise Exception('too much bias in height')
48
+ if center_x + width > img.shape[1]:
49
+ center_x = img.shape[1] - width
50
+ if center_x + width > img.shape[1] + threshold:
51
+ raise Exception('too much bias in width')
52
+
53
+ # λ°°μ—΄ 볡사
54
+ cutted_img = np.copy(img[ int(round(center_y) - round(height)): int(round(center_y) + round(height)),
55
+ int(round(center_x) - round(width)): int(round(center_x) + round(width))])
56
+ return cutted_img
57
+
58
+ # -- RGB to GRAY
59
+ def convert_bgr2gray(data):
60
+ # np.stack(λ°°μ—΄_1, λ°°μ—΄_2, axis=0): μ§€μ •ν•œ axisλ₯Ό μ™„μ „νžˆ μƒˆλ‘œμš΄ axis둜 생각
61
+ return np.stack([cv2.cvtColor(_, cv2.COLOR_BGR2GRAY) for _ in data], axis=0) # gray λ³€ν™˜
preprocessing/utils.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #coding=utf-8
2
+ import os
3
+ import cv2 # OpenCV 라이브러리
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+
8
+ # -- IO utils
9
+ # ν…μŠ€νŠΈ 라인 뢈러였기
10
+ def read_txt_lines(filepath):
11
+ # 파일이 μžˆλŠ”μ§€ 확인, μ—†μœΌλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
12
+ assert os.path.isfile( filepath ), "Error when trying to read txt file, path does not exist: {}".format(filepath) # μ›ν•˜λŠ” 쑰건의 λ³€μˆ˜κ°’μ„ λ³΄μ¦ν•˜κΈ° μœ„ν•΄ μ‚¬μš©
13
+
14
+ # 파일 뢈러였기
15
+ with open( filepath ) as myfile:
16
+ content = myfile.read().splitlines() # λ¬Έμžμ—΄μ„ '\n' κΈ°μ€€μœΌλ‘œ μͺΌκ°  ν›„ list 생성
17
+ return content
18
+
19
+
20
+ # npz μ €μž₯
21
+ def save2npz(filename, data=None):
22
+ # 데이터가 λΉ„μ–΄μžˆλŠ”μ§€ 확인, μ—†μœΌλ©΄ AssertionError λ©”μ‹œμ§€λ₯Ό 띄움
23
+ assert data is not None, "data is {}".format(data)
24
+
25
+ # 파일 없을 경우
26
+ if not os.path.exists(os.path.dirname(filename)):
27
+ os.makedirs(os.path.dirname(filename)) # 디렉토리 생성
28
+ np.savez_compressed(filename, data=data) # μ••μΆ•λ˜μ§€ μ•Šμ€ .npz 파일 ν˜•μ‹ 으둜 μ—¬λŸ¬ λ°°μ—΄ μ €μž₯
29
+ def save2npz(filename, data=None):
30
+ """save2npz.
31
+ :param filename: str, the fileanme where the data will be saved.
32
+ :param data: ndarray, arrays to save to the file.
33
+ """
34
+ assert data is not None, "data is {}".format(data)
35
+ if not os.path.exists(os.path.dirname(filename)):
36
+ os.makedirs(os.path.dirname(filename))
37
+ np.savez_compressed(filename, data=data)
38
+
39
+ # λΉ„λ””μ˜€ 뢈러였기
40
+ def read_video(filename):
41
+ cap = cv2.VideoCapture(filename) # μ˜μƒ 객체(파일) κ°€μ Έμ˜€κΈ°
42
+
43
+ while(cap.isOpened()): # μ˜μƒ 파일(카메라)이 μ •μƒμ μœΌλ‘œ μ—΄λ ΈλŠ”μ§€(μ΄ˆκΈ°ν™”λ˜μ—ˆλŠ”μ§€) μ—¬λΆ€
44
+ # ret: μ •μƒμ μœΌλ‘œ μ½μ–΄μ™”λŠ”κ°€?
45
+ # frame: ν•œ μž₯의 이미지(frame) κ°€μ Έμ˜€κΈ°
46
+ ret, frame = cap.read() # BGR
47
+ if ret: # ν”„λ ˆμž„ 정보λ₯Ό μ •μƒμ μœΌλ‘œ 읽지 λͺ»ν•˜λ©΄
48
+ yield frame # ν”„λ ˆμž„μ„ ν•¨μˆ˜ λ°”κΉ₯으둜 μ „λ‹¬ν•˜λ©΄μ„œ μ½”λ“œ 싀행을 ν•¨μˆ˜ λ°”κΉ₯에 양보
49
+ else: # ν”„λ ˆμž„ 정보λ₯Ό μ •μƒμ μœΌλ‘œ 읽지 λͺ»ν•˜λ©΄
50
+ break # while λΉ μ Έλ‚˜κ°€κΈ°
51
+ cap.release() # μ˜μƒ 파일(카메라) μ‚¬μš© μ’…λ£Œ
52
+
53
+
54
+
55
+ # Video 정보 κ°€μ Έμ˜€κΈ°
56
+ def get_video_info(infilename, is_print=False):
57
+ cap = cv2.VideoCapture(infilename)
58
+ if not cap.isOpened():
59
+ print("could not open : ", infilename)
60
+ cap.release()
61
+ exit(0)
62
+
63
+ length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
64
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
65
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
66
+ fps = cap.get(cv2.CAP_PROP_FPS)
67
+ cap.release()
68
+
69
+ if is_print:
70
+ print('length : ', length)
71
+ print('width : ', width)
72
+ print('height : ', height)
73
+ print('fps : ', fps)
74
+
75
+ video_info = {
76
+ 'length': length,
77
+ 'width': width,
78
+ 'height': height,
79
+ 'fps': fps,
80
+ }
81
+
82
+ return video_info
83
+
84
+ # Video -> Numpy
85
+ # μ°Έκ³  κΉƒν—ˆλΈŒ μ½”λ“œ: https://github.com/khazit/Lip2Word/blob/master/lipReader.py#L22
86
+ def videoToArray(video_pathname, is_gray=True) :
87
+
88
+ cap = cv2.VideoCapture(video_pathname) # μ˜μƒ 객체(파일) κ°€μ Έμ˜€κΈ°
89
+
90
+ # μ˜μƒ 파일(카메라)이 μ •μƒμ μœΌλ‘œ 열리지 μ•Šμ€ 경우
91
+ if not cap.isOpened():
92
+ print("could not open : ", video_pathname)
93
+ cap.release() # μ˜μƒ 파일(카메라) μ‚¬μš© μ’…λ£Œ
94
+ exit(0) # λΉ μ Έλ‚˜κ°€κΈ°
95
+
96
+ n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # μ˜μƒ ν”„λ ˆμž„ 개수
97
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) # μ˜μƒ λ„ˆλΉ„
98
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # μ˜μƒ 높이
99
+ fps = cap.get(cv2.CAP_PROP_FPS) # μ˜μƒ FPS(Frames Per Second)
100
+
101
+ if is_gray:
102
+ video = np.zeros((n_frames, height, width)) # gray
103
+ else:
104
+ n_channels=3
105
+ video = np.zeros((n_frames, height, width, n_channels)) # color
106
+
107
+ video = video.astype(np.uint8)
108
+
109
+ i = 0
110
+ while True :
111
+ success, frame = cap.read()
112
+ if not success :
113
+ break
114
+ else :
115
+ # gray scale 적용
116
+ if is_gray:
117
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
118
+
119
+ video[i] = frame
120
+ i += 1
121
+
122
+ cap.release() # μ˜μƒ 파일(카메라) μ‚¬μš© μ’…λ£Œ
123
+
124
+ return video # μ˜μƒ 정보 μ•žμ— μ˜μƒ ν”„λ ˆμž„ 개수λ₯Ό μΆ”κ°€ν•œ numpy λ°˜ν™˜
125
+
126
+
127
+ # Frame Sampling (ν”„λ ˆμž„ 개수 λ§žμΆ”κΈ°)
128
+ # μ°Έκ³  κΉƒν—ˆλΈŒ μ½”λ“œ: https://github.com/khazit/Lip2Word/blob/master/lipReader.py#L62
129
+ def frameAdjust(video, target_frames=29):
130
+ n_frames = video.shape[0] # μ˜μƒ ν”„λ ˆμž„ 개수
131
+
132
+ if target_frames == n_frames :
133
+ return video # μ˜μƒ κ·ΈλŒ€λ‘œ λ°˜ν™˜
134
+ else :
135
+ # μ˜μƒ ν”„λ ˆμž„ 개수 > μ›ν•˜λŠ” ν”„λ ˆμž„ 개수
136
+ if n_frames > target_frames :
137
+ idx = np.linspace(0, n_frames-1, target_frames) # 숫자 μ‹œν€€μŠ€ 생성 # ꡬ간 μ‹œμž‘μ , ꡬ간 끝점, ꡬ간 λ‚΄ 숫자 개수
138
+ idx = np.around(idx, 0).astype(np.int32) # λ°˜μ˜¬λ¦Όν•˜κ³  dtype 을 μ •μˆ˜λ‘œ λ³€κ²½
139
+ return video[idx] # μ›ν•˜λŠ” ν”„λ ˆμž„ 개수둜 sampling ν•œ μ˜μƒ
140
+ # μ˜μƒ ν”„λ ˆμž„ 개수 < μ›ν•˜λŠ” ν”„λ ˆμž„ 개수
141
+ else :
142
+ output_video = np.zeros((target_frames, *video.shape[1:])).astype(np.uint8) # μ›ν•˜λŠ” ν”„λ ˆμž„ κ°œμˆ˜μ— λ§žμΆ°μ„œ 0으둜 μ΄ˆκΈ°ν™”ν•œ numpy 생성
143
+ output_video[:n_frames] = video # μ˜μƒ ν”„λ ˆμž„ κ°œμˆ˜κΉŒμ§€ κ·ΈλŒ€λ‘œ μ˜μƒ 정보 μ €μž₯
144
+
145
+ # μ›ν•˜λŠ” ν”„λ ˆμž„ 개수만큼 λ§ˆμ§€λ§‰ ν”„λ ˆμž„ 볡제
146
+ for i in range(target_frames-n_frames+1) :
147
+ output_video[i+n_frames-1] = output_video[n_frames-1]
148
+
149
+ return output_video # μ›ν•˜λŠ” ν”„λ ˆμž„ 개수둜 sampling ν•œ μ˜μƒ
preprocessing/vietnamese_detected_face_30_words.csv ADDED
The diff for this file is too large to render. See raw diff
 
preprocessing/vietnamese_detected_face_30_words_have_snr.csv ADDED
The diff for this file is too large to render. See raw diff