Spaces:
Build error
Build error
Ashhar
commited on
Commit
·
3683b73
1
Parent(s):
270fed8
face match detector
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Dockerfile +0 -19
- README.md +6 -5
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +75 -207
- facewrapper/dict/data1.bin +0 -3
- facewrapper/dict/data2.bin +0 -3
- facewrapper/dict/data3.bin +0 -3
- facewrapper/dict/detect.bin +0 -3
- facewrapper/facewrapper.py +0 -32
- facewrapper/libs/libimutils.so +0 -0
- facewrapper/libs/libimutils.so_for_ubuntu22 +0 -0
- facewrapper/libs/libttvfaceengine6.so +0 -3
- gradio/demo.py +0 -115
- gradio/examples/1.jpg +0 -0
- gradio/examples/2.jpg +0 -0
- gradio/examples/3.jpg +0 -0
- gradio/examples/4.jpg +0 -0
- openvino/cache.json +0 -0
- openvino/libgna.so +0 -3
- openvino/libgna.so.2 +0 -3
- openvino/libgna.so.3.0.0.1455 +0 -3
- openvino/libopenvino.so +0 -3
- openvino/libopenvino_auto_batch_plugin.so +0 -0
- openvino/libopenvino_auto_plugin.so +0 -0
- openvino/libopenvino_c.so +0 -0
- openvino/libopenvino_gapi_preproc.so +0 -3
- openvino/libopenvino_hetero_plugin.so +0 -0
- openvino/libopenvino_intel_cpu_plugin.so +0 -3
- openvino/libopenvino_intel_gna_plugin.so +0 -3
- openvino/libopenvino_intel_hddl_plugin.so +0 -3
- openvino/libopenvino_intel_myriad_plugin.so +0 -3
- openvino/libopenvino_ir_frontend.so +0 -0
- openvino/libopenvino_onnx_frontend.so +0 -3
- openvino/libopenvino_paddle_frontend.so +0 -0
- openvino/libopenvino_tensorflow_fe.so +0 -3
- openvino/pcie-ma2x8x.mvcmd +0 -3
- openvino/plugins.xml +0 -27
- openvino/usb-ma2x8x.mvcmd +0 -3
- openvino/vpu_custom_kernels/binarization.bin +0 -3
- openvino/vpu_custom_kernels/binarization.cl +0 -67
- openvino/vpu_custom_kernels/binary_convolution.bin +0 -3
- openvino/vpu_custom_kernels/binary_convolution.cl +0 -95
- openvino/vpu_custom_kernels/binary_convolution1x1.bin +0 -3
- openvino/vpu_custom_kernels/binary_convolution1x1.cl +0 -117
- openvino/vpu_custom_kernels/binary_convolution3x3.bin +0 -3
- openvino/vpu_custom_kernels/binary_convolution3x3.cl +0 -278
- openvino/vpu_custom_kernels/convolution1x1_chw.bin +0 -3
- openvino/vpu_custom_kernels/convolution1x1_chw.cl +0 -114
- openvino/vpu_custom_kernels/convolution1x1_hwc.bin +0 -3
- openvino/vpu_custom_kernels/convolution1x1_hwc.cl +0 -126
Dockerfile
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
FROM ubuntu:20.04
|
2 |
-
RUN ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
|
3 |
-
RUN apt-get update -y
|
4 |
-
RUN apt-get install -y python3 python3-pip python3-opencv
|
5 |
-
RUN apt-get install -y libcurl4-openssl-dev libssl-dev
|
6 |
-
RUN mkdir -p /home/FaceOnLive_v6
|
7 |
-
RUN mkdir -p /home/FaceOnLive_v6/facewrapper
|
8 |
-
WORKDIR /home/FaceOnLive_v6
|
9 |
-
COPY ./facewrapper ./facewrapper
|
10 |
-
COPY ./facewrapper/libs/libimutils.so /usr/lib
|
11 |
-
COPY ./gradio ./gradio
|
12 |
-
COPY ./openvino /usr/lib
|
13 |
-
COPY ./app.py ./app.py
|
14 |
-
COPY ./run.sh .
|
15 |
-
COPY ./requirements.txt ./requirements.txt
|
16 |
-
RUN pip3 install -r requirements.txt
|
17 |
-
RUN chmod a+x run.sh
|
18 |
-
CMD ["./run.sh"]
|
19 |
-
EXPOSE 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
---
|
2 |
-
title: Face
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
-
colorTo:
|
6 |
-
sdk:
|
|
|
|
|
7 |
pinned: false
|
8 |
-
license: mit
|
9 |
short_description: FaceOnLive On-Premise Solution
|
10 |
---
|
11 |
|
|
|
1 |
---
|
2 |
+
title: Face Matching Tool
|
3 |
+
emoji: 🤡
|
4 |
colorFrom: yellow
|
5 |
+
colorTo: red
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.36.1
|
8 |
+
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
short_description: FaceOnLive On-Premise Solution
|
11 |
---
|
12 |
|
__pycache__/app.cpython-312.pyc
ADDED
Binary file (3.92 kB). View file
|
|
app.py
CHANGED
@@ -1,217 +1,85 @@
|
|
1 |
-
import sys
|
2 |
-
sys.path.append('.')
|
3 |
-
|
4 |
-
from flask import Flask, request, jsonify
|
5 |
-
from time import gmtime, strftime
|
6 |
-
import os
|
7 |
-
import base64
|
8 |
-
import json
|
9 |
import cv2
|
10 |
-
import
|
|
|
|
|
|
|
11 |
|
12 |
-
|
13 |
-
from facewrapper.facewrapper import ttv_get_hwid
|
14 |
-
from facewrapper.facewrapper import ttv_init
|
15 |
-
from facewrapper.facewrapper import ttv_init_offline
|
16 |
-
from facewrapper.facewrapper import ttv_extract_feature
|
17 |
-
from facewrapper.facewrapper import ttv_compare_feature
|
18 |
|
19 |
-
app = Flask(__name__)
|
20 |
|
21 |
-
|
22 |
-
|
23 |
|
24 |
-
licenseKey = os.environ.get("LICENSE_KEY")
|
25 |
-
licensePath = "license.txt"
|
26 |
-
modelFolder = os.path.abspath(os.path.dirname(__file__)) + '/facewrapper/dict'
|
27 |
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
30 |
|
31 |
-
ret = ttv_init(modelFolder.encode('utf-8'), licenseKey.encode('utf-8'))
|
32 |
-
if ret != 0:
|
33 |
-
print(f"online init failed: {ret}");
|
34 |
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
else
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
66 |
-
response.status_code = 200
|
67 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
68 |
-
return response
|
69 |
-
|
70 |
-
faceRect1 = np.zeros([4], dtype=np.int32)
|
71 |
-
feature1 = np.zeros([2048], dtype=np.uint8)
|
72 |
-
featureSize1 = np.zeros([1], dtype=np.int32)
|
73 |
-
|
74 |
-
ret = ttv_extract_feature(image1, image1.shape[1], image1.shape[0], faceRect1, feature1, featureSize1)
|
75 |
-
if ret <= 0:
|
76 |
-
if ret == -1:
|
77 |
-
result = "license error!"
|
78 |
-
elif ret == -2:
|
79 |
-
result = "init error!"
|
80 |
-
elif ret == 0:
|
81 |
-
result = "image1: no face detected!"
|
82 |
-
|
83 |
-
status = "ok"
|
84 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
85 |
-
response.status_code = 200
|
86 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
87 |
-
return response
|
88 |
-
|
89 |
-
faceRect2 = np.zeros([4], dtype=np.int32)
|
90 |
-
feature2 = np.zeros([2048], dtype=np.uint8)
|
91 |
-
featureSize2 = np.zeros([1], dtype=np.int32)
|
92 |
-
|
93 |
-
ret = ttv_extract_feature(image2, image2.shape[1], image2.shape[0], faceRect2, feature2, featureSize2)
|
94 |
-
if ret <= 0:
|
95 |
-
if ret == -1:
|
96 |
-
result = "license error!"
|
97 |
-
elif ret == -2:
|
98 |
-
result = "init error!"
|
99 |
-
elif ret == 0:
|
100 |
-
result = "image2: no face detected!"
|
101 |
-
|
102 |
-
status = "ok"
|
103 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
104 |
-
response.status_code = 200
|
105 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
106 |
-
return response
|
107 |
-
|
108 |
-
similarity = ttv_compare_feature(feature1, feature2)
|
109 |
-
if similarity > 0.7:
|
110 |
-
result = "same"
|
111 |
-
else:
|
112 |
-
result = "different"
|
113 |
-
|
114 |
-
status = "ok"
|
115 |
-
response = jsonify(
|
116 |
-
{
|
117 |
-
"status": status,
|
118 |
-
"data": {
|
119 |
-
"result": result,
|
120 |
-
"similarity": float(similarity),
|
121 |
-
"face1": {"x1": int(faceRect1[0]), "y1": int(faceRect1[1]), "x2": int(faceRect1[2]), "y2" : int(faceRect1[3])},
|
122 |
-
"face2": {"x1": int(faceRect2[0]), "y1": int(faceRect2[1]), "x2": int(faceRect2[2]), "y2" : int(faceRect2[3])},
|
123 |
-
}
|
124 |
-
})
|
125 |
-
|
126 |
-
response.status_code = 200
|
127 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
128 |
-
return response
|
129 |
-
|
130 |
-
|
131 |
-
@app.route('/api/compare_face_base64', methods=['POST'])
|
132 |
-
def coompare_face_base64():
|
133 |
-
content = request.get_json()
|
134 |
-
imageBase641 = content['image1']
|
135 |
-
image1 = cv2.imdecode(np.frombuffer(base64.b64decode(imageBase641), dtype=np.uint8), cv2.IMREAD_COLOR)
|
136 |
-
|
137 |
-
if image1 is None:
|
138 |
-
result = "image1: is null!"
|
139 |
-
status = "ok"
|
140 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
141 |
-
response.status_code = 200
|
142 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
143 |
-
return response
|
144 |
-
|
145 |
-
imageBase642 = content['image2']
|
146 |
-
image2 = cv2.imdecode(np.frombuffer(base64.b64decode(imageBase642), dtype=np.uint8), cv2.IMREAD_COLOR)
|
147 |
-
|
148 |
-
if image2 is None:
|
149 |
-
result = "image2: is null!"
|
150 |
-
status = "ok"
|
151 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
152 |
-
response.status_code = 200
|
153 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
154 |
-
return response
|
155 |
-
|
156 |
-
faceRect1 = np.zeros([4], dtype=np.int32)
|
157 |
-
feature1 = np.zeros([2048], dtype=np.uint8)
|
158 |
-
featureSize1 = np.zeros([1], dtype=np.int32)
|
159 |
-
|
160 |
-
ret = ttv_extract_feature(image1, image1.shape[1], image1.shape[0], faceRect1, feature1, featureSize1)
|
161 |
-
if ret <= 0:
|
162 |
-
if ret == -1:
|
163 |
-
result = "license error!"
|
164 |
-
elif ret == -2:
|
165 |
-
result = "init error!"
|
166 |
-
elif ret == 0:
|
167 |
-
result = "image1: no face detected!"
|
168 |
-
|
169 |
-
status = "ok"
|
170 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
171 |
-
response.status_code = 200
|
172 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
173 |
-
return response
|
174 |
-
|
175 |
-
faceRect2 = np.zeros([4], dtype=np.int32)
|
176 |
-
feature2 = np.zeros([2048], dtype=np.uint8)
|
177 |
-
featureSize2 = np.zeros([1], dtype=np.int32)
|
178 |
-
|
179 |
-
ret = ttv_extract_feature(image2, image2.shape[1], image2.shape[0], faceRect2, feature2, featureSize2)
|
180 |
-
if ret <= 0:
|
181 |
-
if ret == -1:
|
182 |
-
result = "license error!"
|
183 |
-
elif ret == -2:
|
184 |
-
result = "init error!"
|
185 |
-
elif ret == 0:
|
186 |
-
result = "image2: no face detected!"
|
187 |
-
|
188 |
-
status = "ok"
|
189 |
-
response = jsonify({"status": status, "data": {"result": result}})
|
190 |
-
response.status_code = 200
|
191 |
-
response.headers["Content-Type"] = "application/json; charset=utf-8"
|
192 |
-
return response
|
193 |
-
|
194 |
-
similarity = ttv_compare_feature(feature1, feature2)
|
195 |
-
if similarity > 0.7:
|
196 |
-
result = "same"
|
197 |
else:
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import cv2
|
2 |
+
import face_recognition
|
3 |
+
import gradio as gr
|
4 |
+
import datetime as DT
|
5 |
+
import pytz
|
6 |
|
7 |
+
ipAddress = None
|
|
|
|
|
|
|
|
|
|
|
8 |
|
|
|
9 |
|
10 |
+
def __nowInIST():
|
11 |
+
return DT.datetime.now(pytz.timezone("Asia/Kolkata"))
|
12 |
|
|
|
|
|
|
|
13 |
|
14 |
+
def __attachIp(request: gr.Request):
|
15 |
+
global ipAddress
|
16 |
+
x_forwarded_for = request.headers.get('x-forwarded-for')
|
17 |
+
if x_forwarded_for:
|
18 |
+
ipAddress = x_forwarded_for
|
19 |
|
|
|
|
|
|
|
20 |
|
21 |
+
def pprint(log: str):
|
22 |
+
now = __nowInIST()
|
23 |
+
now = now.strftime("%Y-%m-%d %H:%M:%S")
|
24 |
+
print(f"[{now}] [{ipAddress}] {log}")
|
25 |
|
26 |
+
|
27 |
+
def __findFaceEncodings(imagePath):
|
28 |
+
image = cv2.imread(imagePath)
|
29 |
+
faceEncodings = face_recognition.face_encodings(image)
|
30 |
+
return faceEncodings[0] if len(faceEncodings) > 0 else None
|
31 |
+
|
32 |
+
|
33 |
+
def predictMatch(firstImage, secondImage):
|
34 |
+
image1Encoding = __findFaceEncodings(firstImage)
|
35 |
+
image2Encoding = __findFaceEncodings(secondImage)
|
36 |
+
|
37 |
+
pprint("Starting the job...")
|
38 |
+
distance = face_recognition.face_distance([image1Encoding], image2Encoding)
|
39 |
+
distancePercent = round(distance[0] * 100)
|
40 |
+
matchPercent = 100 - distancePercent
|
41 |
+
pprint(f"Job finished. Match : {matchPercent}%")
|
42 |
+
|
43 |
+
isSame = matchPercent > 50
|
44 |
+
|
45 |
+
amplificationFactor = 1.5
|
46 |
+
adjustment = (amplificationFactor - 0.5) * 100
|
47 |
+
|
48 |
+
outputTexts = []
|
49 |
+
if isSame:
|
50 |
+
outputTexts.append("The images are of the same person ✅")
|
51 |
+
matchScore = round((matchPercent + adjustment) / (100 + adjustment) * 100)
|
52 |
+
outputTexts.append(f"\nMatch Score: {matchScore}%")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
else:
|
54 |
+
outputTexts.append("The images are not of the same person ❌")
|
55 |
+
|
56 |
+
outputText = "\n".join(outputTexts)
|
57 |
+
pprint(f"{outputText=}")
|
58 |
+
|
59 |
+
return outputText
|
60 |
+
|
61 |
+
|
62 |
+
with gr.Row(elem_classes=["main-container"]):
|
63 |
+
with gr.Row(elem_classes=["img-container"]):
|
64 |
+
firstImage = gr.Image(type='filepath', height=250, elem_classes=["image"])
|
65 |
+
secondImage = gr.Image(type='filepath', height=250, elem_classes=["image"])
|
66 |
+
|
67 |
+
with gr.Row(elem_classes=["output-container"]):
|
68 |
+
result = gr.Textbox(label="Result", elem_classes=["output"])
|
69 |
+
|
70 |
+
|
71 |
+
with gr.Interface(
|
72 |
+
fn=predictMatch,
|
73 |
+
inputs=[
|
74 |
+
firstImage,
|
75 |
+
secondImage,
|
76 |
+
],
|
77 |
+
outputs=[
|
78 |
+
result
|
79 |
+
],
|
80 |
+
title="Face Match Detector",
|
81 |
+
allow_flagging="never",
|
82 |
+
) as demo:
|
83 |
+
demo.load(__attachIp, None, None)
|
84 |
+
|
85 |
+
demo.launch(debug=True)
|
facewrapper/dict/data1.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:36cf5fcc49345989a86839a53529314ec1fe5d621c377a1952bc7538d55e7f1b
|
3 |
-
size 16255630
|
|
|
|
|
|
|
|
facewrapper/dict/data2.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f25fb0cd3d70cb84c258e7109620f411c087e0875828d6ab86cc9c4838d49bec
|
3 |
-
size 11875339
|
|
|
|
|
|
|
|
facewrapper/dict/data3.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:06daf36a8545f59ac104415e8b2d38072d06abc027cb346fd4a6c6029fed55b4
|
3 |
-
size 90716260
|
|
|
|
|
|
|
|
facewrapper/dict/detect.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0b505c320dd8add047f107549849a307d0c6f518f01c1d3402bce9e13a765146
|
3 |
-
size 28463173
|
|
|
|
|
|
|
|
facewrapper/facewrapper.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
import ctypes, ctypes.util
|
2 |
-
from ctypes import *
|
3 |
-
from numpy.ctypeslib import ndpointer
|
4 |
-
import sys
|
5 |
-
import os
|
6 |
-
|
7 |
-
lib_path = os.path.abspath(os.path.dirname(__file__)) + '/libs/libttvfaceengine6.so'
|
8 |
-
liveness_engine = cdll.LoadLibrary(lib_path)
|
9 |
-
|
10 |
-
ttv_version = liveness_engine.ttv_version
|
11 |
-
ttv_version.argtypes = []
|
12 |
-
ttv_version.restype = ctypes.c_char_p
|
13 |
-
|
14 |
-
ttv_get_hwid = liveness_engine.ttv_get_hwid
|
15 |
-
ttv_get_hwid.argtypes = []
|
16 |
-
ttv_get_hwid.restype = ctypes.c_char_p
|
17 |
-
|
18 |
-
ttv_init = liveness_engine.ttv_init
|
19 |
-
ttv_init.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
|
20 |
-
ttv_init.restype = ctypes.c_int32
|
21 |
-
|
22 |
-
ttv_init_offline = liveness_engine.ttv_init_offline
|
23 |
-
ttv_init_offline.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
|
24 |
-
ttv_init_offline.restype = ctypes.c_int32
|
25 |
-
|
26 |
-
ttv_extract_feature = liveness_engine.ttv_extract_feature
|
27 |
-
ttv_extract_feature.argtypes = [ndpointer(ctypes.c_ubyte, flags='C_CONTIGUOUS'), ctypes.c_int32, ctypes.c_int32, ndpointer(ctypes.c_int32, flags='C_CONTIGUOUS'), ndpointer(ctypes.c_ubyte, flags='C_CONTIGUOUS'), ndpointer(ctypes.c_int32, flags='C_CONTIGUOUS')]
|
28 |
-
ttv_extract_feature.restype = ctypes.c_int
|
29 |
-
|
30 |
-
ttv_compare_feature = liveness_engine.ttv_compare_feature
|
31 |
-
ttv_compare_feature.argtypes = [ndpointer(ctypes.c_ubyte, flags='C_CONTIGUOUS'), ndpointer(ctypes.c_ubyte, flags='C_CONTIGUOUS')]
|
32 |
-
ttv_compare_feature.restype = ctypes.c_double
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
facewrapper/libs/libimutils.so
DELETED
Binary file (412 kB)
|
|
facewrapper/libs/libimutils.so_for_ubuntu22
DELETED
Binary file (412 kB)
|
|
facewrapper/libs/libttvfaceengine6.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:01d366a336938c06f53a77d36a590a571898cccb297b768a4df7490baac12b80
|
3 |
-
size 4998416
|
|
|
|
|
|
|
|
gradio/demo.py
DELETED
@@ -1,115 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import requests
|
3 |
-
import json
|
4 |
-
from PIL import Image
|
5 |
-
|
6 |
-
def compare_face(frame1, frame2):
|
7 |
-
url = "http://127.0.0.1:8000/api/compare_face"
|
8 |
-
files = {'image1': open(frame1, 'rb'), 'image2': open(frame2, 'rb')}
|
9 |
-
|
10 |
-
r = requests.post(url=url, files=files)
|
11 |
-
faces = None
|
12 |
-
|
13 |
-
try:
|
14 |
-
image1 = Image.open(frame1)
|
15 |
-
image2 = Image.open(frame2)
|
16 |
-
|
17 |
-
face1 = None
|
18 |
-
face2 = None
|
19 |
-
data = r.json().get('data')
|
20 |
-
if data.get('face1') is not None:
|
21 |
-
face = data.get('face1')
|
22 |
-
x1 = face.get('x1')
|
23 |
-
y1 = face.get('y1')
|
24 |
-
x2 = face.get('x2')
|
25 |
-
y2 = face.get('y2')
|
26 |
-
if x1 < 0:
|
27 |
-
x1 = 0
|
28 |
-
if y1 < 0:
|
29 |
-
y1 = 0
|
30 |
-
if x2 >= image1.width:
|
31 |
-
x2 = image1.width - 1
|
32 |
-
if y2 >= image1.height:
|
33 |
-
y2 = image1.height - 1
|
34 |
-
|
35 |
-
face1 = image1.crop((x1, y1, x2, y2))
|
36 |
-
face_image_ratio = face1.width / float(face1.height)
|
37 |
-
resized_w = int(face_image_ratio * 150)
|
38 |
-
resized_h = 150
|
39 |
-
|
40 |
-
face1 = face1.resize((int(resized_w), int(resized_h)))
|
41 |
-
|
42 |
-
if data.get('face2') is not None:
|
43 |
-
face = data.get('face2')
|
44 |
-
x1 = face.get('x1')
|
45 |
-
y1 = face.get('y1')
|
46 |
-
x2 = face.get('x2')
|
47 |
-
y2 = face.get('y2')
|
48 |
-
|
49 |
-
if x1 < 0:
|
50 |
-
x1 = 0
|
51 |
-
if y1 < 0:
|
52 |
-
y1 = 0
|
53 |
-
if x2 >= image2.width:
|
54 |
-
x2 = image2.width - 1
|
55 |
-
if y2 >= image2.height:
|
56 |
-
y2 = image2.height - 1
|
57 |
-
|
58 |
-
face2 = image2.crop((x1, y1, x2, y2))
|
59 |
-
face_image_ratio = face2.width / float(face2.height)
|
60 |
-
resized_w = int(face_image_ratio * 150)
|
61 |
-
resized_h = 150
|
62 |
-
|
63 |
-
face2 = face2.resize((int(resized_w), int(resized_h)))
|
64 |
-
|
65 |
-
if face1 is not None and face2 is not None:
|
66 |
-
new_image = Image.new('RGB',(face1.width + face2.width + 10, 150), (80,80,80))
|
67 |
-
|
68 |
-
new_image.paste(face1,(0,0))
|
69 |
-
new_image.paste(face2,(face1.width + 10, 0))
|
70 |
-
faces = new_image.copy()
|
71 |
-
elif face1 is not None and face2 is None:
|
72 |
-
new_image = Image.new('RGB',(face1.width + face1.width + 10, 150), (80,80,80))
|
73 |
-
|
74 |
-
new_image.paste(face1,(0,0))
|
75 |
-
faces = new_image.copy()
|
76 |
-
elif face1 is None and face2 is not None:
|
77 |
-
new_image = Image.new('RGB',(face2.width + face2.width + 10, 150), (80,80,80))
|
78 |
-
|
79 |
-
new_image.paste(face2,(face2.width + 10, 0))
|
80 |
-
faces = new_image.copy()
|
81 |
-
except:
|
82 |
-
pass
|
83 |
-
|
84 |
-
return [r.json(), faces]
|
85 |
-
|
86 |
-
with gr.Blocks() as demo:
|
87 |
-
gr.Markdown(
|
88 |
-
"""
|
89 |
-
# Face Recognition
|
90 |
-
Get your own Face Recognition Server by duplicating this space.<br/>
|
91 |
-
Or run on your own machine using docker.<br/>
|
92 |
-
```docker run -it -p 7860:7860 --platform=linux/amd64 \
|
93 |
-
-e LICENSE_KEY="YOUR_VALUE_HERE" \
|
94 |
-
registry.hf.space/faceonlive-face-recognition-sdk:latest ```<br/><br/>
|
95 |
-
Contact us at https://faceonlive.com for issues and support.<br/>
|
96 |
-
"""
|
97 |
-
)
|
98 |
-
with gr.Row():
|
99 |
-
with gr.Column():
|
100 |
-
compare_face_input1 = gr.Image(type='filepath', height=480)
|
101 |
-
gr.Examples(['gradio/examples/1.jpg', 'gradio/examples/2.jpg'],
|
102 |
-
inputs=compare_face_input1)
|
103 |
-
compare_face_button = gr.Button("Compare Face")
|
104 |
-
with gr.Column():
|
105 |
-
compare_face_input2 = gr.Image(type='filepath', height=480)
|
106 |
-
gr.Examples(['gradio/examples/3.jpg', 'gradio/examples/4.jpg'],
|
107 |
-
inputs=compare_face_input2)
|
108 |
-
with gr.Column():
|
109 |
-
compare_face_output = gr.Image(type="pil", height=300)
|
110 |
-
compare_result_output = gr.JSON(label='Result')
|
111 |
-
|
112 |
-
compare_face_button.click(compare_face, inputs=[compare_face_input1, compare_face_input2], outputs=[compare_result_output, compare_face_output])
|
113 |
-
gr.HTML('<a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FFaceOnLive%2FFace-Recognition-SDK"><img src="https://api.visitorbadge.io/api/combined?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FFaceOnLive%2FFace-Recognition-SDK&labelColor=%23ff8a65&countColor=%2337d67a&style=flat&labelStyle=upper" /></a>')
|
114 |
-
|
115 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
gradio/examples/1.jpg
DELETED
Binary file (14.1 kB)
|
|
gradio/examples/2.jpg
DELETED
Binary file (5.97 kB)
|
|
gradio/examples/3.jpg
DELETED
Binary file (13.3 kB)
|
|
gradio/examples/4.jpg
DELETED
Binary file (6.47 kB)
|
|
openvino/cache.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
openvino/libgna.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:22441d86dca92b00ae7fb9d315bcb1c6a8a213ac4fe86396489753ebe76f869e
|
3 |
-
size 3120536
|
|
|
|
|
|
|
|
openvino/libgna.so.2
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:22441d86dca92b00ae7fb9d315bcb1c6a8a213ac4fe86396489753ebe76f869e
|
3 |
-
size 3120536
|
|
|
|
|
|
|
|
openvino/libgna.so.3.0.0.1455
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:22441d86dca92b00ae7fb9d315bcb1c6a8a213ac4fe86396489753ebe76f869e
|
3 |
-
size 3120536
|
|
|
|
|
|
|
|
openvino/libopenvino.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fd216848c1ba78e62360c12c9684df0c160f6962f3d900e5918cc042b42b2b46
|
3 |
-
size 13495416
|
|
|
|
|
|
|
|
openvino/libopenvino_auto_batch_plugin.so
DELETED
Binary file (391 kB)
|
|
openvino/libopenvino_auto_plugin.so
DELETED
Binary file (371 kB)
|
|
openvino/libopenvino_c.so
DELETED
Binary file (305 kB)
|
|
openvino/libopenvino_gapi_preproc.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3ac5ce0a8f3acefb41e8aa8161f78035dafff25c4b8c3485ebc541573b2b15f0
|
3 |
-
size 1312920
|
|
|
|
|
|
|
|
openvino/libopenvino_hetero_plugin.so
DELETED
Binary file (367 kB)
|
|
openvino/libopenvino_intel_cpu_plugin.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:afe05ada6d5b11495a21787fa6ab0162fc40f7a9ab97be78f7b7185126d15b18
|
3 |
-
size 33299880
|
|
|
|
|
|
|
|
openvino/libopenvino_intel_gna_plugin.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ef15b623e7f81788160c4056ccd5e887a8184affe381e84a906646ef36cae1ab
|
3 |
-
size 4067016
|
|
|
|
|
|
|
|
openvino/libopenvino_intel_hddl_plugin.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:96362327fbc404e88583bdcd2a526ccbf4ca26d4ecdb8898234be7986d9b8b2b
|
3 |
-
size 5894680
|
|
|
|
|
|
|
|
openvino/libopenvino_intel_myriad_plugin.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e596436002565356b80400e0d7e50093d53d338f623b171f658de527477852de
|
3 |
-
size 6120168
|
|
|
|
|
|
|
|
openvino/libopenvino_ir_frontend.so
DELETED
Binary file (343 kB)
|
|
openvino/libopenvino_onnx_frontend.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0770ed09d471b20bffcf4ef57ab1fb002db04c4404598bd5c52a4418a67f5441
|
3 |
-
size 3781640
|
|
|
|
|
|
|
|
openvino/libopenvino_paddle_frontend.so
DELETED
Binary file (987 kB)
|
|
openvino/libopenvino_tensorflow_fe.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c2dadbcd8ba32cec02873caf8dcc644d1d8856cdcd2978c603e5bac169e01bb9
|
3 |
-
size 2723864
|
|
|
|
|
|
|
|
openvino/pcie-ma2x8x.mvcmd
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f03146453508f2bcab1589907bccaa429b48db6123a7b8a428d6ce221d1fbb4d
|
3 |
-
size 2099248
|
|
|
|
|
|
|
|
openvino/plugins.xml
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
<ie>
|
2 |
-
<plugins>
|
3 |
-
<plugin name="AUTO" location="libopenvino_auto_plugin.so">
|
4 |
-
<properties>
|
5 |
-
<property key="MULTI_WORK_MODE_AS_AUTO" value="YES"/>
|
6 |
-
</properties>
|
7 |
-
</plugin>
|
8 |
-
<plugin name="BATCH" location="libopenvino_auto_batch_plugin.so">
|
9 |
-
</plugin>
|
10 |
-
<plugin name="CPU" location="libopenvino_intel_cpu_plugin.so">
|
11 |
-
</plugin>
|
12 |
-
<plugin name="GNA" location="libopenvino_intel_gna_plugin.so">
|
13 |
-
</plugin>
|
14 |
-
<plugin name="GPU" location="libopenvino_intel_gpu_plugin.so">
|
15 |
-
</plugin>
|
16 |
-
<plugin name="HETERO" location="libopenvino_hetero_plugin.so">
|
17 |
-
</plugin>
|
18 |
-
<plugin name="MULTI" location="libopenvino_auto_plugin.so">
|
19 |
-
</plugin>
|
20 |
-
<plugin name="MYRIAD" location="libopenvino_intel_myriad_plugin.so">
|
21 |
-
</plugin>
|
22 |
-
<plugin name="HDDL" location="libopenvino_intel_hddl_plugin.so">
|
23 |
-
</plugin>
|
24 |
-
<plugin name="VPUX" location="libopenvino_intel_vpux_plugin.so">
|
25 |
-
</plugin>
|
26 |
-
</plugins>
|
27 |
-
</ie>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openvino/usb-ma2x8x.mvcmd
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:faf33388b88708177a358fcb4704eba04b1cf9e88d6a047f90c833d686140a2e
|
3 |
-
size 2298632
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binarization.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3e0de6082c7bacca2ff5ad131f0afc44304fc792a6d99e7829399eb61491a0ac
|
3 |
-
size 19632
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binarization.cl
DELETED
@@ -1,67 +0,0 @@
|
|
1 |
-
// Copyright (C) 2018-2022 Intel Corporation
|
2 |
-
// SPDX-License-Identifier: Apache-2.0
|
3 |
-
//
|
4 |
-
|
5 |
-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
6 |
-
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
7 |
-
|
8 |
-
__kernel void binarization(
|
9 |
-
const __global half *__restrict src_data,
|
10 |
-
const __global half *__restrict input_low_high,
|
11 |
-
const __global half *__restrict dst_data,
|
12 |
-
int switch_out,
|
13 |
-
int input_low_high_size,
|
14 |
-
int W,
|
15 |
-
int H)
|
16 |
-
{
|
17 |
-
__local half local_src[15 * 1024];
|
18 |
-
__local half local_dst[15 * 1024];
|
19 |
-
|
20 |
-
event_t e1 = async_work_group_copy(local_src, src_data + get_group_id(2) * W * H, W * H, 0);
|
21 |
-
wait_group_events(1, &e1);
|
22 |
-
|
23 |
-
int c = get_global_id(2);
|
24 |
-
int C = get_global_size(2);
|
25 |
-
|
26 |
-
half dst_low = switch_out ? 1.h : -1.h;
|
27 |
-
half dst_high = switch_out ? -1.h : 1.h;
|
28 |
-
|
29 |
-
half s_ilow_ihigh = input_low_high_size == 1 ? input_low_high[0] : input_low_high[c];
|
30 |
-
|
31 |
-
for (int h = 0; h < H; h++) {
|
32 |
-
|
33 |
-
__local const half *__restrict addr_src = local_src + h * W;
|
34 |
-
__local half *__restrict addr_dst = local_dst + h * W;
|
35 |
-
|
36 |
-
#if 1
|
37 |
-
for (int w = 0; w < W / 8; w++) {
|
38 |
-
|
39 |
-
half8 h_src_val8 = (*((__local half8 *)addr_src + w));
|
40 |
-
|
41 |
-
short8 cond1;
|
42 |
-
cond1.s0 = (h_src_val8.s0 <= s_ilow_ihigh);
|
43 |
-
cond1.s1 = (h_src_val8.s1 <= s_ilow_ihigh);
|
44 |
-
cond1.s2 = (h_src_val8.s2 <= s_ilow_ihigh);
|
45 |
-
cond1.s3 = (h_src_val8.s3 <= s_ilow_ihigh);
|
46 |
-
cond1.s4 = (h_src_val8.s4 <= s_ilow_ihigh);
|
47 |
-
cond1.s5 = (h_src_val8.s5 <= s_ilow_ihigh);
|
48 |
-
cond1.s6 = (h_src_val8.s6 <= s_ilow_ihigh);
|
49 |
-
cond1.s7 = (h_src_val8.s7 <= s_ilow_ihigh);
|
50 |
-
|
51 |
-
cond1 = ~(cond1 - (short8)1);
|
52 |
-
|
53 |
-
short8 res = cond1 & as_short8((half8)dst_low) | ~cond1 & as_short8((half8)dst_high);
|
54 |
-
|
55 |
-
*((__local half8 *)addr_dst + w) = as_half8(res);
|
56 |
-
}
|
57 |
-
#endif
|
58 |
-
for (int w = W & (~0x7); w < W; w++) {
|
59 |
-
addr_dst[w] = (addr_src[w] <= s_ilow_ihigh) ? dst_low : dst_high;
|
60 |
-
}
|
61 |
-
}
|
62 |
-
|
63 |
-
barrier(CLK_LOCAL_MEM_FENCE);
|
64 |
-
|
65 |
-
event_t e2 = async_work_group_copy(dst_data + get_group_id(2) * W * H, local_dst, W * H, 0);
|
66 |
-
wait_group_events(1, &e2);
|
67 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binary_convolution.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:12c349d6f73c233b158e1d67af31715c7b8bda79f191b1e759476e01e65bb64a
|
3 |
-
size 10764
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binary_convolution.cl
DELETED
@@ -1,95 +0,0 @@
|
|
1 |
-
// Copyright (C) 2018-2022 Intel Corporation
|
2 |
-
// SPDX-License-Identifier: Apache-2.0
|
3 |
-
//
|
4 |
-
|
5 |
-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
6 |
-
|
7 |
-
int extract_weights(uchar val, int bit) { return ((val >> bit) & 1); }
|
8 |
-
|
9 |
-
__kernel void binary_convolution(
|
10 |
-
const __global half *restrict src_data,
|
11 |
-
const __global uchar *restrict weights_data,
|
12 |
-
__global half *restrict dst_data,
|
13 |
-
float pad_value,
|
14 |
-
|
15 |
-
int IW,
|
16 |
-
int IH,
|
17 |
-
int IC,
|
18 |
-
|
19 |
-
int DW,
|
20 |
-
int DH,
|
21 |
-
|
22 |
-
int GC,
|
23 |
-
|
24 |
-
int KW,
|
25 |
-
int KH,
|
26 |
-
|
27 |
-
int PW,
|
28 |
-
int PH,
|
29 |
-
|
30 |
-
int SW,
|
31 |
-
int SH)
|
32 |
-
{
|
33 |
-
int ipad_value = ((pad_value > 0.f) ? 1 : 0);
|
34 |
-
int c = get_global_id(2);
|
35 |
-
int y = get_global_id(1);
|
36 |
-
int x = get_global_id(0);
|
37 |
-
|
38 |
-
int OC = get_global_size(2);
|
39 |
-
int OH = get_global_size(1);
|
40 |
-
int OW = get_global_size(0);
|
41 |
-
|
42 |
-
int KD = 1;
|
43 |
-
int SD = 0;
|
44 |
-
int DD = 0;
|
45 |
-
int PD = 0;
|
46 |
-
int ID = 1;
|
47 |
-
int OD = 1;
|
48 |
-
|
49 |
-
int nbits = 8;
|
50 |
-
|
51 |
-
int g = c % GC;
|
52 |
-
int oc = c / GC;
|
53 |
-
int oh = y;
|
54 |
-
int ow = x;
|
55 |
-
|
56 |
-
for (int od = 0; od < OD; od++) {
|
57 |
-
int oidx = g * OC / GC * OD * OH * OW + oc * OD * OH * OW + od * OH * OW + oh * OW + ow;
|
58 |
-
|
59 |
-
int res = 0;
|
60 |
-
|
61 |
-
for (int ic = 0; ic < IC / GC; ic++) {
|
62 |
-
for (int kd = 0; kd < KD; kd++) {
|
63 |
-
for (int kh = 0; kh < KH; kh++) {
|
64 |
-
for (int kw = 0; kw < KW; kw++) {
|
65 |
-
int widx = g * OC / GC * IC / GC * KD * KH * KW
|
66 |
-
+ oc * IC / GC * KD * KH * KW + ic * KD * KH * KW + kd * KH * KW
|
67 |
-
+ kh * KW + kw;
|
68 |
-
|
69 |
-
int w = extract_weights(weights_data[widx / nbits], (widx % nbits));
|
70 |
-
|
71 |
-
int s;
|
72 |
-
|
73 |
-
int iw = ow * SW - PW + kw * DW;
|
74 |
-
int ih = oh * SH - PH + kh * DH;
|
75 |
-
int id = od * SD - PD + kd * DD;
|
76 |
-
|
77 |
-
if (iw < 0 || iw >= (int)IW || ih < 0 || ih >= (int)IH || id < 0
|
78 |
-
|| id >= (int)ID) {
|
79 |
-
s = ipad_value;
|
80 |
-
} else {
|
81 |
-
int iidx = g * IC / GC * ID * IH * IW + ic * ID * IH * IW + id * IH * IW
|
82 |
-
+ ih * IW + iw;
|
83 |
-
|
84 |
-
s = ((src_data[iidx] > 0.f) ? 1 : 0);
|
85 |
-
}
|
86 |
-
|
87 |
-
res += s ^ w;
|
88 |
-
}
|
89 |
-
}
|
90 |
-
}
|
91 |
-
}
|
92 |
-
|
93 |
-
dst_data[oidx] = (half)(IC / GC * KD * KH * KW - 2 * res);
|
94 |
-
}
|
95 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binary_convolution1x1.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6deff31d62aa84c643fbeba77e7dcd4ae5d9b488c1c98e07fffeb58ff8e9b945
|
3 |
-
size 76316
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binary_convolution1x1.cl
DELETED
@@ -1,117 +0,0 @@
|
|
1 |
-
// Copyright (C) 2018-2022 Intel Corporation
|
2 |
-
// SPDX-License-Identifier: Apache-2.0
|
3 |
-
//
|
4 |
-
|
5 |
-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
6 |
-
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
7 |
-
|
8 |
-
ushort extract_weights(uchar val, int bit) { return ((val >> bit) & 1); }
|
9 |
-
|
10 |
-
__kernel void binary_convolution(
|
11 |
-
const __global half *restrict src_data,
|
12 |
-
const __global uchar *restrict weights_data,
|
13 |
-
__global half *restrict dst_data,
|
14 |
-
float pad_value,
|
15 |
-
|
16 |
-
int IW,
|
17 |
-
int IH,
|
18 |
-
int IC,
|
19 |
-
|
20 |
-
int DW,
|
21 |
-
int DH,
|
22 |
-
|
23 |
-
int GC,
|
24 |
-
|
25 |
-
int KW,
|
26 |
-
int KH,
|
27 |
-
|
28 |
-
int PW,
|
29 |
-
int PH,
|
30 |
-
|
31 |
-
int SW,
|
32 |
-
int SH,
|
33 |
-
|
34 |
-
int OW)
|
35 |
-
{
|
36 |
-
__local half src_local[32 * 1024];
|
37 |
-
__local half dst_local[2 * 1024];
|
38 |
-
|
39 |
-
const int oh = get_group_id(0);
|
40 |
-
const int oc = get_group_id(1);
|
41 |
-
const int OH = get_global_size(0);
|
42 |
-
const int OC = get_global_size(1);
|
43 |
-
|
44 |
-
const int gc = oc / (OC / GC);
|
45 |
-
|
46 |
-
if (oh * SH >= 0 && oh * SH <= IH - 1) {
|
47 |
-
const __global half *src = src_data + (gc * IC / GC) * IW * IH + (SH * oh) * IW;
|
48 |
-
|
49 |
-
event_t e1 = async_work_group_copy_2D2D(
|
50 |
-
src_local, // dst
|
51 |
-
src, // src
|
52 |
-
IW, // num_elements_per_line,
|
53 |
-
IC / GC, // num_lines,
|
54 |
-
IH * IW - IW, // src_line_stride,
|
55 |
-
0, // dst_line_stride,
|
56 |
-
0);
|
57 |
-
wait_group_events(1, &e1);
|
58 |
-
}
|
59 |
-
|
60 |
-
half pad_value_half = convert_half(pad_value);
|
61 |
-
|
62 |
-
//padding row
|
63 |
-
if (oh * SH > IH - 1) {
|
64 |
-
__local half *dst = src_local;
|
65 |
-
for (int c = 0; c < IC / GC; c++) {
|
66 |
-
#pragma unroll 8
|
67 |
-
for (int j = 0; j < IW; j++) {
|
68 |
-
dst[j] = pad_value_half;
|
69 |
-
}
|
70 |
-
dst += IW;
|
71 |
-
}
|
72 |
-
}
|
73 |
-
|
74 |
-
int OWS = SW * OW;
|
75 |
-
ushort8 in;
|
76 |
-
|
77 |
-
for (int ows8 = 0; ows8 < (OWS + 7) / 8; ows8++) {
|
78 |
-
ushort8 val = {0, 0, 0, 0, 0, 0, 0, 0};
|
79 |
-
for (int ic = 0; ic < IC / GC; ++ic) {
|
80 |
-
__local half *src = (__local half *)((__local half8 *)(src_local + ic * IW) + ows8);
|
81 |
-
int weight_pos = oc * IC / GC + ic;
|
82 |
-
ushort w =
|
83 |
-
extract_weights(weights_data[((weight_pos + 0)) / 8], ((weight_pos + 0) % 8));
|
84 |
-
|
85 |
-
if ((ows8 * 8) <= IW - 1) {
|
86 |
-
in = *((__local ushort8 *)(src));
|
87 |
-
}
|
88 |
-
|
89 |
-
//padding column
|
90 |
-
if (ows8 * 8 + 7 > IW - 1) {
|
91 |
-
int boundary = (IW - 1) - ows8 * 8 + 1;
|
92 |
-
boundary = boundary < 0 ? 0 : boundary;
|
93 |
-
for (int offset = boundary; offset < 8; offset++) {
|
94 |
-
*((half *)(&in) + offset) = pad_value_half;
|
95 |
-
}
|
96 |
-
}
|
97 |
-
|
98 |
-
ushort8 w8 = (ushort8)(w);
|
99 |
-
|
100 |
-
ushort8 cond =
|
101 |
-
(((in) < (ushort8)0x8000) && (in > (ushort8)0x0000)) ? (ushort8)(1) : (ushort8)(0);
|
102 |
-
|
103 |
-
val += (cond ^ w8);
|
104 |
-
}
|
105 |
-
|
106 |
-
ushort8 val_shift = val << 1;
|
107 |
-
int boundary = (ows8 * 8 + 7) / SW < OW - 1 ? (ows8 * 8 + 7) / SW : OW - 1;
|
108 |
-
for (int ow = (ows8 * 8 + SW - 1) / SW; ow <= boundary; ow++) {
|
109 |
-
*(dst_local + ow) = (half)(IC / GC - *((ushort *)(&val_shift) + ow * SW - ows8 * 8));
|
110 |
-
}
|
111 |
-
}
|
112 |
-
|
113 |
-
barrier(CLK_LOCAL_MEM_FENCE);
|
114 |
-
|
115 |
-
event_t e2 = async_work_group_copy(dst_data + oc * OW * OH + oh * OW, dst_local, OW, 0);
|
116 |
-
wait_group_events(1, &e2);
|
117 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binary_convolution3x3.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:55e3c3f8863ff7a3583bcc7340d1e226775f5f14cfb11dd32bd671764570f7cb
|
3 |
-
size 104136
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/binary_convolution3x3.cl
DELETED
@@ -1,278 +0,0 @@
|
|
1 |
-
// Copyright (C) 2018-2022 Intel Corporation
|
2 |
-
// SPDX-License-Identifier: Apache-2.0
|
3 |
-
//
|
4 |
-
|
5 |
-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
6 |
-
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
7 |
-
|
8 |
-
ushort extract_weights(uchar val, int bit) { return ((val >> bit) & 1); }
|
9 |
-
|
10 |
-
__kernel void binary_convolution(
|
11 |
-
const __global half *restrict src_data,
|
12 |
-
const __global uchar *restrict weights_data,
|
13 |
-
const __global half *restrict dst_data,
|
14 |
-
float pad_value,
|
15 |
-
|
16 |
-
int IW,
|
17 |
-
int IH,
|
18 |
-
int IC,
|
19 |
-
|
20 |
-
int DW,
|
21 |
-
int DH,
|
22 |
-
|
23 |
-
int GC,
|
24 |
-
|
25 |
-
int KW,
|
26 |
-
int KH,
|
27 |
-
|
28 |
-
int PW,
|
29 |
-
int PH,
|
30 |
-
|
31 |
-
int SW,
|
32 |
-
int SH,
|
33 |
-
|
34 |
-
int OW)
|
35 |
-
{
|
36 |
-
__local half src_local[32 * 1024];
|
37 |
-
__local half dst_local[2 * 1024];
|
38 |
-
|
39 |
-
const int oh = get_group_id(0);
|
40 |
-
const int oc = get_group_id(1);
|
41 |
-
const int OH = get_global_size(0);
|
42 |
-
const int OC = get_global_size(1);
|
43 |
-
|
44 |
-
const int gc = oc / (OC / GC);
|
45 |
-
|
46 |
-
if (oh * SH - 1 >= 0 && oh * SH + DH + DH - 1 <= IH - 1) //dma for 3 rows
|
47 |
-
{
|
48 |
-
event_t e = async_work_group_copy_3D3D(
|
49 |
-
src_local, // dst
|
50 |
-
src_data + (gc * IC / GC) * IW * IH + (SH * oh - 1) * IW, // src
|
51 |
-
IW, // num_elements_per_line
|
52 |
-
3, // num_lines
|
53 |
-
DH * IW - IW, // src_line_stride
|
54 |
-
0, // dst_line_stride
|
55 |
-
IC / GC, // num planes
|
56 |
-
IH * IW - 3 * DH * IW, // src plane stride
|
57 |
-
0, // dst plane stride
|
58 |
-
0);
|
59 |
-
wait_group_events(1, &e);
|
60 |
-
} else {
|
61 |
-
int ih = oh * SH - 1;
|
62 |
-
if (ih >= 0 && ih <= IH - 1) //dma for first row
|
63 |
-
{
|
64 |
-
event_t e = async_work_group_copy_2D2D(
|
65 |
-
src_local, // dst
|
66 |
-
src_data + (gc * IC / GC) * IW * IH + ih * IW, // src
|
67 |
-
IW, // num_elements_per_line,
|
68 |
-
IC / GC, // num_lines,
|
69 |
-
IH * IW - IW, // src_line_stride,
|
70 |
-
2 * IW, // dst_line_stride,
|
71 |
-
0);
|
72 |
-
|
73 |
-
wait_group_events(1, &e);
|
74 |
-
}
|
75 |
-
ih = oh * SH - 1 + DH;
|
76 |
-
if (ih >= 0 && ih <= IH - 1) //dma for second row
|
77 |
-
{
|
78 |
-
event_t e = async_work_group_copy_2D2D(
|
79 |
-
src_local + IW, // dst
|
80 |
-
src_data + (gc * IC / GC) * IW * IH + ih * IW, // src
|
81 |
-
IW, // num_elements_per_line,
|
82 |
-
IC / GC, // num_lines,
|
83 |
-
IH * IW - IW, // src_line_stride,
|
84 |
-
2 * IW, // dst_line_stride,
|
85 |
-
0);
|
86 |
-
wait_group_events(1, &e);
|
87 |
-
}
|
88 |
-
ih = oh * SH - 1 + 2 * DH;
|
89 |
-
if (ih >= 0 && ih <= IH - 1) //dma for third row
|
90 |
-
{
|
91 |
-
event_t e = async_work_group_copy_2D2D(
|
92 |
-
src_local + 2 * IW, // dst
|
93 |
-
src_data + (gc * IC / GC) * IW * IH + ih * IW, // src
|
94 |
-
IW, // num_elements_per_line,
|
95 |
-
IC / GC, // num_lines,
|
96 |
-
IH * IW - IW, // src_line_stride,
|
97 |
-
2 * IW, // dst_line_stride,
|
98 |
-
0);
|
99 |
-
wait_group_events(1, &e);
|
100 |
-
}
|
101 |
-
}
|
102 |
-
|
103 |
-
half pad_value_half = convert_half(pad_value);
|
104 |
-
|
105 |
-
//padding row
|
106 |
-
if (oh * SH - 1 < 0 || oh * SH - 1 > IH - 1) {
|
107 |
-
__local half *dst = src_local;
|
108 |
-
for (int c = 0; c < IC / GC; c++) {
|
109 |
-
#pragma unroll 8
|
110 |
-
for (int j = 0; j < IW; j++) {
|
111 |
-
dst[j] = pad_value_half;
|
112 |
-
}
|
113 |
-
dst += 3 * IW;
|
114 |
-
}
|
115 |
-
}
|
116 |
-
if (oh * SH + DH - 1 > IH - 1) {
|
117 |
-
__local half *dst = src_local + IW;
|
118 |
-
for (int c = 0; c < IC / GC; c++) {
|
119 |
-
#pragma unroll 8
|
120 |
-
for (int j = 0; j < IW; j++) {
|
121 |
-
dst[j] = pad_value_half;
|
122 |
-
}
|
123 |
-
dst += 3 * IW;
|
124 |
-
}
|
125 |
-
}
|
126 |
-
if (oh * SH + DH + DH - 1 > IH - 1) {
|
127 |
-
__local half *dst = src_local + 2 * IW;
|
128 |
-
for (int c = 0; c < IC / GC; c++) {
|
129 |
-
#pragma unroll 8
|
130 |
-
for (int j = 0; j < IW; j++) {
|
131 |
-
dst[j] = pad_value_half;
|
132 |
-
}
|
133 |
-
dst += 3 * IW;
|
134 |
-
}
|
135 |
-
}
|
136 |
-
|
137 |
-
int OWS = SW * OW;
|
138 |
-
|
139 |
-
ushort8 in00;
|
140 |
-
ushort8 in01;
|
141 |
-
ushort8 in02;
|
142 |
-
ushort8 in10;
|
143 |
-
ushort8 in11;
|
144 |
-
ushort8 in12;
|
145 |
-
ushort8 in20;
|
146 |
-
ushort8 in21;
|
147 |
-
ushort8 in22;
|
148 |
-
|
149 |
-
for (int ows8 = 0; ows8 < (OWS + 7) / 8; ows8++) {
|
150 |
-
ushort8 val = {0, 0, 0, 0, 0, 0, 0, 0};
|
151 |
-
for (int ic = 0; ic < IC / GC; ++ic) {
|
152 |
-
__local half *src =
|
153 |
-
(__local half *)((__local half8 *)(src_local + ic * IW * 3 + IW + DW - 1) + ows8);
|
154 |
-
int weight_pos = oc * IC / GC * 3 * 3 + ic * 3 * 3;
|
155 |
-
ushort w0 = extract_weights(weights_data[((weight_pos + 0)) / 8], ((weight_pos + 0) % 8));
|
156 |
-
ushort w1 = extract_weights(weights_data[((weight_pos + 1)) / 8], ((weight_pos + 1) % 8));
|
157 |
-
ushort w2 = extract_weights(weights_data[((weight_pos + 2)) / 8], ((weight_pos + 2) % 8));
|
158 |
-
ushort w3 = extract_weights(weights_data[((weight_pos + 3)) / 8], ((weight_pos + 3) % 8));
|
159 |
-
ushort w4 = extract_weights(weights_data[((weight_pos + 4)) / 8], ((weight_pos + 4) % 8));
|
160 |
-
ushort w5 = extract_weights(weights_data[((weight_pos + 5)) / 8], ((weight_pos + 5) % 8));
|
161 |
-
ushort w6 = extract_weights(weights_data[((weight_pos + 6)) / 8], ((weight_pos + 6) % 8));
|
162 |
-
ushort w7 = extract_weights(weights_data[((weight_pos + 7)) / 8], ((weight_pos + 7) % 8));
|
163 |
-
ushort w8 = extract_weights(weights_data[((weight_pos + 8)) / 8], ((weight_pos + 8) % 8));
|
164 |
-
|
165 |
-
if ((ows8 * 8) - 1 <= IW - 1) {
|
166 |
-
in00 = *((__local ushort8 *)(src - IW - DW));
|
167 |
-
in01 = *((__local ushort8 *)(src - IW));
|
168 |
-
in02 = *((__local ushort8 *)(src - IW + DW));
|
169 |
-
|
170 |
-
in10 = *((__local ushort8 *)(src - DW));
|
171 |
-
in11 = *((__local ushort8 *)(src));
|
172 |
-
in12 = *((__local ushort8 *)(src + DW));
|
173 |
-
|
174 |
-
in20 = *((__local ushort8 *)(src + IW - DW));
|
175 |
-
in21 = *((__local ushort8 *)(src + IW));
|
176 |
-
in22 = *((__local ushort8 *)(src + IW + DW));
|
177 |
-
}
|
178 |
-
|
179 |
-
//padding column
|
180 |
-
if (ows8 * 8 - 1 < 0) {
|
181 |
-
int boundary = 1 - ows8 * 8;
|
182 |
-
boundary = boundary > 8 ? 8 : boundary;
|
183 |
-
for (int offset = 0; offset < boundary; offset++) {
|
184 |
-
*((half *)(&in00) + offset) = pad_value_half;
|
185 |
-
*((half *)(&in10) + offset) = pad_value_half;
|
186 |
-
*((half *)(&in20) + offset) = pad_value_half;
|
187 |
-
}
|
188 |
-
}
|
189 |
-
if ((ows8 * 8 + 7) + DW + DW - 1 > IW - 1) {
|
190 |
-
int boundary = (IW - DW - 1 - DW + 1) - ows8 * 8 + 1;
|
191 |
-
boundary = boundary < 0 ? 0 : boundary;
|
192 |
-
for (int offset = boundary; offset < 8; offset++) {
|
193 |
-
*((half *)(&in02) + offset) = pad_value_half;
|
194 |
-
*((half *)(&in12) + offset) = pad_value_half;
|
195 |
-
*((half *)(&in22) + offset) = pad_value_half;
|
196 |
-
}
|
197 |
-
}
|
198 |
-
if ((ows8 * 8 + 7) + DW - 1 > IW - 1) {
|
199 |
-
int boundary = (IW - 1 - DW + 1) - ows8 * 8 + 1;
|
200 |
-
boundary = boundary < 0 ? 0 : boundary;
|
201 |
-
for (int offset = boundary; offset < 8; offset++) {
|
202 |
-
*((half *)(&in01) + offset) = pad_value_half;
|
203 |
-
*((half *)(&in11) + offset) = pad_value_half;
|
204 |
-
*((half *)(&in21) + offset) = pad_value_half;
|
205 |
-
}
|
206 |
-
}
|
207 |
-
if ((ows8 * 8 + 7) - 1 > IW - 1) {
|
208 |
-
int boundary = (IW - 1 + 1) - ows8 * 8 + 1;
|
209 |
-
boundary = boundary < 0 ? 0 : boundary;
|
210 |
-
for (int offset = boundary; offset < 8; offset++) {
|
211 |
-
*((half *)(&in00) + offset) = pad_value_half;
|
212 |
-
*((half *)(&in10) + offset) = pad_value_half;
|
213 |
-
*((half *)(&in20) + offset) = pad_value_half;
|
214 |
-
}
|
215 |
-
}
|
216 |
-
|
217 |
-
ushort8 w00 = (ushort8)(w0);
|
218 |
-
ushort8 w01 = (ushort8)(w1);
|
219 |
-
ushort8 w02 = (ushort8)(w2);
|
220 |
-
ushort8 w10 = (ushort8)(w3);
|
221 |
-
ushort8 w11 = (ushort8)(w4);
|
222 |
-
ushort8 w12 = (ushort8)(w5);
|
223 |
-
ushort8 w20 = (ushort8)(w6);
|
224 |
-
ushort8 w21 = (ushort8)(w7);
|
225 |
-
ushort8 w22 = (ushort8)(w8);
|
226 |
-
|
227 |
-
ushort8 cond0 = (((in00) < (ushort8)0x8000) && (in00 > (ushort8)0x0000)) ?
|
228 |
-
(ushort8)(1) :
|
229 |
-
(ushort8)(0);
|
230 |
-
ushort8 cond1 = (((in01) < (ushort8)0x8000) && (in01 > (ushort8)0x0000)) ?
|
231 |
-
(ushort8)(1) :
|
232 |
-
(ushort8)(0);
|
233 |
-
ushort8 cond2 = (((in02) < (ushort8)0x8000) && (in02 > (ushort8)0x0000)) ?
|
234 |
-
(ushort8)(1) :
|
235 |
-
(ushort8)(0);
|
236 |
-
ushort8 cond3 = (((in10) < (ushort8)0x8000) && (in10 > (ushort8)0x0000)) ?
|
237 |
-
(ushort8)(1) :
|
238 |
-
(ushort8)(0);
|
239 |
-
ushort8 cond4 = (((in11) < (ushort8)0x8000) && (in11 > (ushort8)0x0000)) ?
|
240 |
-
(ushort8)(1) :
|
241 |
-
(ushort8)(0);
|
242 |
-
ushort8 cond5 = (((in12) < (ushort8)0x8000) && (in12 > (ushort8)0x0000)) ?
|
243 |
-
(ushort8)(1) :
|
244 |
-
(ushort8)(0);
|
245 |
-
ushort8 cond6 = (((in20) < (ushort8)0x8000) && (in20 > (ushort8)0x0000)) ?
|
246 |
-
(ushort8)(1) :
|
247 |
-
(ushort8)(0);
|
248 |
-
ushort8 cond7 = (((in21) < (ushort8)0x8000) && (in21 > (ushort8)0x0000)) ?
|
249 |
-
(ushort8)(1) :
|
250 |
-
(ushort8)(0);
|
251 |
-
ushort8 cond8 = (((in22) < (ushort8)0x8000) && (in22 > (ushort8)0x0000)) ?
|
252 |
-
(ushort8)(1) :
|
253 |
-
(ushort8)(0);
|
254 |
-
|
255 |
-
val += (cond0 ^ w00);
|
256 |
-
val += (cond1 ^ w01);
|
257 |
-
val += (cond2 ^ w02);
|
258 |
-
val += (cond3 ^ w10);
|
259 |
-
val += (cond4 ^ w11);
|
260 |
-
val += (cond5 ^ w12);
|
261 |
-
val += (cond6 ^ w20);
|
262 |
-
val += (cond7 ^ w21);
|
263 |
-
val += (cond8 ^ w22);
|
264 |
-
}
|
265 |
-
|
266 |
-
ushort8 val_shift = val << 1;
|
267 |
-
int boundary = (ows8 * 8 + 7) / SW <= OW - 1 ? (ows8 * 8 + 7) / SW : OW - 1;
|
268 |
-
for (int ow = (ows8 * 8 + SW - 1) / SW; ow <= boundary; ow++) {
|
269 |
-
*(dst_local + ow) =
|
270 |
-
(half)(IC / GC * KH * KW - *((ushort *)(&val_shift) + ow * SW - ows8 * 8));
|
271 |
-
}
|
272 |
-
}
|
273 |
-
|
274 |
-
barrier(CLK_LOCAL_MEM_FENCE);
|
275 |
-
|
276 |
-
event_t e2 = async_work_group_copy(dst_data + oc * OW * OH + oh * OW, dst_local, OW, 0);
|
277 |
-
wait_group_events(1, &e2);
|
278 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/convolution1x1_chw.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8717c8429d41a69337007871137f06a9e6b38c685b5b3fecc634fade0eaa7e7f
|
3 |
-
size 9220
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/convolution1x1_chw.cl
DELETED
@@ -1,114 +0,0 @@
|
|
1 |
-
// Copyright (C) 2018-2022 Intel Corporation
|
2 |
-
// SPDX-License-Identifier: Apache-2.0
|
3 |
-
//
|
4 |
-
|
5 |
-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
6 |
-
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
7 |
-
|
8 |
-
__kernel void Convolution1x1_NCHW(
|
9 |
-
const __global half *in,
|
10 |
-
const __global half *out,
|
11 |
-
const __global half *w,
|
12 |
-
int IW,
|
13 |
-
int IH,
|
14 |
-
int IC,
|
15 |
-
int OW,
|
16 |
-
int OH,
|
17 |
-
int OC)
|
18 |
-
{
|
19 |
-
__local half in_local[8 * 1024];
|
20 |
-
__local half out_local[8 * 1024];
|
21 |
-
|
22 |
-
event_t e1 = async_work_group_copy_2D2D(
|
23 |
-
in_local, // dst
|
24 |
-
in + get_group_id(0) * IW, // src
|
25 |
-
IW, // num_elements_per_line,
|
26 |
-
IC, // num_lines,
|
27 |
-
IW * IH - IW, // src_line_stride,
|
28 |
-
0, // dst_line_stride,
|
29 |
-
0);
|
30 |
-
wait_group_events(1, &e1);
|
31 |
-
|
32 |
-
int oh = get_global_id(0);
|
33 |
-
int oc = get_global_id(1);
|
34 |
-
|
35 |
-
int stride;
|
36 |
-
int write_output = 0;
|
37 |
-
__global half *src;
|
38 |
-
|
39 |
-
__global half8 *w8 = (__global half8 *)(&w[oc * IC]);
|
40 |
-
__global half *w1 = (__global half *)(&w[oc * IC]);
|
41 |
-
|
42 |
-
for (uint ow = 0; ow < (OW & (~0x7)); ow += 8) {
|
43 |
-
uint iw = ow;
|
44 |
-
uint ih = oh;
|
45 |
-
|
46 |
-
half8 val8_0 = 0.0f;
|
47 |
-
|
48 |
-
__local half8 *in8_0 = (__local half8 *)(&in_local[iw + 0 * IW]);
|
49 |
-
__local half8 *in8_1 = (__local half8 *)(&in_local[iw + 1 * IW]);
|
50 |
-
__local half8 *in8_2 = (__local half8 *)(&in_local[iw + 2 * IW]);
|
51 |
-
__local half8 *in8_3 = (__local half8 *)(&in_local[iw + 3 * IW]);
|
52 |
-
__local half8 *in8_4 = (__local half8 *)(&in_local[iw + 4 * IW]);
|
53 |
-
__local half8 *in8_5 = (__local half8 *)(&in_local[iw + 5 * IW]);
|
54 |
-
__local half8 *in8_6 = (__local half8 *)(&in_local[iw + 6 * IW]);
|
55 |
-
__local half8 *in8_7 = (__local half8 *)(&in_local[iw + 7 * IW]);
|
56 |
-
|
57 |
-
for (uint ic = 0; ic < IC / 8; ic++) {
|
58 |
-
val8_0 += (in8_0[ic * IW]) * ((half8)w8[ic].s0);
|
59 |
-
val8_0 += (in8_1[ic * IW]) * ((half8)w8[ic].s1);
|
60 |
-
val8_0 += (in8_2[ic * IW]) * ((half8)w8[ic].s2);
|
61 |
-
val8_0 += (in8_3[ic * IW]) * ((half8)w8[ic].s3);
|
62 |
-
val8_0 += (in8_4[ic * IW]) * ((half8)w8[ic].s4);
|
63 |
-
val8_0 += (in8_5[ic * IW]) * ((half8)w8[ic].s5);
|
64 |
-
val8_0 += (in8_6[ic * IW]) * ((half8)w8[ic].s6);
|
65 |
-
val8_0 += (in8_7[ic * IW]) * ((half8)w8[ic].s7);
|
66 |
-
}
|
67 |
-
|
68 |
-
for (uint ic = (IC & (~0x7)); ic < IC; ++ic) {
|
69 |
-
val8_0 += *((__local half8 *)(&in_local[iw + ic * IW])) * ((half8)w1[ic]);
|
70 |
-
}
|
71 |
-
*((__local half8 *)&out_local[ow + 0]) = (val8_0);
|
72 |
-
}
|
73 |
-
|
74 |
-
uint iw = (OW & (~0x7));
|
75 |
-
uint ih = oh;
|
76 |
-
|
77 |
-
half8 val8_0 = 0.0f;
|
78 |
-
|
79 |
-
__local half8 *in8_0 = (__local half8 *)(&in_local[iw + 0 * IW]);
|
80 |
-
__local half8 *in8_1 = (__local half8 *)(&in_local[iw + 1 * IW]);
|
81 |
-
__local half8 *in8_2 = (__local half8 *)(&in_local[iw + 2 * IW]);
|
82 |
-
__local half8 *in8_3 = (__local half8 *)(&in_local[iw + 3 * IW]);
|
83 |
-
__local half8 *in8_4 = (__local half8 *)(&in_local[iw + 4 * IW]);
|
84 |
-
__local half8 *in8_5 = (__local half8 *)(&in_local[iw + 5 * IW]);
|
85 |
-
__local half8 *in8_6 = (__local half8 *)(&in_local[iw + 6 * IW]);
|
86 |
-
__local half8 *in8_7 = (__local half8 *)(&in_local[iw + 7 * IW]);
|
87 |
-
|
88 |
-
for (uint ic = 0; ic < IC / 8; ic++) {
|
89 |
-
val8_0 += (in8_0[ic * IW]) * ((half8)w8[ic].s0);
|
90 |
-
val8_0 += (in8_1[ic * IW]) * ((half8)w8[ic].s1);
|
91 |
-
val8_0 += (in8_2[ic * IW]) * ((half8)w8[ic].s2);
|
92 |
-
val8_0 += (in8_3[ic * IW]) * ((half8)w8[ic].s3);
|
93 |
-
val8_0 += (in8_4[ic * IW]) * ((half8)w8[ic].s4);
|
94 |
-
val8_0 += (in8_5[ic * IW]) * ((half8)w8[ic].s5);
|
95 |
-
val8_0 += (in8_6[ic * IW]) * ((half8)w8[ic].s6);
|
96 |
-
val8_0 += (in8_7[ic * IW]) * ((half8)w8[ic].s7);
|
97 |
-
}
|
98 |
-
|
99 |
-
for (uint ic = (IC & (~0x7)); ic < IC; ++ic) {
|
100 |
-
val8_0 += *((__local half8 *)(&in_local[iw + ic * IW])) * ((half8)w1[ic]);
|
101 |
-
}
|
102 |
-
for (uint ow = (OW & (~0x7)); ow < OW; ow++) {
|
103 |
-
out_local[ow + 0] = (val8_0[ow % 8]);
|
104 |
-
}
|
105 |
-
|
106 |
-
barrier(CLK_LOCAL_MEM_FENCE);
|
107 |
-
|
108 |
-
event_t e2 = async_work_group_copy(
|
109 |
-
out + get_group_id(1) * OW * OH + get_group_id(0) * OW,
|
110 |
-
out_local,
|
111 |
-
OW,
|
112 |
-
0);
|
113 |
-
wait_group_events(1, &e2);
|
114 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/convolution1x1_hwc.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5b6122a6bf6f50d2c7fc612d4e286559f9c96746e166892d192e1264e1ce5a2c
|
3 |
-
size 4304
|
|
|
|
|
|
|
|
openvino/vpu_custom_kernels/convolution1x1_hwc.cl
DELETED
@@ -1,126 +0,0 @@
|
|
1 |
-
// Copyright (C) 2018-2022 Intel Corporation
|
2 |
-
// SPDX-License-Identifier: Apache-2.0
|
3 |
-
//
|
4 |
-
|
5 |
-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
6 |
-
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
7 |
-
|
8 |
-
__kernel void Convolution1x1_NHWC(
|
9 |
-
const __global half *in,
|
10 |
-
const __global half *out,
|
11 |
-
const __global half *w,
|
12 |
-
int IW,
|
13 |
-
int IH,
|
14 |
-
int IC,
|
15 |
-
int OW,
|
16 |
-
int OH,
|
17 |
-
int OC)
|
18 |
-
{
|
19 |
-
|
20 |
-
__local half in_local[8 * 1024];
|
21 |
-
__local half out_local[8 * 1024];
|
22 |
-
|
23 |
-
const int sizeAct = IW * IC;
|
24 |
-
|
25 |
-
event_t e1 = async_work_group_copy(in_local, in + get_group_id(0) * sizeAct, sizeAct, 0);
|
26 |
-
wait_group_events(1, &e1);
|
27 |
-
|
28 |
-
int oh = get_global_id(0);
|
29 |
-
int oc = get_global_id(1);
|
30 |
-
|
31 |
-
int stride;
|
32 |
-
int write_output = 0;
|
33 |
-
__global half *src;
|
34 |
-
|
35 |
-
__global half8 *w8 = (__global half8 *)(&w[oc * IC]);
|
36 |
-
__global half *w1 = (__global half *)(&w[oc * IC]);
|
37 |
-
|
38 |
-
for (uint ow = 0; ow < (OW & (~0x7)); ow += 8) {
|
39 |
-
uint iw = ow;
|
40 |
-
uint ih = oh;
|
41 |
-
|
42 |
-
half8 val8_0 = 0.0f;
|
43 |
-
half8 val8_1 = 0.0f;
|
44 |
-
half8 val8_2 = 0.0f;
|
45 |
-
half8 val8_3 = 0.0f;
|
46 |
-
half8 val8_4 = 0.0f;
|
47 |
-
half8 val8_5 = 0.0f;
|
48 |
-
half8 val8_6 = 0.0f;
|
49 |
-
half8 val8_7 = 0.0f;
|
50 |
-
|
51 |
-
__local half8 *in8_0 = (__local half8 *)(&in_local[(iw + 0) * IC]);
|
52 |
-
__local half8 *in8_1 = (__local half8 *)(&in_local[(iw + 1) * IC]);
|
53 |
-
__local half8 *in8_2 = (__local half8 *)(&in_local[(iw + 2) * IC]);
|
54 |
-
__local half8 *in8_3 = (__local half8 *)(&in_local[(iw + 3) * IC]);
|
55 |
-
__local half8 *in8_4 = (__local half8 *)(&in_local[(iw + 4) * IC]);
|
56 |
-
__local half8 *in8_5 = (__local half8 *)(&in_local[(iw + 5) * IC]);
|
57 |
-
__local half8 *in8_6 = (__local half8 *)(&in_local[(iw + 6) * IC]);
|
58 |
-
__local half8 *in8_7 = (__local half8 *)(&in_local[(iw + 7) * IC]);
|
59 |
-
|
60 |
-
for (uint ic = 0; ic < IC / 8; ++ic) {
|
61 |
-
val8_0 += (in8_0[ic]) * (w8[ic]);
|
62 |
-
val8_1 += (in8_1[ic]) * (w8[ic]);
|
63 |
-
val8_2 += (in8_2[ic]) * (w8[ic]);
|
64 |
-
val8_3 += (in8_3[ic]) * (w8[ic]);
|
65 |
-
val8_4 += (in8_4[ic]) * (w8[ic]);
|
66 |
-
val8_5 += (in8_5[ic]) * (w8[ic]);
|
67 |
-
val8_6 += (in8_6[ic]) * (w8[ic]);
|
68 |
-
val8_7 += (in8_7[ic]) * (w8[ic]);
|
69 |
-
}
|
70 |
-
|
71 |
-
half val_0 = 0.0f;
|
72 |
-
half val_1 = 0.0f;
|
73 |
-
half val_2 = 0.0f;
|
74 |
-
half val_3 = 0.0f;
|
75 |
-
half val_4 = 0.0f;
|
76 |
-
half val_5 = 0.0f;
|
77 |
-
half val_6 = 0.0f;
|
78 |
-
half val_7 = 0.0f;
|
79 |
-
for (uint ic = IC & (~0x7); ic < IC; ++ic) {
|
80 |
-
val_0 += *((__local half *)in8_0 + ic) * (*((__global half *)w8 + ic));
|
81 |
-
val_1 += *((__local half *)in8_1 + ic) * (*((__global half *)w8 + ic));
|
82 |
-
val_2 += *((__local half *)in8_2 + ic) * (*((__global half *)w8 + ic));
|
83 |
-
val_3 += *((__local half *)in8_3 + ic) * (*((__global half *)w8 + ic));
|
84 |
-
val_4 += *((__local half *)in8_4 + ic) * (*((__global half *)w8 + ic));
|
85 |
-
val_5 += *((__local half *)in8_5 + ic) * (*((__global half *)w8 + ic));
|
86 |
-
val_6 += *((__local half *)in8_6 + ic) * (*((__global half *)w8 + ic));
|
87 |
-
val_7 += *((__local half *)in8_7 + ic) * (*((__global half *)w8 + ic));
|
88 |
-
}
|
89 |
-
out_local[ow + 0] = __builtin_shave_sau_sumx_f16_r(val8_0) + val_0;
|
90 |
-
out_local[ow + 1] = __builtin_shave_sau_sumx_f16_r(val8_1) + val_1;
|
91 |
-
out_local[ow + 2] = __builtin_shave_sau_sumx_f16_r(val8_2) + val_2;
|
92 |
-
out_local[ow + 3] = __builtin_shave_sau_sumx_f16_r(val8_3) + val_3;
|
93 |
-
out_local[ow + 4] = __builtin_shave_sau_sumx_f16_r(val8_4) + val_4;
|
94 |
-
out_local[ow + 5] = __builtin_shave_sau_sumx_f16_r(val8_5) + val_5;
|
95 |
-
out_local[ow + 6] = __builtin_shave_sau_sumx_f16_r(val8_6) + val_6;
|
96 |
-
out_local[ow + 7] = __builtin_shave_sau_sumx_f16_r(val8_7) + val_7;
|
97 |
-
}
|
98 |
-
for (uint ow = (OW & (~0x7)); ow < OW; ow++) {
|
99 |
-
|
100 |
-
uint iw = ow;
|
101 |
-
uint ih = oh;
|
102 |
-
|
103 |
-
half8 val8 = 0.0f;
|
104 |
-
|
105 |
-
__local half8 *in8 = (__local half8 *)(&in_local[iw * IC]);
|
106 |
-
|
107 |
-
for (uint ic = 0; ic < IC / 8; ++ic) {
|
108 |
-
val8 += (in8[ic]) * (w8[ic]);
|
109 |
-
}
|
110 |
-
|
111 |
-
half val = 0.0f;
|
112 |
-
for (uint ic = (IC & (~0x7)); ic < IC; ++ic) {
|
113 |
-
val += (*((__local half *)in8 + ic)) * (*((__global half *)w8 + ic));
|
114 |
-
}
|
115 |
-
out_local[ow] = __builtin_shave_sau_sumx_f16_r(val8) + val;
|
116 |
-
}
|
117 |
-
|
118 |
-
barrier(CLK_LOCAL_MEM_FENCE);
|
119 |
-
|
120 |
-
event_t e2 = async_work_group_copy(
|
121 |
-
out + get_group_id(1) * OW * OH + get_group_id(0) * OW,
|
122 |
-
out_local,
|
123 |
-
OW,
|
124 |
-
0);
|
125 |
-
wait_group_events(1, &e2);
|
126 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|