Spaces:

FaceOnLive
/

Face-Liveness-Detection-SDK

Running

App Files Files Community

Face-Liveness-Detection-SDK / openvino /vpu_custom_kernels /reorg_hwc.cl

Zhu-FaceOnLive

Initial commit.

2ded60b about 2 years ago

raw

history blame

1.99 kB

	// Copyright (C) 2018-2022 Intel Corporation
	// SPDX-License-Identifier: Apache-2.0
	//

	#pragma OPENCL EXTENSION cl_khr_fp16 : enable
	#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable

	__kernel void reorg_hwc(
	__global half const *restrict src,
	__global half *restrict dst,
	int W,
	int H,
	int C,
	int stride)
	{
	__local half local_src[8 * 1024];
	__local half local_dst[8 * 1024];

	event_t e1 = async_work_group_copy_2D2D(
	local_src, // dst
	src + get_group_id(0) * stride + get_group_id(1) * C, // src
	stride, // num_elements_per_line
	H * W / stride, // num_lines
	(C - 1) * stride, // src_line_stride
	0, // dst_line_stride
	0);
	wait_group_events(1, &e1);

	const int stride_y = get_local_id(1);
	const int blocks = get_local_size(0);
	const int b = get_local_id(0);

	const int OC = stride * stride;
	const int OH = H / stride;
	const int OW = W / stride;
	const int IC = stride;
	const int IH = H;
	const int IW = W / stride;

	for (int block_h = 0; block_h < stride; block_h++) {
	const int src_line = b * stride * stride + stride_y * stride + block_h;
	const int c = src_line / IH;
	const int h = src_line % IH;

	const int dst_line = b * stride + stride_y * blocks * stride + block_h;
	const int oc = dst_line / OH;
	const int oh = dst_line % OH;

	for (int w = 0; w < W / stride; w++) {
	local_dst[oh * OW * OC + w * OC + oc] = local_src[h * IW * IC + w * IC + c];
	}
	}

	barrier(CLK_LOCAL_MEM_FENCE);

	event_t e2 = async_work_group_copy_2D2D(
	dst + get_group_id(1) * C + get_group_id(0) * stride, // dst
	local_dst, // src
	stride, // num_elements_per_line
	W * H / stride, // num_lines
	0, // src_line_stride
	C * stride - stride, // dst_line_stride
	0);
	wait_group_events(1, &e2);
	}