qqc1989 committed on
Commit 8df2c09 · verified · 1 Parent(s): 60db486

Upload 39 files

.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+depth_anything_v2_vits_ax620e.axmodel filter=lfs diff=lfs merge=lfs -text
+depth_anything_v2_vits_ax650.axmodel filter=lfs diff=lfs merge=lfs -text
+python/examples/demo19.jpg filter=lfs diff=lfs merge=lfs -text
+python/output.png filter=lfs diff=lfs merge=lfs -text
calib-cocotest2017.tar ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fd1652bceab31a66a35e05cc26a2c7633e8c5108f4c07c21b5868b9605cc15a
+size 20869120
depth_anything_v2_vits.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:443e95f17819f347f5f987384b8cb7d7d18ed6af6ac46dec9b0152748ba7dfd0
+size 98985978
depth_anything_v2_vits_ax620e.axmodel ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f13d462a968e309354e23babdaf6a90b26841c58fd02f36531ba5d7bb545bea4
+size 38448968
depth_anything_v2_vits_ax650.axmodel ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4520309cbefa63a4127aae75f7b7aa0dc5cc07fa02ef7a13a4219b88950499a1
+size 27978862
python/axengine/__init__.py ADDED
@@ -0,0 +1,22 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+# thanks to community contributors list below:
+# zylo117: https://github.com/zylo117, first implementation of the axclrt backend
+
+from ._providers import axengine_provider_name, axclrt_provider_name
+from ._providers import get_all_providers, get_available_providers
+
+# check if axclrt is installed, or is a supported chip (e.g. AX650, AX620E etc.)
+_available_providers = get_available_providers()
+if not _available_providers:
+    raise ImportError(
+        f"No providers found. Please make sure you have installed one of the following: {get_all_providers()}")
+print("[INFO] Available providers: ", _available_providers)
+
+from ._node import NodeArg
+from ._session import SessionOptions, InferenceSession
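
Editor's note: for orientation, a minimal usage sketch of the public API exposed by this `__init__.py` follows. It is not part of the commit; it assumes the `python/axengine` directory is importable as `axengine` and that `InferenceSession.run` mirrors the onnxruntime-style `run(output_names, input_feed)` signature seen in the session classes below. The model filename is one of the files uploaded in this commit.

# Illustrative sketch only (not part of this commit).
import numpy as np
import axengine as axe

# The default provider is picked automatically: AXCLRTExecutionProvider if
# axcl_rt is present, otherwise AxEngineExecutionProvider.
sess = axe.InferenceSession("depth_anything_v2_vits_ax650.axmodel")

# Inspect the model's I/O metadata (NodeArg: name, dtype, shape).
for node in sess.get_inputs():
    print(node.name, node.dtype, node.shape)

# Feed a dummy tensor of the expected shape/dtype and fetch all outputs.
inp = sess.get_inputs()[0]
dummy = np.zeros(inp.shape, dtype=inp.dtype)
depth = sess.run(None, {inp.name: dummy})[0]
print(depth.shape)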
python/axengine/_axclrt.py ADDED
@@ -0,0 +1,372 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+# first implementation of AXCLRTSession contributed by zylo117
+
+import atexit
+import os
+import time
+from typing import Any, Sequence
+
+import ml_dtypes as mldt
+import numpy as np
+
+from ._axclrt_capi import axclrt_cffi, axclrt_lib
+from ._axclrt_types import VNPUType, ModelType
+from ._base_session import Session, SessionOptions
+from ._node import NodeArg
+
+__all__ = ["AXCLRTSession"]
+
+_is_axclrt_initialized = False
+_is_axclrt_engine_initialized = False
+
+
+def _transform_dtype(dtype):
+    if dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_UINT8):
+        return np.dtype(np.uint8)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_INT8):
+        return np.dtype(np.int8)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_UINT16):
+        return np.dtype(np.uint16)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_INT16):
+        return np.dtype(np.int16)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_UINT32):
+        return np.dtype(np.uint32)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_INT32):
+        return np.dtype(np.int32)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_FP32):
+        return np.dtype(np.float32)
+    elif dtype == axclrt_cffi.cast("axclrtEngineDataType", axclrt_lib.AXCL_DATA_TYPE_BF16):
+        return np.dtype(mldt.bfloat16)
+    else:
+        raise ValueError(f"Unsupported data type '{dtype}'.")
+
+
+def _initialize_axclrt():
+    global _is_axclrt_initialized
+    ret = axclrt_lib.axclInit([])
+    if ret != 0:
+        raise RuntimeError(f"Failed to initialize axcl runtime. {ret}.")
+    _is_axclrt_initialized = True
+
+
+def _finalize_axclrt():
+    global _is_axclrt_initialized, _is_axclrt_engine_initialized
+    if _is_axclrt_engine_initialized:
+        axclrt_lib.axclrtEngineFinalize()
+        _is_axclrt_engine_initialized = False
+    if _is_axclrt_initialized:
+        axclrt_lib.axclFinalize()
+        _is_axclrt_initialized = False
+
+
+_initialize_axclrt()
+atexit.register(_finalize_axclrt)
+
+
+def _get_vnpu_type() -> VNPUType:
+    vnpu_type = axclrt_cffi.new("axclrtEngineVNpuKind *")
+    ret = axclrt_lib.axclrtEngineGetVNpuKind(vnpu_type)
+    if ret != 0:
+        raise RuntimeError("Failed to get VNPU attribute.")
+    return VNPUType(vnpu_type[0])
+
+
+def _get_version():
+    major, minor, patch = axclrt_cffi.new('int32_t *'), axclrt_cffi.new('int32_t *'), axclrt_cffi.new(
+        'int32_t *')
+    axclrt_lib.axclrtGetVersion(major, minor, patch)
+    return f'{major[0]}.{minor[0]}.{patch[0]}'
+
+
+class AXCLRTSession(Session):
+    def __init__(
+        self,
+        path_or_bytes: str | bytes | os.PathLike,
+        sess_options: SessionOptions | None = None,
+        provider_options: dict[Any, Any] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__()
+
+        self._device_index = 0
+
+        if provider_options is not None and "device_id" in provider_options[0]:
+            self._device_index = provider_options[0].get("device_id", 0)
+
+        lst = axclrt_cffi.new("axclrtDeviceList *")
+        ret = axclrt_lib.axclrtGetDeviceList(lst)
+        if ret != 0 or lst.num == 0:
+            raise RuntimeError(f"Get AXCL device failed 0x{ret:08x}, find total {lst.num} device.")
+
+        if self._device_index >= lst.num:
+            raise RuntimeError(f"Device index {self._device_index} is out of range, total {lst.num} device.")
+
+        self._device_id = lst.devices[self._device_index]
+        ret = axclrt_lib.axclrtSetDevice(self._device_id)
+        if ret != 0 or lst.num == 0:
+            raise RuntimeError(f"Set AXCL device failed 0x{ret:08x}.")
+
+        global _is_axclrt_engine_initialized
+        vnpu_type = axclrt_cffi.cast(
+            "axclrtEngineVNpuKind", VNPUType.DISABLED.value
+        )
+        # try to initialize NPU as disabled
+        ret = axclrt_lib.axclrtEngineInit(vnpu_type)
+        # if failed, try to get vnpu type
+        if 0 != ret:
+            vnpu = axclrt_cffi.new("axclrtEngineVNpuKind *")
+            ret = axclrt_lib.axclrtEngineGetVNpuKind(vnpu)
+            # if failed, that means the NPU is not available
+            if ret != 0:
+                raise RuntimeError(f"axclrtEngineInit as {vnpu.value} failed 0x{ret:08x}.")
+            # if it succeeds, the NPU has already been initialized (as vnpu.value) by
+            # another user, which is why our own initialization failed. That other user
+            # may also de-initialize the NPU at any moment and terminate this app
+            # unexpectedly; because this API mirrors onnxruntime there is no way to
+            # guard against it, so just print a warning message.
+            else:
+                print(f"[WARNING] Failed to initialize NPU as {vnpu_type}, NPU is already initialized as {vnpu.value}.")
+        # initialize NPU successfully, mark the flag to ensure the engine will be finalized
+        else:
+            _is_axclrt_engine_initialized = True
+
+        self.soc_name = axclrt_cffi.string(axclrt_lib.axclrtGetSocName()).decode()
+        print(f"[INFO] SOC Name: {self.soc_name}")
+
+        # model handle, context, info, io
+        self._model_id = axclrt_cffi.new("uint64_t *")
+        self._context_id = axclrt_cffi.new("uint64_t *")
+
+        # get vnpu type
+        self._vnpu_type = _get_vnpu_type()
+        print(f"[INFO] VNPU type: {self._vnpu_type}")
+
+        # load model
+        ret = self._load(path_or_bytes)
+        if 0 != ret:
+            raise RuntimeError("Failed to load model.")
+        print(f"[INFO] Compiler version: {self._get_model_tool_version()}")
+
+        # get model info
+        self._info = self._get_info()
+        self._shape_count = self._get_shape_count()
+        self._inputs = self._get_inputs()
+        self._outputs = self._get_outputs()
+
+        # prepare io
+        self._io = self._prepare_io()
+
+    def __del__(self):
+        self._unload()
+
+    def _load(self, path_or_bytes):
+        # model buffer, almost copied from onnx runtime
+        if isinstance(path_or_bytes, (str, os.PathLike)):
+            _model_path = axclrt_cffi.new("char[]", path_or_bytes.encode('utf-8'))
+            ret = axclrt_lib.axclrtEngineLoadFromFile(_model_path, self._model_id)
+            if ret != 0:
+                raise RuntimeError("axclrtEngineLoadFromFile failed.")
+        elif isinstance(path_or_bytes, bytes):
+            _model_buffer = axclrt_cffi.new("char[]", path_or_bytes)
+            _model_buffer_size = len(path_or_bytes)
+
+            dev_mem_ptr = axclrt_cffi.new('void **', axclrt_cffi.NULL)
+            ret = axclrt_lib.axclrtMalloc(dev_mem_ptr, _model_buffer_size, axclrt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY)
+            if ret != 0:
+                raise RuntimeError("axclrtMalloc failed.")
+
+            ret = axclrt_lib.axclrtMemcpy(dev_mem_ptr[0], _model_buffer, _model_buffer_size, axclrt_lib.AXCL_MEMCPY_HOST_TO_DEVICE)
+            if ret != 0:
+                axclrt_lib.axclrtFree(dev_mem_ptr[0])
+                raise RuntimeError("axclrtMemcpy failed.")
+
+            ret = axclrt_lib.axclrtEngineLoadFromMem(dev_mem_ptr[0], _model_buffer_size, self._model_id)
+            axclrt_lib.axclrtFree(dev_mem_ptr[0])
+            if ret != 0:
+                raise RuntimeError("axclrtEngineLoadFromMem failed.")
+        else:
+            raise TypeError(f"Unable to load model from type '{type(path_or_bytes)}'")
+
+        ret = axclrt_lib.axclrtEngineCreateContext(self._model_id[0], self._context_id)
+        if ret != 0:
+            raise RuntimeError("axclrtEngineCreateContext failed")
+        return ret
+
+    def _unload(self):
+        if self._io is not None:
+            dev_size = axclrt_cffi.new("uint64_t *")
+            dev_prt = axclrt_cffi.new("void **")
+            for i in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])):
+                axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io, i, dev_prt, dev_size)
+                axclrt_lib.axclrtFree(dev_prt[0])
+            for i in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])):
+                axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io, i, dev_prt, dev_size)
+                axclrt_lib.axclrtFree(dev_prt[0])
+            axclrt_lib.axclrtEngineDestroyIO(self._io)
+            self._io = None
+        if self._model_id[0] is not None:
+            axclrt_lib.axclrtEngineUnload(self._model_id[0])
+            self._model_id[0] = 0
+
+    def _get_model_tool_version(self):
+        model_tool_version = axclrt_lib.axclrtEngineGetModelCompilerVersion(self._model_id[0])
+        return axclrt_cffi.string(model_tool_version).decode()
+
+    def _get_info(self):
+        io_info = axclrt_cffi.new("axclrtEngineIOInfo *")
+        ret = axclrt_lib.axclrtEngineGetIOInfo(self._model_id[0], io_info)
+        if ret != 0:
+            raise RuntimeError("axclrtEngineGetIOInfo failed.")
+        return io_info
+
+    def _get_shape_count(self):
+        count = axclrt_cffi.new("int32_t *")
+        ret = axclrt_lib.axclrtEngineGetShapeGroupsCount(self._info[0], count)
+        if ret != 0:
+            axclrt_lib.axclrtEngineUnload(self._model_id[0])
+            raise RuntimeError("axclrtEngineGetShapeGroupsCount failed.")
+        return count[0]
+
+    def _get_inputs(self):
+        inputs = []
+        for group in range(self._shape_count):
+            one_group_io = []
+            for index in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])):
+                cffi_name = axclrt_lib.axclrtEngineGetInputNameByIndex(self._info[0], index)
+                name = axclrt_cffi.string(cffi_name).decode("utf-8")
+
+                cffi_dtype = axclrt_cffi.new("axclrtEngineDataType *")
+                ret = axclrt_lib.axclrtEngineGetInputDataType(self._info[0], index, cffi_dtype)
+                if ret != 0:
+                    raise RuntimeError("axclrtEngineGetInputDataType failed.")
+                dtype = _transform_dtype(cffi_dtype[0])
+
+                cffi_dims = axclrt_cffi.new("axclrtEngineIODims *")
+                ret = axclrt_lib.axclrtEngineGetInputDims(self._info[0], group, index, cffi_dims)
+                if ret != 0:
+                    raise RuntimeError("axclrtEngineGetInputDims failed.")
+                shape = [cffi_dims.dims[i] for i in range(cffi_dims.dimCount)]
+
+                meta = NodeArg(name, dtype, shape)
+                one_group_io.append(meta)
+            inputs.append(one_group_io)
+        return inputs
+
+    def _get_outputs(self):
+        outputs = []
+        for group in range(self._shape_count):
+            one_group_io = []
+            for index in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])):
+                name = axclrt_lib.axclrtEngineGetOutputNameByIndex(self._info[0], index)
+
+                cffi_dtype = axclrt_cffi.new("axclrtEngineDataType *")
+                ret = axclrt_lib.axclrtEngineGetOutputDataType(self._info[0], index, cffi_dtype)
+                if ret != 0:
+                    raise RuntimeError("axclrtEngineGetOutputDataType failed.")
+                dtype = _transform_dtype(cffi_dtype[0])
+
+                cffi_dims = axclrt_cffi.new("axclrtEngineIODims *")
+                ret = axclrt_lib.axclrtEngineGetOutputDims(self._info[0], group, index, cffi_dims)
+                if ret != 0:
+                    raise RuntimeError("axclrtEngineGetOutputDims failed.")
+                shape = [cffi_dims.dims[i] for i in range(cffi_dims.dimCount)]
+
+                meta = NodeArg(name, dtype, shape)
+                one_group_io.append(meta)
+            outputs.append(one_group_io)
+        return outputs
+
+    def _prepare_io(self):
+        _io = axclrt_cffi.new("axclrtEngineIO *")
+        ret = axclrt_lib.axclrtEngineCreateIO(self._info[0], _io)
+        if ret != 0:
+            raise RuntimeError(f"axclrtEngineCreateIO failed 0x{ret:08x}.")
+        for i in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])):
+            max_size = 0
+            for group in range(self._shape_count):
+                size = axclrt_lib.axclrtEngineGetInputSizeByIndex(self._info[0], group, i)
+                max_size = max(max_size, size)
+            dev_ptr = axclrt_cffi.new("void **")
+            ret = axclrt_lib.axclrtMalloc(dev_ptr, max_size, axclrt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY)
+            if 0 != ret or dev_ptr[0] == axclrt_cffi.NULL:
+                raise RuntimeError(f"axclrtMalloc failed 0x{ret:08x} for input {i}.")
+            ret = axclrt_lib.axclrtEngineSetInputBufferByIndex(_io[0], i, dev_ptr[0], max_size)
+            if 0 != ret:
+                raise RuntimeError(f"axclrtEngineSetInputBufferByIndex failed 0x{ret:08x} for input {i}.")
+        for i in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])):
+            max_size = 0
+            for group in range(self._shape_count):
+                size = axclrt_lib.axclrtEngineGetOutputSizeByIndex(self._info[0], group, i)
+                max_size = max(max_size, size)
+            dev_ptr = axclrt_cffi.new("void **")
+            ret = axclrt_lib.axclrtMalloc(dev_ptr, max_size, axclrt_lib.AXCL_MEM_MALLOC_NORMAL_ONLY)
+            if 0 != ret or dev_ptr[0] == axclrt_cffi.NULL:
+                raise RuntimeError(f"axclrtMalloc failed 0x{ret:08x} for output {i}.")
+            ret = axclrt_lib.axclrtEngineSetOutputBufferByIndex(_io[0], i, dev_ptr[0], max_size)
+            if 0 != ret:
+                raise RuntimeError(f"axclrtEngineSetOutputBufferByIndex failed 0x{ret:08x} for output {i}.")
+        return _io[0]
+
+    def run(
+        self,
+        output_names: list[str],
+        input_feed: dict[str, np.ndarray],
+        run_options=None
+    ):
+        self._validate_input(input_feed)
+        self._validate_output(output_names)
+
+        if None is output_names:
+            output_names = [o.name for o in self.get_outputs()]
+
+        # fill model io
+        dev_prt = axclrt_cffi.new("void **")
+        dev_size = axclrt_cffi.new("uint64_t *")
+        for key, npy in input_feed.items():
+            for i, one in enumerate(self.get_inputs()):
+                if one.name == key:
+                    assert (
+                        list(one.shape) == list(npy.shape) and one.dtype == npy.dtype
+                    ), f"model inputs({key}) expect shape {one.shape} and dtype {one.dtype}, however gets input with shape {npy.shape} and dtype {npy.dtype}"
+
+                    if not (
+                        not npy.flags.c_contiguous
+                        and npy.flags.f_contiguous
+                        and npy.flags.contiguous
+                    ):
+                        npy = np.ascontiguousarray(npy)
+                    npy_ptr = axclrt_cffi.cast("void *", npy.ctypes.data)
+                    ret = axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io, i, dev_prt, dev_size)
+                    if 0 != ret:
+                        raise RuntimeError(f"axclrtEngineGetInputBufferByIndex failed for input {i}.")
+                    ret = axclrt_lib.axclrtMemcpy(dev_prt[0], npy_ptr, npy.nbytes, axclrt_lib.AXCL_MEMCPY_HOST_TO_DEVICE)
+                    if 0 != ret:
+                        raise RuntimeError(f"axclrtMemcpy failed for input {i}.")
+
+        # execute model
+        ret = axclrt_lib.axclrtEngineExecute(self._model_id[0], self._context_id[0], 0, self._io)
+
+        # get output
+        outputs = []
+        if 0 == ret:
+            for i in range(len(self.get_outputs())):
+                ret = axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io, i, dev_prt, dev_size)
+                if 0 != ret:
+                    raise RuntimeError(f"axclrtEngineGetOutputBufferByIndex failed for output {i}.")
+                npy = np.zeros(self.get_outputs()[i].shape, dtype=self.get_outputs()[i].dtype)
+                npy_ptr = axclrt_cffi.cast("void *", npy.ctypes.data)
+                ret = axclrt_lib.axclrtMemcpy(npy_ptr, dev_prt[0], npy.nbytes, axclrt_lib.AXCL_MEMCPY_DEVICE_TO_HOST)
+                if 0 != ret:
+                    raise RuntimeError(f"axclrtMemcpy failed for output {i}.")
+                name = self.get_outputs()[i].name
+                if name in output_names:
+                    outputs.append(npy)
+            return outputs
+        else:
+            raise RuntimeError(f"axclrtEngineExecute failed 0x{ret:08x}")
python/axengine/_axclrt_capi.py ADDED
@@ -0,0 +1,198 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+import ctypes.util
+
+from cffi import FFI
+
+__all__ = ["axclrt_cffi", "axclrt_lib"]
+
+axclrt_cffi = FFI()
+
+# axcl_base.h
+axclrt_cffi.cdef(
+    """
+    #define AXCL_MAX_DEVICE_COUNT 256
+    typedef int32_t axclError;
+    typedef void *axclrtContext;
+    """
+)
+
+# axcl_rt_type.h
+axclrt_cffi.cdef(
+    """
+    typedef struct axclrtDeviceList {
+        uint32_t num;
+        int32_t devices[AXCL_MAX_DEVICE_COUNT];
+    } axclrtDeviceList;
+
+    typedef enum axclrtMemMallocPolicy {
+        AXCL_MEM_MALLOC_HUGE_FIRST,
+        AXCL_MEM_MALLOC_HUGE_ONLY,
+        AXCL_MEM_MALLOC_NORMAL_ONLY
+    } axclrtMemMallocPolicy;
+
+    typedef enum axclrtMemcpyKind {
+        AXCL_MEMCPY_HOST_TO_HOST,
+        AXCL_MEMCPY_HOST_TO_DEVICE,     //!< host vir -> device phy
+        AXCL_MEMCPY_DEVICE_TO_HOST,     //!< host vir <- device phy
+        AXCL_MEMCPY_DEVICE_TO_DEVICE,
+        AXCL_MEMCPY_HOST_PHY_TO_DEVICE, //!< host phy -> device phy
+        AXCL_MEMCPY_DEVICE_TO_HOST_PHY, //!< host phy <- device phy
+    } axclrtMemcpyKind;
+    """
+)
+
+# axcl_rt_engine_type.h
+axclrt_cffi.cdef(
+    """
+    #define AXCLRT_ENGINE_MAX_DIM_CNT 32
+    typedef void* axclrtEngineIOInfo;
+    typedef void* axclrtEngineIO;
+
+    typedef enum axclrtEngineVNpuKind {
+        AXCL_VNPU_DISABLE = 0,
+        AXCL_VNPU_ENABLE = 1,
+        AXCL_VNPU_BIG_LITTLE = 2,
+        AXCL_VNPU_LITTLE_BIG = 3,
+    } axclrtEngineVNpuKind;
+
+    typedef enum axclrtEngineDataType {
+        AXCL_DATA_TYPE_NONE = 0,
+        AXCL_DATA_TYPE_INT4 = 1,
+        AXCL_DATA_TYPE_UINT4 = 2,
+        AXCL_DATA_TYPE_INT8 = 3,
+        AXCL_DATA_TYPE_UINT8 = 4,
+        AXCL_DATA_TYPE_INT16 = 5,
+        AXCL_DATA_TYPE_UINT16 = 6,
+        AXCL_DATA_TYPE_INT32 = 7,
+        AXCL_DATA_TYPE_UINT32 = 8,
+        AXCL_DATA_TYPE_INT64 = 9,
+        AXCL_DATA_TYPE_UINT64 = 10,
+        AXCL_DATA_TYPE_FP4 = 11,
+        AXCL_DATA_TYPE_FP8 = 12,
+        AXCL_DATA_TYPE_FP16 = 13,
+        AXCL_DATA_TYPE_BF16 = 14,
+        AXCL_DATA_TYPE_FP32 = 15,
+        AXCL_DATA_TYPE_FP64 = 16,
+    } axclrtEngineDataType;
+
+    typedef enum axclrtEngineDataLayout {
+        AXCL_DATA_LAYOUT_NONE = 0,
+        AXCL_DATA_LAYOUT_NHWC = 0,
+        AXCL_DATA_LAYOUT_NCHW = 1,
+    } axclrtEngineDataLayout;
+
+    typedef struct axclrtEngineIODims {
+        int32_t dimCount;
+        int32_t dims[AXCLRT_ENGINE_MAX_DIM_CNT];
+    } axclrtEngineIODims;
+    """
+)
+
+# axcl.h
+axclrt_cffi.cdef(
+    """
+    axclError axclInit(const char *config);
+    axclError axclFinalize();
+    """
+)
+
+# axcl_rt.h
+axclrt_cffi.cdef(
+    """
+    axclError axclrtGetVersion(int32_t *major, int32_t *minor, int32_t *patch);
+    const char *axclrtGetSocName();
+    """
+)
+
+# axcl_rt_device.h
+axclrt_cffi.cdef(
+    """
+    axclError axclrtGetDeviceList(axclrtDeviceList *deviceList);
+    axclError axclrtSetDevice(int32_t deviceId);
+    axclError axclrtResetDevice(int32_t deviceId);
+    """
+)
+
+# axcl_rt_context.h
+axclrt_cffi.cdef(
+    """
+    axclError axclrtCreateContext(axclrtContext *context, int32_t deviceId);
+    axclError axclrtDestroyContext(axclrtContext context);
+    axclError axclrtSetCurrentContext(axclrtContext context);
+    axclError axclrtGetCurrentContext(axclrtContext *context);
+    axclError axclrtGetDefaultContext(axclrtContext *context, int32_t deviceId);
+    """
+)
+
+# axcl_rt_engine.h
+axclrt_cffi.cdef(
+    """
+    axclError axclrtEngineInit(axclrtEngineVNpuKind npuKind);
+    axclError axclrtEngineGetVNpuKind(axclrtEngineVNpuKind *npuKind);
+    axclError axclrtEngineFinalize();
+
+    axclError axclrtEngineLoadFromFile(const char *modelPath, uint64_t *modelId);
+    axclError axclrtEngineLoadFromMem(const void *model, uint64_t modelSize, uint64_t *modelId);
+    const char* axclrtEngineGetModelCompilerVersion(uint64_t modelId);
+    axclError axclrtEngineUnload(uint64_t modelId);
+
+    axclError axclrtEngineGetIOInfo(uint64_t modelId, axclrtEngineIOInfo *ioInfo);
+    axclError axclrtEngineGetShapeGroupsCount(axclrtEngineIOInfo ioInfo, int32_t *count);
+
+    uint32_t axclrtEngineGetNumInputs(axclrtEngineIOInfo ioInfo);
+    uint32_t axclrtEngineGetNumOutputs(axclrtEngineIOInfo ioInfo);
+
+    uint64_t axclrtEngineGetInputSizeByIndex(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index);
+    uint64_t axclrtEngineGetOutputSizeByIndex(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index);
+
+    axclError axclrtEngineGetInputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims);
+    axclError axclrtEngineGetOutputDims(axclrtEngineIOInfo ioInfo, uint32_t group, uint32_t index, axclrtEngineIODims *dims);
+
+    const char *axclrtEngineGetInputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index);
+    const char *axclrtEngineGetOutputNameByIndex(axclrtEngineIOInfo ioInfo, uint32_t index);
+
+    int32_t axclrtEngineGetInputDataType(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataType *type);
+    int32_t axclrtEngineGetOutputDataType(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataType *type);
+
+    int32_t axclrtEngineGetInputDataLayout(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataLayout *layout);
+    int32_t axclrtEngineGetOutputDataLayout(axclrtEngineIOInfo ioInfo, uint32_t index, axclrtEngineDataLayout *layout);
+
+    axclError axclrtEngineCreateIO(axclrtEngineIOInfo ioInfo, axclrtEngineIO *io);
+    axclError axclrtEngineDestroyIO(axclrtEngineIO io);
+
+    axclError axclrtEngineSetInputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size);
+    axclError axclrtEngineSetOutputBufferByIndex(axclrtEngineIO io, uint32_t index, const void *dataBuffer, uint64_t size);
+    axclError axclrtEngineGetInputBufferByIndex(axclrtEngineIO io, uint32_t index, void **dataBuffer, uint64_t *size);
+    axclError axclrtEngineGetOutputBufferByIndex(axclrtEngineIO io, uint32_t index, void **dataBuffer, uint64_t *size);
+
+    axclError axclrtEngineCreateContext(uint64_t modelId, uint64_t *contextId);
+
+    axclError axclrtEngineExecute(uint64_t modelId, uint64_t contextId, uint32_t group, axclrtEngineIO io);
+    """
+)
+
+# axcl_rt_memory.h
+axclrt_cffi.cdef(
+    """
+    axclError axclrtMalloc(void **devPtr, size_t size, axclrtMemMallocPolicy policy);
+    axclError axclrtMallocCached(void **devPtr, size_t size, axclrtMemMallocPolicy policy);
+    axclError axclrtMemcpy(void *dstPtr, const void *srcPtr, size_t count, axclrtMemcpyKind kind);
+    axclError axclrtFree(void *devPtr);
+    axclError axclrtMemFlush(void *devPtr, size_t size);
+    """
+)
+
+rt_name = "axcl_rt"
+rt_path = ctypes.util.find_library(rt_name)
+assert (
+    rt_path is not None
+), f"Failed to find library {rt_name}. Please ensure it is installed and in the library path."
+
+axclrt_lib = axclrt_cffi.dlopen(rt_path)
+assert axclrt_lib is not None, f"Failed to load library {rt_path}. Please ensure it is installed and in the library path."
python/axengine/_axclrt_types.py ADDED
@@ -0,0 +1,21 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+from enum import Enum
+
+
+class VNPUType(Enum):
+    DISABLED = 0
+    ENABLED = 1
+    BIG_LITTLE = 2
+    LITTLE_BIG = 3
+
+
+class ModelType(Enum):
+    SINGLE = 0
+    DUAL = 1
+    TRIPLE = 2
python/axengine/_axe.py ADDED
@@ -0,0 +1,399 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+import atexit
+import os
+from typing import Any, Sequence
+
+import ml_dtypes as mldt
+import numpy as np
+
+from ._axe_capi import sys_lib, engine_cffi, engine_lib
+from ._axe_types import VNPUType, ModelType, ChipType
+from ._base_session import Session, SessionOptions
+from ._node import NodeArg
+
+__all__ = ["AXEngineSession"]
+
+_is_sys_initialized = False
+_is_engine_initialized = False
+
+
+def _transform_dtype(dtype):
+    if dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_UINT8):
+        return np.dtype(np.uint8)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_SINT8):
+        return np.dtype(np.int8)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_UINT16):
+        return np.dtype(np.uint16)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_SINT16):
+        return np.dtype(np.int16)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_UINT32):
+        return np.dtype(np.uint32)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_SINT32):
+        return np.dtype(np.int32)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_FLOAT32):
+        return np.dtype(np.float32)
+    elif dtype == engine_cffi.cast("AX_ENGINE_DATA_TYPE_T", engine_lib.AX_ENGINE_DT_BFLOAT16):
+        return np.dtype(mldt.bfloat16)
+    else:
+        raise ValueError(f"Unsupported data type '{dtype}'.")
+
+
+def _check_cffi_func_exists(lib, func_name):
+    try:
+        getattr(lib, func_name)
+        return True
+    except AttributeError:
+        return False
+
+
+def _get_chip_type():
+    if not _check_cffi_func_exists(engine_lib, "AX_ENGINE_SetAffinity"):
+        return ChipType.M57H
+    elif not _check_cffi_func_exists(engine_lib, "AX_ENGINE_GetTotalOps"):
+        return ChipType.MC50
+    else:
+        return ChipType.MC20E
+
+
+def _get_version():
+    engine_version = engine_lib.AX_ENGINE_GetVersion()
+    return engine_cffi.string(engine_version).decode("utf-8")
+
+
+def _get_vnpu_type() -> VNPUType:
+    vnpu_type = engine_cffi.new("AX_ENGINE_NPU_ATTR_T *")
+    ret = engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type)
+    if 0 != ret:
+        raise RuntimeError("Failed to get VNPU attribute.")
+    return VNPUType(vnpu_type.eHardMode)
+
+
+def _initialize_engine():
+    global _is_sys_initialized, _is_engine_initialized
+
+    ret = sys_lib.AX_SYS_Init()
+    if ret != 0:
+        raise RuntimeError("Failed to initialize ax sys.")
+    _is_sys_initialized = True
+
+    # disabled mode by default
+    vnpu_type = engine_cffi.new("AX_ENGINE_NPU_ATTR_T *")
+    ret = engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type)
+    if 0 != ret:
+        # this means the NPU was not initialized
+        vnpu_type.eHardMode = engine_cffi.cast(
+            "AX_ENGINE_NPU_MODE_T", VNPUType.DISABLED.value
+        )
+    ret = engine_lib.AX_ENGINE_Init(vnpu_type)
+    if ret != 0:
+        raise RuntimeError("Failed to initialize ax sys engine.")
+    _is_engine_initialized = True
+
+    print(f"[INFO] Chip type: {_get_chip_type()}")
+    print(f"[INFO] VNPU type: {_get_vnpu_type()}")
+    print(f"[INFO] Engine version: {_get_version()}")
+
+
+def _finalize_engine():
+    global _is_sys_initialized, _is_engine_initialized
+
+    if _is_engine_initialized:
+        engine_lib.AX_ENGINE_Deinit()
+    if _is_sys_initialized:
+        sys_lib.AX_SYS_Deinit()
+
+
+_initialize_engine()
+atexit.register(_finalize_engine)
+
+
+class AXEngineSession(Session):
+    def __init__(
+        self,
+        path_or_bytes: str | bytes | os.PathLike,
+        sess_options: SessionOptions | None = None,
+        provider_options: dict[Any, Any] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__()
+
+        self._chip_type = _get_chip_type()
+        self._vnpu_type = _get_vnpu_type()
+
+        # handle, context, info, io
+        self._handle = engine_cffi.new("uint64_t **")
+        self._context = engine_cffi.new("uint64_t **")
+        self._io = engine_cffi.new("AX_ENGINE_IO_T *")
+
+        # model buffer, almost copied from onnx runtime
+        if isinstance(path_or_bytes, (str, os.PathLike)):
+            self._model_name = os.path.splitext(os.path.basename(path_or_bytes))[0]
+            with open(path_or_bytes, "rb") as f:
+                data = f.read()
+            self._model_buffer = engine_cffi.new("char[]", data)
+            self._model_buffer_size = len(data)
+        elif isinstance(path_or_bytes, bytes):
+            self._model_buffer = engine_cffi.new("char[]", path_or_bytes)
+            self._model_buffer_size = len(path_or_bytes)
+        else:
+            raise TypeError(f"Unable to load model from type '{type(path_or_bytes)}'")
+
+        # get model type
+        self._model_type = self._get_model_type()
+        if self._chip_type is ChipType.MC20E:
+            if self._model_type is ModelType.FULL:
+                print(f"[INFO] Model type: {self._model_type.value} (full core)")
+            if self._model_type is ModelType.HALF:
+                print(f"[INFO] Model type: {self._model_type.value} (half core)")
+        if self._chip_type is ChipType.MC50:
+            if self._model_type is ModelType.SINGLE:
+                print(f"[INFO] Model type: {self._model_type.value} (single core)")
+            if self._model_type is ModelType.DUAL:
+                print(f"[INFO] Model type: {self._model_type.value} (dual core)")
+            if self._model_type is ModelType.TRIPLE:
+                print(f"[INFO] Model type: {self._model_type.value} (triple core)")
+        if self._chip_type is ChipType.M57H:
+            print(f"[INFO] Model type: {self._model_type.value} (single core)")
+
+        # check model type
+        if self._chip_type is ChipType.MC50:
+            # all types (single or dual or triple) of model are allowed in vnpu mode disabled
+            # only single core model is allowed in vnpu mode enabled
+            # only triple core model is NOT allowed in vnpu mode big-little or little-big
+            if self._vnpu_type is VNPUType.ENABLED:
+                if self._model_type is not ModelType.SINGLE:
+                    raise ValueError(
+                        f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}."
+                    )
+            if (
+                self._vnpu_type is VNPUType.BIG_LITTLE
+                or self._vnpu_type is VNPUType.LITTLE_BIG
+            ):
+                if self._model_type is ModelType.TRIPLE:
+                    raise ValueError(
+                        f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}."
+                    )
+        if self._chip_type is ChipType.MC20E:
+            # all types of full or half core model are allowed in vnpu mode disabled
+            # only half core model is allowed in vnpu mode enabled
+            if self._vnpu_type is VNPUType.ENABLED:
+                if self._model_type is ModelType.FULL:
+                    raise ValueError(
+                        f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}."
+                    )
+        # if self._chip_type is ChipType.M57H:
+        #     only one type of model is compiled for this chip, so there is no need to check
+
+        # load model
+        ret = self._load()
+        if 0 != ret:
+            raise RuntimeError("Failed to load model.")
+        print(f"[INFO] Compiler version: {self._get_model_tool_version()}")
+
+        # get shape group count
+        try:
+            self._shape_count = self._get_shape_count()
+        except AttributeError as e:
+            print(f"[WARNING] {e}")
+            self._shape_count = 1
+
+        # get model shape
+        self._info = self._get_info()
+        self._inputs = self._get_inputs()
+        self._outputs = self._get_outputs()
+
+        # fill model io
+        self._align = 128
+        self._cmm_token = engine_cffi.new("AX_S8[]", b"PyEngine")
+        self._io[0].nInputSize = len(self.get_inputs())
+        self._io[0].nOutputSize = len(self.get_outputs())
+        self._io[0].pInputs = engine_cffi.new(
+            "AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nInputSize)
+        )
+        self._io[0].pOutputs = engine_cffi.new(
+            "AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nOutputSize)
+        )
+        for i in range(len(self.get_inputs())):
+            max_buf = 0
+            for j in range(self._shape_count):
+                max_buf = max(max_buf, self._info[j][0].pInputs[i].nSize)
+            self._io[0].pInputs[i].nSize = max_buf
+            phy = engine_cffi.new("AX_U64*")
+            vir = engine_cffi.new("AX_VOID**")
+            ret = sys_lib.AX_SYS_MemAllocCached(
+                phy, vir, self._io[0].pInputs[i].nSize, self._align, self._cmm_token
+            )
+            if 0 != ret:
+                raise RuntimeError("Failed to allocate memory for input.")
+            self._io[0].pInputs[i].phyAddr = phy[0]
+            self._io[0].pInputs[i].pVirAddr = vir[0]
+        for i in range(len(self.get_outputs())):
+            max_buf = 0
+            for j in range(self._shape_count):
+                max_buf = max(max_buf, self._info[j][0].pOutputs[i].nSize)
+            self._io[0].pOutputs[i].nSize = max_buf
+            phy = engine_cffi.new("AX_U64*")
+            vir = engine_cffi.new("AX_VOID**")
+            ret = sys_lib.AX_SYS_MemAllocCached(
+                phy, vir, self._io[0].pOutputs[i].nSize, self._align, self._cmm_token
+            )
+            if 0 != ret:
+                raise RuntimeError("Failed to allocate memory for output.")
+            self._io[0].pOutputs[i].phyAddr = phy[0]
+            self._io[0].pOutputs[i].pVirAddr = vir[0]
+
+    def __del__(self):
+        self._unload()
+
+    def _get_model_type(self) -> ModelType:
+        model_type = engine_cffi.new("AX_ENGINE_MODEL_TYPE_T *")
+        ret = engine_lib.AX_ENGINE_GetModelType(
+            self._model_buffer, self._model_buffer_size, model_type
+        )
+        if 0 != ret:
+            raise RuntimeError("Failed to get model type.")
+        return ModelType(model_type[0])
+
+    def _get_model_tool_version(self):
+        model_tool_version = engine_lib.AX_ENGINE_GetModelToolsVersion(
+            self._handle[0]
+        )
+        return engine_cffi.string(model_tool_version).decode("utf-8")
+
+    def _load(self):
+        extra = engine_cffi.new("AX_ENGINE_HANDLE_EXTRA_T *")
+        extra_name = engine_cffi.new("char[]", self._model_name.encode("utf-8"))
+        extra.pName = extra_name
+
+        # as far as I know, onnxruntime does not support running one model with multiple
+        # contexts across threads, so the engine handle and context are created only once
+        ret = engine_lib.AX_ENGINE_CreateHandleV2(
+            self._handle, self._model_buffer, self._model_buffer_size, extra
+        )
+        if 0 == ret:
+            ret = engine_lib.AX_ENGINE_CreateContextV2(
+                self._handle[0], self._context
+            )
+        return ret
+
+    def _get_info(self):
+        total_info = []
+        if 1 == self._shape_count:
+            info = engine_cffi.new("AX_ENGINE_IO_INFO_T **")
+            ret = engine_lib.AX_ENGINE_GetIOInfo(self._handle[0], info)
+            if 0 != ret:
+                raise RuntimeError("Failed to get model shape.")
+            total_info.append(info)
+        else:
+            for i in range(self._shape_count):
+                info = engine_cffi.new("AX_ENGINE_IO_INFO_T **")
+                ret = engine_lib.AX_ENGINE_GetGroupIOInfo(
+                    self._handle[0], i, info
+                )
+                if 0 != ret:
+                    raise RuntimeError(f"Failed to get model the {i}th shape.")
+                total_info.append(info)
+        return total_info
+
+    def _get_shape_count(self):
+        count = engine_cffi.new("AX_U32 *")
+        ret = engine_lib.AX_ENGINE_GetGroupIOInfoCount(self._handle[0], count)
+        if 0 != ret:
+            raise RuntimeError("Failed to get model shape group.")
+        return count[0]
+
+    def _unload(self):
+        if self._handle[0] is not None:
+            engine_lib.AX_ENGINE_DestroyHandle(self._handle[0])
+            self._handle[0] = engine_cffi.NULL
+
+    def _get_io(self, io_type: str):
+        io_info = []
+        for group in range(self._shape_count):
+            one_group_io = []
+            for index in range(getattr(self._info[group][0], f'n{io_type}Size')):
+                current_io = getattr(self._info[group][0], f'p{io_type}s')[index]
+                name = engine_cffi.string(current_io.pName).decode("utf-8")
+                shape = [current_io.pShape[i] for i in range(current_io.nShapeSize)]
+                dtype = _transform_dtype(current_io.eDataType)
+                meta = NodeArg(name, dtype, shape)
+                one_group_io.append(meta)
+            io_info.append(one_group_io)
+        return io_info
+
+    def _get_inputs(self):
+        return self._get_io('Input')
+
+    def _get_outputs(self):
+        return self._get_io('Output')
+
+    def run(
+        self,
+        output_names: list[str],
+        input_feed: dict[str, np.ndarray],
+        run_options=None
+    ):
+        self._validate_input(input_feed)
+        self._validate_output(output_names)
+
+        if None is output_names:
+            output_names = [o.name for o in self.get_outputs()]
+
+        # fill model io
+        for key, npy in input_feed.items():
+            for i, one in enumerate(self.get_inputs()):
+                if one.name == key:
+                    assert (
+                        list(one.shape) == list(npy.shape) and one.dtype == npy.dtype
+                    ), f"model inputs({key}) expect shape {one.shape} and dtype {one.dtype}, however gets input with shape {npy.shape} and dtype {npy.dtype}"
+
+                    if not (
+                        not npy.flags.c_contiguous
+                        and npy.flags.f_contiguous
+                        and npy.flags.contiguous
+                    ):
+                        npy = np.ascontiguousarray(npy)
+                    npy_ptr = engine_cffi.cast("void *", npy.ctypes.data)
+
+                    engine_cffi.memmove(
+                        self._io[0].pInputs[i].pVirAddr, npy_ptr, npy.nbytes
+                    )
+                    sys_lib.AX_SYS_MflushCache(
+                        self._io[0].pInputs[i].phyAddr,
+                        self._io[0].pInputs[i].pVirAddr,
+                        self._io[0].pInputs[i].nSize,
+                    )
+                    break
+
+        # execute model
+        ret = engine_lib.AX_ENGINE_RunSyncV2(
+            self._handle[0], self._context[0], self._io
+        )
+
+        # flush output
+        outputs = []
+        if 0 == ret:
+            for i in range(len(self.get_outputs())):
+                sys_lib.AX_SYS_MinvalidateCache(
+                    self._io[0].pOutputs[i].phyAddr,
+                    self._io[0].pOutputs[i].pVirAddr,
+                    self._io[0].pOutputs[i].nSize,
+                )
+                npy = np.frombuffer(
+                    engine_cffi.buffer(
+                        self._io[0].pOutputs[i].pVirAddr, self._io[0].pOutputs[i].nSize
+                    ),
+                    dtype=self.get_outputs()[i].dtype,
+                ).reshape(self.get_outputs()[i].shape)
+                name = self.get_outputs()[i].name
+                if name in output_names:
+                    outputs.append(npy)
+            return outputs
+        else:
+            raise RuntimeError("Failed to run model.")
python/axengine/_axe_capi.py ADDED
@@ -0,0 +1,323 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+import ctypes.util
+import platform
+
+from cffi import FFI
+
+__all__ = ["sys_lib", "sys_cffi", "engine_lib", "engine_cffi"]
+
+sys_cffi = FFI()
+
+# ax_base_type.h
+sys_cffi.cdef(
+    """
+    typedef int AX_S32;
+    typedef unsigned int AX_U32;
+    typedef unsigned long long int AX_U64;
+    typedef signed char AX_S8;
+    typedef void AX_VOID;
+    """
+)
+
+# ax_sys_api.h
+sys_cffi.cdef(
+    """
+    AX_S32 AX_SYS_Init(AX_VOID);
+    AX_S32 AX_SYS_Deinit(AX_VOID);
+    AX_S32 AX_SYS_MemAllocCached(AX_U64 *phyaddr, AX_VOID **pviraddr, AX_U32 size, AX_U32 align, const AX_S8 *token);
+    AX_S32 AX_SYS_MemFree(AX_U64 phyaddr, AX_VOID *pviraddr);
+    AX_S32 AX_SYS_MflushCache(AX_U64 phyaddr, AX_VOID *pviraddr, AX_U32 size);
+    AX_S32 AX_SYS_MinvalidateCache(AX_U64 phyaddr, AX_VOID *pviraddr, AX_U32 size);
+    """
+)
+
+sys_name = "ax_sys"
+sys_path = ctypes.util.find_library(sys_name)
+assert (
+    sys_path is not None
+), f"Failed to find library {sys_name}. Please ensure it is installed and in the library path."
+
+sys_lib = sys_cffi.dlopen(sys_path)
+assert sys_lib is not None, f"Failed to load library {sys_path}. Please ensure it is installed and in the library path."
+
+engine_cffi = FFI()
+
+# ax_base_type.h
+engine_cffi.cdef(
+    """
+    typedef unsigned long long int AX_U64;
+    typedef unsigned int AX_U32;
+    typedef unsigned char AX_U8;
+    typedef int AX_S32;
+    typedef signed char AX_S8;
+    typedef char AX_CHAR;
+    typedef void AX_VOID;
+
+    typedef enum {
+        AX_FALSE = 0,
+        AX_TRUE = 1,
+    } AX_BOOL;
+    """
+)
+
+# ax_engine_type.h, base type
+engine_cffi.cdef(
+    """
+    typedef AX_U32 AX_ENGINE_NPU_SET_T;
+    """
+)
+
+# ax_engine_type.h, enum
+engine_cffi.cdef(
+    """
+    typedef enum _AX_ENGINE_TENSOR_LAYOUT_E
+    {
+        AX_ENGINE_TENSOR_LAYOUT_UNKNOWN = 0,
+        AX_ENGINE_TENSOR_LAYOUT_NHWC = 1,
+        AX_ENGINE_TENSOR_LAYOUT_NCHW = 2,
+    } AX_ENGINE_TENSOR_LAYOUT_T;
+
+    typedef enum
+    {
+        AX_ENGINE_MT_PHYSICAL = 0,
+        AX_ENGINE_MT_VIRTUAL = 1,
+        AX_ENGINE_MT_OCM = 2,
+    } AX_ENGINE_MEMORY_TYPE_T;
+
+    typedef enum
+    {
+        AX_ENGINE_DT_UNKNOWN = 0,
+        AX_ENGINE_DT_UINT8 = 1,
+        AX_ENGINE_DT_UINT16 = 2,
+        AX_ENGINE_DT_FLOAT32 = 3,
+        AX_ENGINE_DT_SINT16 = 4,
+        AX_ENGINE_DT_SINT8 = 5,
+        AX_ENGINE_DT_SINT32 = 6,
+        AX_ENGINE_DT_UINT32 = 7,
+        AX_ENGINE_DT_FLOAT64 = 8,
+        AX_ENGINE_DT_BFLOAT16 = 9,
+        AX_ENGINE_DT_UINT10_PACKED = 100,
+        AX_ENGINE_DT_UINT12_PACKED = 101,
+        AX_ENGINE_DT_UINT14_PACKED = 102,
+        AX_ENGINE_DT_UINT16_PACKED = 103,
+    } AX_ENGINE_DATA_TYPE_T;
+
+    typedef enum
+    {
+        AX_ENGINE_CS_FEATUREMAP = 0,
+        AX_ENGINE_CS_RAW8 = 12,
+        AX_ENGINE_CS_RAW10 = 1,
+        AX_ENGINE_CS_RAW12 = 2,
+        AX_ENGINE_CS_RAW14 = 11,
+        AX_ENGINE_CS_RAW16 = 3,
+        AX_ENGINE_CS_NV12 = 4,
+        AX_ENGINE_CS_NV21 = 5,
+        AX_ENGINE_CS_RGB = 6,
+        AX_ENGINE_CS_BGR = 7,
+        AX_ENGINE_CS_RGBA = 8,
+        AX_ENGINE_CS_GRAY = 9,
+        AX_ENGINE_CS_YUV444 = 10,
+    } AX_ENGINE_COLOR_SPACE_T;
+    """
+)
+
+# ax_engine_type.h, architecturally agnostic struct
+engine_cffi.cdef(
+    """
+    typedef enum {
+        AX_ENGINE_VIRTUAL_NPU_DISABLE = 0,
+    } AX_ENGINE_NPU_MODE_T;
+
+    typedef enum {
+        AX_ENGINE_MODEL_TYPE0 = 0,
+    } AX_ENGINE_MODEL_TYPE_T;
+
+    typedef struct {
+        AX_ENGINE_NPU_MODE_T eHardMode;
+        AX_U32 reserve[8];
+    } AX_ENGINE_NPU_ATTR_T;
+
+    typedef struct _AX_ENGINE_IO_META_EX_T
+    {
+        AX_ENGINE_COLOR_SPACE_T eColorSpace;
+        AX_U64 u64Reserved[18];
+    } AX_ENGINE_IO_META_EX_T;
+
+    typedef struct {
+        AX_ENGINE_NPU_SET_T nNpuSet;
+        AX_S8* pName;
+        AX_U32 reserve[8];
+    } AX_ENGINE_HANDLE_EXTRA_T;
+
+    typedef struct _AX_ENGINE_CMM_INFO_T
+    {
+        AX_U32 nCMMSize;
+    } AX_ENGINE_CMM_INFO_T;
+
+    typedef struct _AX_ENGINE_IO_SETTING_T
+    {
+        AX_U32 nWbtIndex;
+        AX_U64 u64Reserved[7];
+    } AX_ENGINE_IO_SETTING_T;
+    """
+)
+
+# check architecture, 32bit or 64bit
+arch = platform.architecture()[0]
+
+# ax_engine_type.h, struct
+if arch == "64bit":
+    engine_cffi.cdef(
+        """
+        typedef struct _AX_ENGINE_IO_META_T
+        {
+            AX_CHAR* pName;
+            AX_S32* pShape;
+            AX_U8 nShapeSize;
+            AX_ENGINE_TENSOR_LAYOUT_T eLayout;
+            AX_ENGINE_MEMORY_TYPE_T eMemoryType;
+            AX_ENGINE_DATA_TYPE_T eDataType;
+            AX_ENGINE_IO_META_EX_T* pExtraMeta;
+            AX_U32 nSize;
+            AX_U32 nQuantizationValue;
+            AX_S32* pStride;
+            AX_U64 u64Reserved[9];
+        } AX_ENGINE_IO_META_T;
+
+        typedef struct _AX_ENGINE_IO_INFO_T
+        {
+            AX_ENGINE_IO_META_T* pInputs;
+            AX_U32 nInputSize;
+            AX_ENGINE_IO_META_T* pOutputs;
+            AX_U32 nOutputSize;
+            AX_U32 nMaxBatchSize;
+            AX_BOOL bDynamicBatchSize;
+            AX_U64 u64Reserved[11];
+        } AX_ENGINE_IO_INFO_T;
+
+        typedef struct _AX_ENGINE_IO_BUFFER_T
+        {
+            AX_U64 phyAddr;
+            AX_VOID* pVirAddr;
+            AX_U32 nSize;
+            AX_S32* pStride;
+            AX_U8 nStrideSize;
+            AX_U64 u64Reserved[11];
+        } AX_ENGINE_IO_BUFFER_T;
+
+        typedef struct _AX_ENGINE_IO_T
+        {
+            AX_ENGINE_IO_BUFFER_T* pInputs;
+            AX_U32 nInputSize;
+            AX_ENGINE_IO_BUFFER_T* pOutputs;
+            AX_U32 nOutputSize;
+            AX_U32 nBatchSize;
+            AX_ENGINE_IO_SETTING_T* pIoSetting;
+            AX_U64 u64Reserved[10];
+        } AX_ENGINE_IO_T;
+        """
+    )
+else:
+    engine_cffi.cdef(
+        """
+        typedef struct _AX_ENGINE_IO_META_T
+        {
+            AX_CHAR* pName;
+            AX_S32* pShape;
+            AX_U8 nShapeSize;
+            AX_ENGINE_TENSOR_LAYOUT_T eLayout;
+            AX_ENGINE_MEMORY_TYPE_T eMemoryType;
+            AX_ENGINE_DATA_TYPE_T eDataType;
+            AX_ENGINE_IO_META_EX_T* pExtraMeta;
+            AX_U32 nSize;
+            AX_U32 nQuantizationValue;
+            AX_S32* pStride;
+            AX_U64 u64Reserved[11];
+        } AX_ENGINE_IO_META_T;
+
+        typedef struct _AX_ENGINE_IO_INFO_T
+        {
+            AX_ENGINE_IO_META_T* pInputs;
+            AX_U32 nInputSize;
+            AX_ENGINE_IO_META_T* pOutputs;
+            AX_U32 nOutputSize;
+            AX_U32 nMaxBatchSize;
+            AX_BOOL bDynamicBatchSize;
+            AX_U64 u64Reserved[13];
+        } AX_ENGINE_IO_INFO_T;
+
+        typedef struct _AX_ENGINE_IO_BUFFER_T
+        {
+            AX_U64 phyAddr;
+            AX_VOID* pVirAddr;
+            AX_U32 nSize;
+            AX_S32* pStride;
+            AX_U8 nStrideSize;
+            AX_U64 u64Reserved[13];
+        } AX_ENGINE_IO_BUFFER_T;
+
+        typedef struct _AX_ENGINE_IO_T
+        {
+            AX_ENGINE_IO_BUFFER_T* pInputs;
+            AX_U32 nInputSize;
+            AX_ENGINE_IO_BUFFER_T* pOutputs;
+            AX_U32 nOutputSize;
+            AX_U32 nBatchSize;
+            AX_ENGINE_IO_SETTING_T* pIoSetting;
+            AX_U64 u64Reserved[12];
+        } AX_ENGINE_IO_T;
+        """
+    )
+
+# ax_engine_api.h
+engine_cffi.cdef(
+    """
+    const AX_CHAR* AX_ENGINE_GetVersion(AX_VOID);
+
+    AX_VOID AX_ENGINE_NPUReset(AX_VOID);
+    AX_S32 AX_ENGINE_Init(AX_ENGINE_NPU_ATTR_T* pNpuAttr);
+    AX_S32 AX_ENGINE_GetVNPUAttr(AX_ENGINE_NPU_ATTR_T* pNpuAttr);
+    AX_S32 AX_ENGINE_Deinit(AX_VOID);
+
+    AX_S32 AX_ENGINE_GetModelType(const AX_VOID* pData, AX_U32 nDataSize, AX_ENGINE_MODEL_TYPE_T* pModelType);
+
+    AX_S32 AX_ENGINE_CreateHandleV2(uint64_t** pHandle, const AX_VOID* pData, AX_U32 nDataSize, AX_ENGINE_HANDLE_EXTRA_T* pExtraParam);
+    AX_S32 AX_ENGINE_DestroyHandle(uint64_t* nHandle);
+
+    AX_S32 AX_ENGINE_GetIOInfo(uint64_t* nHandle, AX_ENGINE_IO_INFO_T** pIO);
+    AX_S32 AX_ENGINE_GetGroupIOInfoCount(uint64_t* nHandle, AX_U32* pCount);
+    AX_S32 AX_ENGINE_GetGroupIOInfo(uint64_t* nHandle, AX_U32 nIndex, AX_ENGINE_IO_INFO_T** pIO);
+
+    AX_S32 AX_ENGINE_GetHandleModelType(uint64_t* nHandle, AX_ENGINE_MODEL_TYPE_T* pModelType);
+
+    AX_S32 AX_ENGINE_CreateContextV2(uint64_t* nHandle, uint64_t** pContext);
+
+    AX_S32 AX_ENGINE_RunSyncV2(uint64_t* handle, uint64_t* context, AX_ENGINE_IO_T* pIO);
+    AX_S32 AX_ENGINE_RunGroupIOSync(uint64_t* handle, uint64_t* context, AX_U32 nIndex, AX_ENGINE_IO_T* pIO);
+
+    AX_S32 AX_ENGINE_SetAffinity(uint64_t* nHandle, AX_ENGINE_NPU_SET_T nNpuSet);
+    AX_S32 AX_ENGINE_GetAffinity(uint64_t* nHandle, AX_ENGINE_NPU_SET_T* pNpuSet);
+
+    AX_S32 AX_ENGINE_GetCMMUsage(uint64_t* nHandle, AX_ENGINE_CMM_INFO_T* pCMMInfo);
+
+    const AX_CHAR* AX_ENGINE_GetModelToolsVersion(uint64_t* nHandle);
+
+    // internal use API
+    AX_S32 AX_ENGINE_GetTotalOps();
+    """
+)
+
+engine_name = "ax_engine"
+engine_path = ctypes.util.find_library(engine_name)
+assert (
+    engine_path is not None
+), f"Failed to find library {engine_name}. Please ensure it is installed and in the library path."
+
+engine_lib = engine_cffi.dlopen(engine_path)
+assert engine_lib is not None, f"Failed to load library {engine_path}. Please ensure it is installed and in the library path."
python/axengine/_axe_types.py ADDED
@@ -0,0 +1,29 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+from enum import Enum
+
+
+class VNPUType(Enum):
+    DISABLED = 0
+    ENABLED = 1
+    BIG_LITTLE = 2
+    LITTLE_BIG = 3
+
+
+class ModelType(Enum):
+    HALF = 0    # for MC20E, which means chip is AX630C(x), or AX620Q(x)
+    FULL = 1    # for MC20E
+    SINGLE = 0  # for MC50, which means chip is AX650A or AX650N, and M57H
+    DUAL = 1    # for MC50
+    TRIPLE = 2  # for MC50
+
+
+class ChipType(Enum):
+    MC20E = 0
+    MC50 = 1
+    M57H = 2
python/axengine/_base_session.py ADDED
@@ -0,0 +1,59 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from ._node import NodeArg
+
+
+class SessionOptions:
+    pass
+
+
+class Session(ABC):
+    def __init__(self) -> None:
+        self._shape_count = 0
+        self._inputs = []
+        self._outputs = []
+
+    def _validate_input(self, feed_input_names: dict[str, np.ndarray]):
+        missing_input_names = []
+        for i in self.get_inputs():
+            if i.name not in feed_input_names:
+                missing_input_names.append(i.name)
+        if missing_input_names:
+            raise ValueError(
+                f"Required inputs ({missing_input_names}) are missing from input feed ({feed_input_names}).")
+
+    def _validate_output(self, output_names: list[str]):
+        if output_names is not None:
+            for name in output_names:
+                if name not in [o.name for o in self.get_outputs()]:
+                    raise ValueError(f"Output name '{name}' is not in model outputs name list.")
+
+    def get_inputs(self, shape_group: int = 0) -> list[NodeArg]:
+        if shape_group > self._shape_count:
+            raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.")
+        selected_info = self._inputs[shape_group]
+        return selected_info
+
+    def get_outputs(self, shape_group: int = 0) -> list[NodeArg]:
+        if shape_group > self._shape_count:
+            raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.")
+        selected_info = self._outputs[shape_group]
+        return selected_info
+
+    @abstractmethod
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, np.ndarray],
+        run_options=None
+    ) -> list[np.ndarray]:
+        pass
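
Editor's note: a short hypothetical sketch of how the base-class helpers above behave from the caller's side; it is not part of the commit, and `sess` stands for any already-constructed concrete Session (AXEngineSession or AXCLRTSession).

# Hypothetical sketch only (not part of this commit).
import numpy as np

inputs = sess.get_inputs()  # NodeArg list for shape group 0, the default group
feed = {n.name: np.zeros(n.shape, dtype=n.dtype) for n in inputs}

# run() first checks the feed against the model's input names (_validate_input)
# and the requested names against the model's outputs (_validate_output),
# then returns only the requested output arrays.
first_output = sess.get_outputs()[0].name
result = sess.run([first_output], feed)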
python/axengine/_node.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+ #
+ # This source file is the property of Axera Semiconductor Co., Ltd. and
+ # may not be copied or distributed in any isomorphic form without the prior
+ # written consent of Axera Semiconductor Co., Ltd.
+ #
+
+
+ class NodeArg(object):
+     def __init__(self, name, dtype, shape):
+         self.name = name
+         self.dtype = dtype
+         self.shape = shape
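NodeArg is the onnxruntime-style descriptor returned by get_inputs()/get_outputs(); a trivial illustration follows (the values are made up):

from axengine._node import NodeArg

# Describe one model input: name, dtype string, and shape tuple.
arg = NodeArg(name="input", dtype="uint8", shape=(1, 518, 518, 3))
print(arg.name, arg.dtype, arg.shape)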
python/axengine/_providers.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+ #
+ # This source file is the property of Axera Semiconductor Co., Ltd. and
+ # may not be copied or distributed in any isomorphic form without the prior
+ # written consent of Axera Semiconductor Co., Ltd.
+ #
+
+ import ctypes.util as cutil
+
+ providers = []
+ axengine_provider_name = 'AxEngineExecutionProvider'
+ axclrt_provider_name = 'AXCLRTExecutionProvider'
+
+ _axengine_lib_name = 'ax_engine'
+ _axclrt_lib_name = 'axcl_rt'
+
+ # check if axcl_rt is installed; if it is, it becomes the default provider
+ if cutil.find_library(_axclrt_lib_name) is not None:
+     providers.append(axclrt_provider_name)
+
+ # check if ax_engine is installed
+ if cutil.find_library(_axengine_lib_name) is not None:
+     providers.append(axengine_provider_name)
+
+
+ def get_all_providers():
+     return [axengine_provider_name, axclrt_provider_name]
+
+
+ def get_available_providers():
+     return providers
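A quick way to see which providers the current environment exposes; the result depends on whether ctypes can locate the axcl_rt and ax_engine shared libraries on this machine.

from axengine._providers import get_all_providers, get_available_providers

print("all:      ", get_all_providers())
print("available:", get_available_providers())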
python/axengine/_session.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+ #
+ # This source file is the property of Axera Semiconductor Co., Ltd. and
+ # may not be copied or distributed in any isomorphic form without the prior
+ # written consent of Axera Semiconductor Co., Ltd.
+ #
+
+ import os
+ from typing import Any, Sequence
+
+ import numpy as np
+
+ from ._base_session import SessionOptions
+ from ._node import NodeArg
+ from ._providers import axclrt_provider_name, axengine_provider_name
+ from ._providers import get_available_providers
+
+
+ class InferenceSession:
+     def __init__(
+             self,
+             path_or_bytes: str | bytes | os.PathLike,
+             sess_options: SessionOptions | None = None,
+             providers: Sequence[str | tuple[str, dict[Any, Any]]] | None = None,
+             provider_options: Sequence[dict[Any, Any]] | None = None, **kwargs,
+     ) -> None:
+         self._sess = None
+         self._sess_options = sess_options
+         self._provider = None
+         self._provider_options = None
+         self._available_providers = get_available_providers()
+
+         # at least one provider is available at this point; __init__.py checks this at import time
+         if providers is None:
+             # use the first available provider as the default
+             _provider_name = self._available_providers[0]
+             self._provider = _provider_name
+         else:
+             # only one provider is specified
+             if isinstance(providers, str):
+                 if providers not in self._available_providers:
+                     raise ValueError(f"Selected provider: '{providers}' is not available.")
+                 self._provider = providers
+             # multiple providers are specified; use the first available one as the default
+             elif isinstance(providers, list):
+                 _unavailable_provider = []
+                 for p in providers:
+                     assert isinstance(p, str) or isinstance(p, tuple), \
+                         f"Invalid provider type: {type(p)}. Must be str or tuple."
+                     if isinstance(p, str):
+                         if p not in self._available_providers:
+                             _unavailable_provider.append(p)
+                         elif self._provider is None:
+                             self._provider = p
+                     if isinstance(p, tuple):
+                         assert len(p) == 2, f"Invalid provider type: {p}. Must be tuple with 2 elements."
+                         assert isinstance(p[0], str), f"Invalid provider type: {type(p[0])}. Must be str."
+                         assert isinstance(p[1], dict), f"Invalid provider type: {type(p[1])}. Must be dict."
+                         if p[0] not in self._available_providers:
+                             _unavailable_provider.append(p[0])
+                         elif self._provider is None:
+                             self._provider = p[0]
+                             # FIXME: check provider options
+                             self._provider_options = p[1]
+                 if _unavailable_provider:
+                     if self._provider is None:
+                         raise ValueError(f"Selected provider(s): {_unavailable_provider} is(are) not available.")
+                     else:
+                         print(f"[WARNING] Selected provider(s): {_unavailable_provider} is(are) not available.")
+
+         # FIXME: can we remove this check?
+         if self._provider is None:
+             raise ValueError(f"No available provider found in {providers}.")
+         print(f"[INFO] Using provider: {self._provider}")
+
+         if self._provider == axclrt_provider_name:
+             from ._axclrt import AXCLRTSession
+             self._sess = AXCLRTSession(path_or_bytes, sess_options, provider_options, **kwargs)
+         if self._provider == axengine_provider_name:
+             from ._axe import AXEngineSession
+             self._sess = AXEngineSession(path_or_bytes, sess_options, provider_options, **kwargs)
+         if self._sess is None:
+             raise RuntimeError(f"Create session failed with provider: {self._provider}")
+
+     # support the 'with' statement
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         # do not suppress exceptions
+         return False
+
+     def get_session_options(self):
+         """
+         Return the session options. See :class:`axengine.SessionOptions`.
+         """
+         return self._sess_options
+
+     def get_providers(self):
+         """
+         Return the execution provider selected for this session.
+         """
+         return self._provider
+
+     def get_inputs(self, shape_group: int = 0) -> list[NodeArg]:
+         return self._sess.get_inputs(shape_group)
+
+     def get_outputs(self, shape_group: int = 0) -> list[NodeArg]:
+         return self._sess.get_outputs(shape_group)
+
+     def run(
+             self,
+             output_names: list[str] | None,
+             input_feed: dict[str, np.ndarray],
+             run_options=None
+     ) -> list[np.ndarray]:
+         return self._sess.run(output_names, input_feed, run_options)
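A minimal usage sketch of this wrapper is shown below. The model file name matches the ax650 axmodel uploaded in this commit, and the provider list mirrors the one used by python/infer.py; it only runs on a device where one of the two providers is actually available.

from axengine import InferenceSession

session = InferenceSession(
    "depth_anything_v2_vits_ax650.axmodel",
    providers=["AxEngineExecutionProvider", "AXCLRTExecutionProvider"],
)
# Inspect the model's declared I/O metadata (NodeArg objects).
for i in session.get_inputs():
    print("input :", i.name, i.dtype, i.shape)
for o in session.get_outputs():
    print("output:", o.name, o.dtype, o.shape)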
python/examples/demo01.jpg ADDED
python/examples/demo02.jpg ADDED
python/examples/demo03.jpg ADDED
python/examples/demo04.jpg ADDED
python/examples/demo05.jpg ADDED
python/examples/demo06.jpg ADDED
python/examples/demo07.jpg ADDED
python/examples/demo08.jpg ADDED
python/examples/demo09.jpg ADDED
python/examples/demo10.jpg ADDED
python/examples/demo11.jpg ADDED
python/examples/demo12.jpg ADDED
python/examples/demo13.jpg ADDED
python/examples/demo14.jpg ADDED
python/examples/demo15.jpg ADDED
python/examples/demo16.jpg ADDED
python/examples/demo17.jpg ADDED
python/examples/demo18.jpg ADDED
python/examples/demo19.jpg ADDED

Git LFS Details

  • SHA256: 7cdb09c34eb0b4d2ac5f6070aec47c8f983a0b1b2c9ee1fc30decafb64f1bd98
  • Pointer size: 132 Bytes
  • Size of remote file: 1 MB
python/examples/demo20.jpg ADDED
python/infer.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import argparse
+ import cv2
+ import numpy as np
+ from axengine import InferenceSession
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--img",
+         type=str,
+         required=True,
+         help="Path to input image.",
+     )
+     parser.add_argument(
+         "--model",
+         type=str,
+         required=True,
+         help="Path to compiled axmodel.",
+     )
+
+     return parser.parse_args()
+
+
+ def infer(img: str, model: str, viz: bool = False):
+     img_raw = cv2.imread(img)
+     image = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
+     orig_h, orig_w = image.shape[:2]
+     image = cv2.resize(image, (518, 518))
+     image = image[None]
+
+     session = InferenceSession(path_or_bytes=model, providers=['AxEngineExecutionProvider', 'AXCLRTExecutionProvider'])
+
+     depth = session.run(output_names=["output"], input_feed={"input": image})[0]
+
+     depth = cv2.resize(depth[0, 0], (orig_w, orig_h))
+     depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+     depth = depth.astype(np.uint8)
+
+     depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
+
+     combined_result = cv2.hconcat([img_raw, depth_color])
+
+     cv2.imwrite("output-ax.png", combined_result)
+
+     return depth
+
+
+ if __name__ == "__main__":
+     args = parse_args()
+     infer(**vars(args))
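For completeness, an example of running this script is given below; the relative paths assume it is launched from the python/ directory of this repo and are otherwise illustrative.

# Shell: python3 infer.py --img examples/demo19.jpg --model ../depth_anything_v2_vits_ax650.axmodel
# Programmatic use (same path assumptions):
from infer import infer

depth = infer(img="examples/demo19.jpg", model="../depth_anything_v2_vits_ax650.axmodel")
print(depth.shape, depth.dtype)  # (orig_h, orig_w), uint8 depth visualization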
python/infer_onnx.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import argparse
+ import cv2
+ import numpy as np
+ import onnxruntime as ort
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--img",
+         type=str,
+         required=True,
+         help="Path to input image.",
+     )
+     parser.add_argument(
+         "--model",
+         type=str,
+         required=True,
+         help="Path to ONNX model.",
+     )
+
+     return parser.parse_args()
+
+
+ def infer(img: str, model: str, viz: bool = False):
+     img_raw = cv2.imread(img)
+     image = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
+     orig_h, orig_w = image.shape[:2]
+     image = cv2.resize(image, (518, 518))
+     mean = np.array([123.675, 116.28, 103.53], dtype=np.float32).reshape(1, 1, 3)
+     std = np.array([58.395, 57.12, 57.375], dtype=np.float32).reshape(1, 1, 3)
+
+     image = (image - mean) / std
+     image = image.transpose(2, 0, 1)
+     image = image[None]
+
+     session = ort.InferenceSession(
+         model, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+     )
+     depth = session.run(None, {"input": image})[0]
+
+     depth = cv2.resize(depth[0, 0], (orig_w, orig_h))
+     depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+     depth = depth.astype(np.uint8)
+
+     depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
+
+     combined_result = cv2.hconcat([img_raw, depth_color])
+
+     cv2.imwrite("output-onnx.png", combined_result)
+
+     return depth
+
+
+ if __name__ == "__main__":
+     args = parse_args()
+     infer(**vars(args))
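The two demo scripts differ only in preprocessing: this ONNX version applies ImageNet mean/std normalization and NCHW layout, while infer.py feeds the resized RGB frame straight to the axmodel, presumably because the compiled model folds normalization in. A side-by-side sketch of those two paths, under that assumption:

import cv2
import numpy as np

_MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32).reshape(1, 1, 3)
_STD = np.array([58.395, 57.12, 57.375], dtype=np.float32).reshape(1, 1, 3)

def preprocess_onnx(rgb: np.ndarray) -> np.ndarray:
    # float32, ImageNet-normalized, NCHW with batch dim - what infer_onnx.py builds
    x = (cv2.resize(rgb, (518, 518)).astype(np.float32) - _MEAN) / _STD
    return x.transpose(2, 0, 1)[None]

def preprocess_axmodel(rgb: np.ndarray) -> np.ndarray:
    # raw uint8, NHWC with batch dim - what infer.py passes to the axmodel
    return cv2.resize(rgb, (518, 518))[None]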
python/output.png ADDED

Git LFS Details

  • SHA256: e91f86e1a9584ccbd4f484e7e4b74673b633f35eab50a2ec05b28b174d9392c1
  • Pointer size: 132 Bytes
  • Size of remote file: 4.33 MB
python/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
+ onnx
+ onnxruntime
+ opencv-python
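A quick sanity check that the example dependencies resolve; numpy is pulled in transitively, and the axengine package used by infer.py ships in this repo under python/axengine rather than via requirements.txt.

import cv2
import numpy
import onnx
import onnxruntime

print(cv2.__version__, numpy.__version__, onnx.__version__, onnxruntime.__version__)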