Add NBG runner python binding (#677)

Co-authored-by: Xiaoran Weng <xiaoran.weng@verisilicon.com>
2024-01-10 16:31:28 +08:00 · 2024-01-10 16:31:28 +08:00 · 394cedcfe6
parent 54b9c6750e
commit 394cedcfe6
23 changed files with 2072 additions and 0 deletions
--- a/samples/nbg_runner_pybind/.clang-format
+++ b/samples/nbg_runner_pybind/.clang-format
@ -0,0 +1,4 @@
+BasedOnStyle: Google
+PointerAlignment: Left
+DerivePointerAlignment: false
+ColumnLimit: 80
--- a/samples/nbg_runner_pybind/.clang-tidy
+++ b/samples/nbg_runner_pybind/.clang-tidy
@ -0,0 +1,69 @@
+Checks: >
+  -*,
+  bugprone-*,
+  -bugprone-easily-swappable-parameters,
+  performance-*,
+  portability-*,
+  readability-*,
+  modernize-*,
+  -modernize-loop-convert,
+  -modernize-use-trailing-return-type,
+  clang-analyzer-*,
+  misc-*,
+  google-*,
+
+HeaderFilterRegex: "(src/).*\\.(hpp|h)$"
+
+CheckOptions:
+  - key: readability-identifier-naming.NamespaceCase         
+    value: snake_case
+  - key: readability-identifier-naming.ClassCase           
+    value: CamelCase
+  - key: readability-identifier-naming.StructCase           
+    value: CamelCase 
+  - key: readability-identifier-naming.EnumCase           
+    value: CamelCase 
+  - key: readability-identifier-naming.TemplateParameterCase
+    value: CamelCase
+  - key: readability-identifier-naming.FunctionCase
+    value: lower_case
+  - key: readability-identifier-naming.VariableCase   
+    value: lower_case
+  - key: readability-identifier-naming.ClassMemberCase
+    value: lower_case
+  - key: readability-identifier-naming.ClassMemberSuffix
+    value: _
+  - key: readability-identifier-naming.PrivateMemberCase
+    value: lower_case
+  - key: readability-identifier-naming.PrivateMemberSuffix
+    value: _
+  - key: readability-identifier-naming.ClassMethodCase  
+    value: lower_case
+  - key: readability-identifier-naming.EnumConstantCase 
+    value: CamelCase
+  - key: readability-identifier-naming.EnumConstantPrefix
+    value: k
+  - key: readability-identifier-naming.ConstexprVariableCase
+    value: CamelCase
+  - key: readability-identifier-naming.ConstexprVariablePrefix
+    value: k
+  - key: readability-identifier-naming.GlobalConstantCase
+    value: CamelCase
+  - key: readability-identifier-naming.GlobalConstantPrefix
+    value: k
+  - key: readability-identifier-naming.MemberConstantCase  
+    value: CamelCase
+  - key: readability-identifier-naming.MemberConstantPrefix
+    value: k
+  - key: readability-identifier-naming.StaticConstantCase  
+    value: CamelCase
+  - key: readability-identifier-naming.StaticConstantPrefix
+    value: k
+  # The alternation is parenthesised so that ^ and $ anchor every alternative;
+  # without the group only "c" was start-anchored and only "it" end-anchored,
+  # which made the check ignore nearly any identifier.
+  - key: readability-identifier-length.IgnoredVariableNames
+    value: "^(c|d|e|m|op|fd|it)$"
+  - key: readability-identifier-length.IgnoredParameterNames
+    value: "^(c|d|e|m|op|fd|it)$"
+  - key: readability-function-cognitive-complexity.IgnoreMacros
+    value: true
+
+FormatStyle: file
--- a/samples/nbg_runner_pybind/.gitignore
+++ b/samples/nbg_runner_pybind/.gitignore
@ -0,0 +1,15 @@
+[Bb]uild/
+*_build/
+.vscode/
+.cache/
+__pycache__/
+*.pyc
+temp/
+tmp/
+*.o
+*.a
+*.lib
+*.dll
+*.so
+*.nb
+.env
--- a/samples/nbg_runner_pybind/CMakeLists.txt
+++ b/samples/nbg_runner_pybind/CMakeLists.txt
@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.18)
+project(nbg_runner LANGUAGES C CXX)
+
+# Set C/C++ standard.
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED ON)
+set(CMAKE_C_EXTENSIONS OFF)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Enable warnings as errors.
+add_compile_options(-Wall -Werror)
+
+# Add custom CMake modules.
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
+
+# Find dependencies.
+include(FindPyBind11)
+include(FindVivanteSDK)
+
+add_subdirectory(src)
--- a/samples/nbg_runner_pybind/README.md
+++ b/samples/nbg_runner_pybind/README.md
@ -0,0 +1,84 @@
+# VSI NBG Runner Python Binding
+
+This project is a Python package that wraps the OpenVX API using pybind11. It provides a simple Python API to load, query, and run NBG models.
+
+## Installation
+
+```shell
+cmake -B build -DVIVANTE_SDK_DIR=${VIV_SDK_INSTALL_PATH}
+cmake --build build
+```
+
+The built python binding lib can be found at
+`build/src/_nbg_runner.cpython-{python_version}-{platform}.so`. Place the lib into `python/nbg_runner/_binding/`.
+
+## Usage
+
+### Tensor Info
+
+| Field             | Type              |  Value Sample |
+|:-----------------:|:-----------------:|:-------------:|
+| rank              | int               | 4             |
+| shape             | Tuple[int, ...]   | (1,3,224,224) |
+| dtype             | str               | "uint8"       |
+| qtype             | str               | "affine"      |
+| scale             | float             | 0.007874      |
+| zero_point        | int               | 128           |
+| fixed_point_pos   | int               | 0             |
+
+- `shape` is in C-style row major order, which is consistent with NumPy.
+
+### Set Environment Vars
+
+```shell
+# Set the HW target if the driver is compiled with vsimulator.
+VSIMULATOR_CONFIG=VIP9000ULSI_PID0XBA
+# Locate the OVX driver.
+VIVANTE_SDK_DIR=${VIV_SDK_INSTALL_PATH}
+LD_LIBRARY_PATH=${VIVANTE_SDK_DIR}/[lib|lib64|drivers]
+# Set PYTHONPATH to the dir containing nbg_runner module.
+PYTHONPATH=${workspaceFolder}/python
+```
+
+### Example
+
+See detailed examples in `examples/*.py`
+
+```python
+from nbg_runner import OVXExecutor
+
+# Load a model NBG file.
+executor = OVXExecutor("path/to/model.nbg")
+
+# Query model I/O tensors count.
+num_inputs = executor.get_num_inputs()
+num_outputs = executor.get_num_outputs()
+
+# Get I/O tensor info by index.
+input_info = executor.get_input_info(0)
+output_info = executor.get_output_info(0)
+
+# Or get all I/O tensors infos at once.
+input_infos = executor.get_input_infos()
+output_infos = executor.get_output_infos()
+
+# Prepare inputs.
+input_tensors: List[NDArray] = ...
+
+# Set input tensor by index.
+for i, input_tensor in enumerate(input_tensors):
+    executor.set_input(i, input_tensor)
+
+# Or set all input tensors at once.
+executor.set_inputs(input_tensors)
+
+# Run inference.
+executor.run()
+
+# Get output tensor by index.
+for i in range(num_outputs):
+    output_tensor = executor.get_output(i)
+
+# Or get all output tensors at once.
+output_tensors = executor.get_outputs()
+```
--- a/samples/nbg_runner_pybind/cmake/modules/FindPyBind11.cmake
+++ b/samples/nbg_runner_pybind/cmake/modules/FindPyBind11.cmake
@ -0,0 +1,13 @@
+# Try to use an installed pybind11 CMake package. QUIET: a miss is not an
+# error here because the FetchContent fallback below covers it.
+find_package(pybind11 QUIET)
+
+# Test the variable by name; if() dereferences it itself, so the condition
+# stays well-formed even if the variable is unset or empty.
+if(NOT pybind11_FOUND)
+  include(FetchContent)
+
+  # Fall back to fetching a pinned pybind11 release and adding it to the build.
+  FetchContent_Declare(
+    pybind11
+    GIT_REPOSITORY "https://github.com/pybind/pybind11.git"
+    GIT_TAG "v2.11.1"
+  )
+  FetchContent_MakeAvailable(pybind11)
+endif()
--- a/samples/nbg_runner_pybind/cmake/modules/FindVivanteSDK.cmake
+++ b/samples/nbg_runner_pybind/cmake/modules/FindVivanteSDK.cmake
@ -0,0 +1,14 @@
+# Locates the OpenVX library of a Vivante SDK install and exposes it as the
+# `viv_sdk` INTERFACE target. VIVANTE_SDK_DIR must be provided by the caller
+# (e.g. -DVIVANTE_SDK_DIR=...); configuration aborts otherwise.
+if(NOT VIVANTE_SDK_DIR)
+  message(FATAL_ERROR "VIVANTE_SDK_DIR is not set")
+endif()
+
+# Look for the OpenVX library, hinting at the SDK's lib, lib64 and drivers
+# directories. REQUIRED makes the configure step fail if it is not found.
+find_library(
+  OPENVX_LIB
+  NAMES OpenVX
+  HINTS ${VIVANTE_SDK_DIR}/lib ${VIVANTE_SDK_DIR}/lib64 ${VIVANTE_SDK_DIR}/drivers
+  REQUIRED
+)
+
+# Consumers that link viv_sdk inherit both the library and the SDK headers.
+add_library(viv_sdk INTERFACE)
+target_link_libraries(viv_sdk INTERFACE ${OPENVX_LIB})
+target_include_directories(viv_sdk INTERFACE ${VIVANTE_SDK_DIR}/include)
--- a/samples/nbg_runner_pybind/examples/data/bird.jpg
+++ b/samples/nbg_runner_pybind/examples/data/bird.jpg
--- a/samples/nbg_runner_pybind/examples/data/cat.jpg
+++ b/samples/nbg_runner_pybind/examples/data/cat.jpg
--- a/samples/nbg_runner_pybind/examples/data/imagenet1k_labels.txt
+++ b/samples/nbg_runner_pybind/examples/data/imagenet1k_labels.txt
--- a/samples/nbg_runner_pybind/examples/demo.py
+++ b/samples/nbg_runner_pybind/examples/demo.py
@ -0,0 +1,39 @@
+from typing import Any
+from argparse import ArgumentParser
+from pathlib import Path
+import numpy as np
+
+from nbg_runner import OVXExecutor
+
+
+def get_args() -> Any:
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--nbg", "-m",
+        type=Path,
+        default="examples/models/conv2d_relu_maxpool2d_fp32.nbg",
+        help="Path to NBG file."
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = get_args()
+    nbg_path: Path = args.nbg
+
+    executor = OVXExecutor(nbg_path)
+    num_inputs = executor.get_num_inputs()
+    num_outputs = executor.get_num_outputs()
+
+    input_info = executor.get_input_info(0)
+    output_info = executor.get_output_info(0)
+
+    input_tensor = np.ones(
+        shape=input_info.shape,
+        dtype=input_info.dtype
+    )
+
+    executor.set_input(0, input_tensor)
+    executor.run()
+    output_tensor = executor.get_output(0)
+    print(output_tensor)
--- a/samples/nbg_runner_pybind/examples/imagenet1k.py
+++ b/samples/nbg_runner_pybind/examples/imagenet1k.py
@ -0,0 +1,76 @@
+from typing import Any, List
+from numpy.typing import NDArray
+from argparse import ArgumentParser
+from pathlib import Path
+from nbg_runner import OVXExecutor
+import cv2 as cv
+import numpy as np
+
+
+def get_args() -> Any:
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--nbg", "-m",
+        type=Path,
+        required=True,
+        help="Path to NBG file."
+    )
+    parser.add_argument(
+        "--image", "-i",
+        type=Path,
+        required=True,
+        help="Path to image file."
+    )
+    parser.add_argument(
+        "--labels", "-l",
+        type=Path,
+        required=True,
+        help="Path to classification labels."
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = get_args()
+    nbg_path: Path = args.nbg
+    img_path: Path = args.image
+    labels_path: Path = args.labels
+
+    # Load NBG and query I/O params.
+    executor = OVXExecutor(nbg_path)
+    num_inputs = executor.get_num_inputs()
+    num_outputs = executor.get_num_outputs()
+    input_info = executor.get_input_info(index=0)
+    output_info = executor.get_output_info(index=0)
+
+    input_size = input_info.shape[1:3]
+    num_cls = output_info.shape[1]
+
+    # Load input image.
+    img_hwc: NDArray[np.uint8] = cv.imread(str(img_path), cv.IMREAD_COLOR)
+    hi, wi = input_size
+    img_hwc = cv.resize(img_hwc, dsize=(wi, hi))
+    img_hwc = cv.cvtColor(img_hwc, cv.COLOR_BGR2RGB)
+    img_nhwc = np.expand_dims(img_hwc, axis=0)
+
+    if input_info.dtype == "float32":
+        img_nhwc = img_nhwc.astype(np.float32) / np.iinfo(np.uint8).max
+
+    # Load classification labels.
+    cls_labels: List[str] = []
+    with open(labels_path, mode="r") as f:
+        for label in f:
+            cls_labels.append(label.strip())
+
+    # Run inference.
+    # executor.set_inputs([img_nhwc])
+    executor.set_input(index=0, input_tensor=img_nhwc)
+    executor.run()
+    scores = executor.get_output(index=0)
+    # scores = executor.get_outputs()[0]
+
+    cls = np.argmax(scores, axis=1)
+    cls = np.squeeze(cls, axis=0).item()
+
+    cls_label = cls_labels[cls]
+    print(f"Classification result: {cls_label}")
--- a/samples/nbg_runner_pybind/examples/models/conv2d_relu_maxpool2d_fp32.nbg
+++ b/samples/nbg_runner_pybind/examples/models/conv2d_relu_maxpool2d_fp32.nbg
--- a/samples/nbg_runner_pybind/python/nbg_runner/__init__.py
+++ b/samples/nbg_runner_pybind/python/nbg_runner/__init__.py
@ -0,0 +1,6 @@
+""" VSI NBG runner python module"""
+
+__version__ = "0.0.1"
+__author__ = "Verisilicon"
+
+from .vx.ovx_executor import OVXExecutor, OVXTensorInfo
--- a/samples/nbg_runner_pybind/python/nbg_runner/_binding/__init__.py
+++ b/samples/nbg_runner_pybind/python/nbg_runner/_binding/__init__.py
@ -0,0 +1,4 @@
+from ._nbg_runner import (
+    OVXExecutor,
+    OVXTensorInfo
+)
--- a/samples/nbg_runner_pybind/python/nbg_runner/vx/init.py
+++ b/samples/nbg_runner_pybind/python/nbg_runner/vx/init.py
--- a/samples/nbg_runner_pybind/python/nbg_runner/vx/ovx_executor.py
+++ b/samples/nbg_runner_pybind/python/nbg_runner/vx/ovx_executor.py
@ -0,0 +1,70 @@
+from typing import List, Tuple, Sequence
+from numpy.typing import NDArray
+import numpy as np
+from pathlib import Path
+from nbg_runner import _binding
+
+
+class OVXTensorInfo:
+    """Typing stub for the tensor-info objects handed out by ``OVXExecutor``.
+
+    The ``...`` values are placeholders only; the objects returned by
+    ``get_input_info`` / ``get_output_info`` come from the native binding.
+    """
+    # Number of tensor dimensions.
+    rank: int = ...
+    # Dimension sizes in C-style row-major order (consistent with NumPy).
+    shape: Tuple[int, ...] = ...
+    # Element type name, e.g. "uint8".
+    dtype: str = ...
+    # Quantization type name, e.g. "affine".
+    qtype: str = ...
+    # Affine quantization scale.
+    scale: float = ...
+    # Affine quantization zero point.
+    zero_point: int = ...
+    # Fixed-point position for dynamic fixed-point quantization.
+    fixed_point_pos: int = ...
+
+
+class OVXExecutor:
+    def __init__(self, nbg_path: Path) -> None:
+        self._exec = _binding.OVXExecutor(nbg_path)
+        self._exec.init()
+
+    def get_num_inputs(self) -> int:
+        return self._exec.get_num_inputs()
+
+    def get_num_outputs(self) -> int:
+        return self._exec.get_num_outputs()
+
+    def get_input_info(self, index: int) -> OVXTensorInfo:
+        return self._exec.get_input_info(index)
+
+    def get_output_info(self, index: int) -> OVXTensorInfo:
+        return self._exec.get_output_info(index)
+
+    def get_input_infos(self) -> List[OVXTensorInfo]:
+        input_infos: List[OVXTensorInfo] = []
+        num_inputs = self.get_num_inputs()
+        for i in range(num_inputs):
+            input_infos.append(self.get_input_info(i))
+        return input_infos
+
+    def get_output_infos(self) -> List[OVXTensorInfo]:
+        output_infos: List[OVXTensorInfo] = []
+        num_outputs = self.get_num_outputs()
+        for i in range(num_outputs):
+            output_infos.append(self.get_output_info(i))
+        return output_infos
+
+    def set_input(self, index: int, input_tensor: NDArray) -> None:
+        return self._exec.set_input(index, input_tensor)
+
+    def get_output(self, index: int) -> NDArray:
+        output_tensor: NDArray = self._exec.get_output(index)
+        return output_tensor
+
+    def set_inputs(self, input_tensors: Sequence[NDArray]) -> None:
+        for i, tensor in enumerate(input_tensors):
+            self.set_input(i, tensor)
+
+    def get_outputs(self) -> List[NDArray]:
+        output_tensors: List[NDArray] = []
+        num_outputs = self.get_num_outputs()
+        for i in range(num_outputs):
+            output_tensor = self.get_output(i)
+            output_tensors.append(output_tensor)
+
+        return output_tensors
+
+    def run(self) -> None:
+        self._exec.run()
--- a/samples/nbg_runner_pybind/src/CMakeLists.txt
+++ b/samples/nbg_runner_pybind/src/CMakeLists.txt
@ -0,0 +1,9 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+pybind11_add_module(_nbg_runner ${CMAKE_CURRENT_SOURCE_DIR}/nbg_runner_pybind.cpp)
+
+add_subdirectory(vx)
+target_link_libraries(_nbg_runner
+  PRIVATE
+  nbg_runner::vx
+)
--- a/samples/nbg_runner_pybind/src/nbg_runner_pybind.cpp
+++ b/samples/nbg_runner_pybind/src/nbg_runner_pybind.cpp
@ -0,0 +1,129 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020-2024 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl/filesystem.h>
+
+#include <array>
+#include <filesystem>
+#include <string_view>
+
+#include "vx/ovx_executor.hpp"
+#include "vx/utils.hpp"
+
+namespace vsi::nbg_runner::python {
+namespace py = pybind11;
+namespace fs = std::filesystem;
+
+// Python module `_nbg_runner`: exposes OVXExecutor (load / query / run an NBG)
+// and the read-only OVXTensorInfo description of its I/O tensors.
+// Shapes and strides are reversed at this boundary: Python sees NumPy order,
+// the executor receives the reverse order.
+PYBIND11_MODULE(_nbg_runner, m) {
+  using namespace vsi::nbg_runner::vx;
+
+  // clang-format off
+  py::class_<OVXExecutor>(m, "OVXExecutor")
+    .def(py::init<const fs::path&>())
+    .def(py::init([](const py::buffer& nbg_buffer) {
+      auto buffer_info = nbg_buffer.request(false);
+      // buffer_info.size counts elements, not bytes; scale by itemsize so a
+      // buffer with multi-byte items is not truncated.
+      // NOTE(review): this assumes the buffer is contiguous -- confirm or
+      // reject strided buffers.
+      const auto nbg_bytes = static_cast<size_t>(buffer_info.size) *
+                             static_cast<size_t>(buffer_info.itemsize);
+      return std::make_unique<OVXExecutor>(static_cast<const char*>(buffer_info.ptr), nbg_bytes);
+    }))
+    .def("init", &OVXExecutor::init)
+    .def("get_num_inputs", &OVXExecutor::get_num_inputs)
+    .def("get_num_outputs", &OVXExecutor::get_num_outputs)
+    .def("get_input_info", &OVXExecutor::get_input_info)
+    .def("get_output_info", &OVXExecutor::get_output_info)
+    .def("set_input", [](OVXExecutor* executor, size_t index, const py::buffer& buffer) {
+      auto buffer_info = buffer.request(false);
+      // The scratch arrays below hold at most kMaxRank entries; reject
+      // higher-rank buffers before copying so they cannot be overrun.
+      if (buffer_info.shape.size() > OVXTensorInfo::kMaxRank ||
+          buffer_info.strides.size() > OVXTensorInfo::kMaxRank) {
+        throw py::value_error("Tensor rank exceeds the maximum supported rank.");
+      }
+      std::array<size_t, OVXTensorInfo::kMaxRank> vx_shape = {0};
+      std::array<size_t, OVXTensorInfo::kMaxRank> vx_strides = {0};
+      // Reverse NumPy order into the order expected by the executor.
+      std::reverse_copy(buffer_info.shape.cbegin(), buffer_info.shape.cend(), vx_shape.begin());
+      std::reverse_copy(buffer_info.strides.cbegin(), buffer_info.strides.cend(), vx_strides.begin());
+      executor->copy_to_input(
+        index,
+        buffer_info.ptr,
+        buffer_info.ndim,
+        vx_shape.data(),
+        vx_strides.data()
+      );
+    })
+    .def("get_output", [](OVXExecutor* executor, size_t index) -> py::array {
+      auto tensor_info = executor->get_output_info(index);
+      auto np_dtype = py::dtype(get_vx_dtype_str(tensor_info.data_type).data());
+      // Allocate the result array with the shape reversed into NumPy order.
+      auto np_shape = std::vector<ssize_t>(tensor_info.rank);
+      std::reverse_copy(tensor_info.shape.data(), tensor_info.shape.data() + tensor_info.rank, np_shape.begin());
+
+      auto np_tensor = py::array(np_dtype, np_shape);
+      auto buffer_info = np_tensor.request(true);
+
+      // Strides of the freshly allocated array, reversed for the executor.
+      std::array<size_t, OVXTensorInfo::kMaxRank> vx_strides = {0};
+      std::reverse_copy(buffer_info.strides.cbegin(), buffer_info.strides.cend(), vx_strides.begin());
+
+      executor->copy_from_output(
+        index,
+        buffer_info.ptr,
+        tensor_info.rank,
+        tensor_info.shape.data(),
+        vx_strides.data()
+      );
+      return np_tensor;
+    })
+    .def("run", &OVXExecutor::run)
+  ;
+
+  py::class_<OVXTensorInfo>(m, "OVXTensorInfo")
+    .def_readonly("rank", &OVXTensorInfo::rank)
+    .def_property_readonly("shape", [](OVXTensorInfo* tensor_info) -> py::tuple {
+      // Expose the shape reversed, i.e. in NumPy order.
+      size_t rank = tensor_info->rank;
+      auto shape_tuple = py::tuple(rank);
+      for (size_t i = 0; i < rank; i++){
+        shape_tuple[i] = tensor_info->shape[rank - i - 1];
+      }
+      return shape_tuple;
+    })
+    .def_property_readonly("dtype", [](OVXTensorInfo* tensor_info) -> std::string_view {
+      return get_vx_dtype_str(tensor_info->data_type);
+    })
+    .def_property_readonly("qtype", [](OVXTensorInfo* tensor_info) -> std::string_view {
+      return get_vx_qtype_str(tensor_info->quant_type);
+    })
+    // The quantization accessors below return 0 unless the tensor uses the
+    // matching quantization type.
+    .def_property_readonly("scale", [](OVXTensorInfo* tensor_info) -> float {
+      return tensor_info->quant_type == VX_QUANT_AFFINE_SCALE
+        ? tensor_info->quant_param.affine.scale
+        : 0.0F;
+    })
+    .def_property_readonly("zero_point", [](OVXTensorInfo* tensor_info) -> int32_t {
+      return tensor_info->quant_type == VX_QUANT_AFFINE_SCALE
+        ? tensor_info->quant_param.affine.zeroPoint
+        : 0;
+    })
+    .def_property_readonly("fixed_point_pos", [](OVXTensorInfo* tensor_info) -> int8_t {
+      return tensor_info->quant_type == VX_QUANT_DYNAMIC_FIXED_POINT
+        ? tensor_info->quant_param.dfp.fixed_point_pos
+        : static_cast<int8_t>(0);
+    })
+  ;
+  // clang-format on
+}
+
+}  // namespace vsi::nbg_runner::python
--- a/samples/nbg_runner_pybind/src/vx/CMakeLists.txt
+++ b/samples/nbg_runner_pybind/src/vx/CMakeLists.txt
@ -0,0 +1,8 @@
+add_library(nbg_runner_vx INTERFACE)
+add_library(nbg_runner::vx ALIAS nbg_runner_vx)
+
+target_sources(nbg_runner_vx INTERFACE
+  ${CMAKE_CURRENT_SOURCE_DIR}/ovx_executor.cpp
+)
+
+target_link_libraries(nbg_runner_vx INTERFACE viv_sdk)
--- a/samples/nbg_runner_pybind/src/vx/ovx_executor.cpp
+++ b/samples/nbg_runner_pybind/src/vx/ovx_executor.cpp
@ -0,0 +1,291 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020-2024 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#include "ovx_executor.hpp"
+
+#include <VX/vx_khr_import_kernel.h>
+
+#include <algorithm>
+#include <array>
+#include <filesystem>
+#include <fstream>
+
+#include "utils.hpp"
+
+namespace vsi::nbg_runner::vx {
+
+// Builds an executor from an in-memory NBG image. The bytes
+// [nbg_data, nbg_data + nbg_size) are copied into the executor's own buffer.
+OVXExecutor::OVXExecutor(const char* nbg_data, size_t nbg_size) {
+  const char* const nbg_end = nbg_data + nbg_size;
+  nbg_buffer_.assign(nbg_data, nbg_end);
+}
+
+// Builds an executor by reading the whole NBG file at `nbg_path` into the
+// executor's own buffer.
+//
+// Throws std::filesystem::filesystem_error if the file size cannot be
+// queried, and std::runtime_error if the file cannot be opened or fully read
+// (previously such failures were ignored and left a zero-filled buffer).
+OVXExecutor::OVXExecutor(const fs::path& nbg_path) {
+  const auto nbg_size = static_cast<size_t>(fs::file_size(nbg_path));
+
+  std::ifstream nbg_file(nbg_path, std::ios::in | std::ios::binary);
+  if (!nbg_file.is_open()) {
+    throw std::runtime_error("Failed to open NBG file.");
+  }
+
+  nbg_buffer_.resize(nbg_size);
+  nbg_file.read(nbg_buffer_.data(), static_cast<std::streamsize>(nbg_size));
+  // A short read means the buffer does not hold the complete NBG.
+  if (nbg_file.gcount() != static_cast<std::streamsize>(nbg_size)) {
+    throw std::runtime_error("Failed to read NBG file.");
+  }
+}
+
+// Releases every OpenVX object held by the executor: the I/O tensors first,
+// then the node, the kernel, the graph and finally the context.
+OVXExecutor::~OVXExecutor() {
+  const auto release_tensor = [](auto& tensor) { vxReleaseTensor(&tensor); };
+  std::for_each(input_tensors_.begin(), input_tensors_.end(), release_tensor);
+  std::for_each(output_tensors_.begin(), output_tensors_.end(),
+                release_tensor);
+
+  vxReleaseNode(&nbg_node_);
+  vxReleaseKernel(&nbg_kernel_);
+  vxReleaseGraph(&graph_);
+  vxReleaseContext(&context_);
+}
+
+// Creates the OpenVX context and graph, imports the NBG held in nbg_buffer_
+// as a kernel, queries its I/O tensor infos, creates one tensor per input and
+// output, binds them to the NBG node (inputs first, then outputs) and
+// verifies the graph.
+//
+// Returns VX_SUCCESS (as int). Throws std::runtime_error on any failure.
+int OVXExecutor::init() {
+  vx_status status = VX_SUCCESS;
+
+  if (nbg_buffer_.empty()) {
+    throw std::runtime_error("NBG buffer is empty.");
+  }
+
+  context_ = vxCreateContext();
+  if (context_ == nullptr) {
+    throw std::runtime_error("Failed to create OpenVX context.");
+  }
+
+  graph_ = vxCreateGraph(context_);
+  status = vxGetStatus(reinterpret_cast<vx_reference>(graph_));
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to create OpenVX graph.");
+  }
+
+  nbg_kernel_ = vxImportKernelFromURL(
+      context_, VX_VIVANTE_IMPORT_KERNEL_FROM_POINTER, nbg_buffer_.data());
+  status = vxGetStatus(reinterpret_cast<vx_reference>(nbg_kernel_));
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to import NBG kernel.");
+  }
+
+  status = query_nbg_io_infos();
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to query NBG I/O params.");
+  }
+  const size_t num_inputs = input_tensors_infos_.size();
+  const size_t num_outputs = output_tensors_infos_.size();
+
+  nbg_node_ = vxCreateGenericNode(graph_, nbg_kernel_);
+  status = vxGetStatus(reinterpret_cast<vx_reference>(nbg_node_));
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to create NBG node.");
+  }
+
+  // Creates a tensor described by `tensor_info`, stores it in `tensors` and
+  // binds it to parameter `param_index` of the NBG node.
+  const auto create_and_bind = [this](const OVXTensorInfo& tensor_info,
+                                      size_t param_index, auto& tensors,
+                                      const char* create_error,
+                                      const char* bind_error) {
+    if (tensor_info.rank > OVXTensorInfo::kMaxRank) {
+      throw std::runtime_error(create_error);
+    }
+
+    // Convert only the `rank` valid dimensions; the rest stay zero instead
+    // of being read from unset entries.
+    std::array<uint32_t, OVXTensorInfo::kMaxRank> shape = {0};
+    for (size_t dim = 0; dim < tensor_info.rank; dim++) {
+      shape[dim] = static_cast<uint32_t>(tensor_info.shape[dim]);
+    }
+
+    vx_tensor_create_params_t tensor_create_params = {
+        .num_of_dims = static_cast<uint32_t>(tensor_info.rank),
+        .sizes = shape.data(),
+        .data_format = tensor_info.data_type,
+        .quant_format = tensor_info.quant_type,
+        .quant_data = tensor_info.quant_param,
+    };
+    vx_tensor tensor = vxCreateTensor2(context_, &tensor_create_params,
+                                       sizeof(tensor_create_params));
+    // Creation failures may be reported as an error object rather than as
+    // nullptr, so check the reference status as well.
+    if (tensor == nullptr ||
+        vxGetStatus(reinterpret_cast<vx_reference>(tensor)) != VX_SUCCESS) {
+      throw std::runtime_error(create_error);
+    }
+
+    // Store the tensor before binding so the destructor releases it even if
+    // binding fails.
+    tensors.push_back(tensor);
+    if (vxSetParameterByIndex(nbg_node_, static_cast<vx_uint32>(param_index),
+                              reinterpret_cast<vx_reference>(tensor)) !=
+        VX_SUCCESS) {
+      throw std::runtime_error(bind_error);
+    }
+  };
+
+  // Create input tensors and bind to NBG node.
+  for (size_t i = 0; i < num_inputs; i++) {
+    create_and_bind(input_tensors_infos_[i], i, input_tensors_,
+                    "Failed to create input vx tensor.",
+                    "Failed to bind input vx tensor.");
+  }
+
+  // Create output tensors and bind to NBG node; output parameters follow the
+  // inputs.
+  for (size_t i = 0; i < num_outputs; i++) {
+    create_and_bind(output_tensors_infos_[i], num_inputs + i, output_tensors_,
+                    "Failed to create output vx tensor.",
+                    "Failed to bind output vx tensor.");
+  }
+
+  status = vxVerifyGraph(graph_);
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to verify OpenVX graph.");
+  }
+
+  return static_cast<int>(status);
+}
+
+// Walks every parameter of the imported NBG kernel and records its rank,
+// shape, data type and quantization parameters in input_tensors_infos_ or
+// output_tensors_infos_, depending on the parameter direction.
+//
+// Returns VX_SUCCESS, the vxQueryKernel status if the parameter count cannot
+// be queried, or VX_ERROR_NOT_SUPPORTED for an unknown quantization type or a
+// rank above OVXTensorInfo::kMaxRank.
+int OVXExecutor::query_nbg_io_infos() {
+  uint32_t num_params = 0;
+  const vx_status query_status = vxQueryKernel(
+      nbg_kernel_, VX_KERNEL_PARAMETERS, &num_params, sizeof(num_params));
+  if (query_status != VX_SUCCESS) {
+    return static_cast<int>(query_status);
+  }
+
+  for (uint32_t i = 0; i < num_params; i++) {
+    vx_parameter param = vxGetKernelParameterByIndex(nbg_kernel_, i);
+
+    vx_enum direction = 0;
+    vxQueryParameter(param, VX_PARAMETER_DIRECTION, &direction,
+                     sizeof(direction));
+
+    vx_meta_format meta = nullptr;
+    vxQueryParameter(param, VX_PARAMETER_META_FORMAT, &meta, sizeof(meta));
+
+    // Value-initialize so that fields a query does not fill (e.g. shape
+    // entries beyond `rank`) are zero rather than indeterminate.
+    OVXTensorInfo tensor_info{};
+    vxQueryMetaFormatAttribute(meta, VX_TENSOR_NUMBER_OF_DIMS,
+                               &tensor_info.rank, sizeof(tensor_info.rank));
+    vxQueryMetaFormatAttribute(meta, VX_TENSOR_DIMS, tensor_info.shape.data(),
+                               sizeof(tensor_info.shape));
+    vxQueryMetaFormatAttribute(meta, VX_TENSOR_DATA_TYPE,
+                               &tensor_info.data_type,
+                               sizeof(tensor_info.data_type));
+    vxQueryMetaFormatAttribute(meta, VX_TENSOR_QUANT_FORMAT,
+                               &tensor_info.quant_type,
+                               sizeof(tensor_info.quant_type));
+
+    // The shape array cannot describe more than kMaxRank dimensions.
+    if (tensor_info.rank > OVXTensorInfo::kMaxRank) {
+      vxReleaseParameter(&param);
+      return VX_ERROR_NOT_SUPPORTED;
+    }
+
+    switch (tensor_info.quant_type) {
+      case VX_QUANT_NONE:
+        break;
+      case VX_QUANT_AFFINE_SCALE:
+        vxQueryMetaFormatAttribute(
+            meta, VX_TENSOR_ZERO_POINT,
+            &tensor_info.quant_param.affine.zeroPoint,
+            sizeof(tensor_info.quant_param.affine.zeroPoint));
+        vxQueryMetaFormatAttribute(
+            meta, VX_TENSOR_SCALE, &tensor_info.quant_param.affine.scale,
+            sizeof(tensor_info.quant_param.affine.scale));
+        break;
+      case VX_QUANT_DYNAMIC_FIXED_POINT:
+        // Query the fixed-point position attribute; the former code asked
+        // for VX_TENSOR_SCALE here and stored it as the position.
+        // NOTE(review): attribute name taken from the OpenVX 1.2 tensor
+        // attributes -- confirm it is declared by the SDK headers in use.
+        vxQueryMetaFormatAttribute(
+            meta, VX_TENSOR_FIXED_POINT_POSITION,
+            &tensor_info.quant_param.dfp.fixed_point_pos,
+            sizeof(tensor_info.quant_param.dfp.fixed_point_pos));
+        break;
+      default:
+        vxReleaseParameter(&param);
+        return VX_ERROR_NOT_SUPPORTED;
+    }
+
+    if (direction == VX_INPUT) {
+      input_tensors_infos_.push_back(tensor_info);
+    } else if (direction == VX_OUTPUT) {
+      output_tensors_infos_.push_back(tensor_info);
+    }
+
+    vxReleaseParameter(&param);
+  }
+
+  return VX_SUCCESS;
+}
+
+// Copies host data into input tensor `index`.
+//
+// `rank`, `shape` and `strides` describe the host buffer; `shape` must equal
+// the tensor's queried shape and `strides[0]` must equal the tensor's element
+// size in bytes.
+//
+// Returns VX_SUCCESS. Throws std::out_of_range for an invalid index,
+// std::invalid_argument for a rank / element size / shape mismatch and
+// std::runtime_error if the copy itself fails.
+int OVXExecutor::copy_to_input(size_t index, void* data, size_t rank,
+                               const size_t* shape, const size_t* strides) {
+  // Check both containers: the infos are filled before the tensors are
+  // created, so they can differ in size if init() did not complete.
+  if (index >= input_tensors_infos_.size() || index >= input_tensors_.size()) {
+    throw std::out_of_range("Invalid input index.");
+  }
+
+  vx_tensor input_tensor = input_tensors_[index];
+  const auto& tensor_info = input_tensors_infos_[index];
+
+  if (rank != tensor_info.rank) {
+    throw std::invalid_argument("Tensor rank mismatch.");
+  }
+
+  if (strides[0] != get_vx_dtype_bytes(tensor_info.data_type)) {
+    throw std::invalid_argument("Tensor element size mismatch.");
+  }
+
+  for (size_t i = 0; i < rank; i++) {
+    if (shape[i] != tensor_info.shape[i]) {
+      throw std::invalid_argument("Tensor shape mismatch.");
+    }
+  }
+
+  // Copy the whole tensor: the view starts at the origin in every dimension.
+  std::array<size_t, OVXTensorInfo::kMaxRank> view_start = {0};
+
+  const vx_status status =
+      vxCopyTensorPatch(input_tensor, rank, view_start.data(), shape, strides,
+                        data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to copy input data.");
+  }
+
+  return VX_SUCCESS;
+}
+
+// Copies output tensor `index` into the host buffer `data`, which is
+// described by `rank`, `shape` and `strides`.
+//
+// Returns VX_SUCCESS. Throws std::runtime_error for an invalid index or if
+// the copy fails.
+int OVXExecutor::copy_from_output(size_t index, void* data, size_t rank,
+                                  const size_t* shape, const size_t* strides) {
+  // Check both containers: the infos are filled before the tensors are
+  // created, so they can differ in size if init() did not complete.
+  if (index >= output_tensors_infos_.size() ||
+      index >= output_tensors_.size()) {
+    throw std::runtime_error("Invalid output index.");
+  }
+
+  vx_tensor output_tensor = output_tensors_[index];
+
+  // Copy the whole tensor: the view starts at the origin in every dimension.
+  std::array<size_t, OVXTensorInfo::kMaxRank> view_start = {0};
+  const vx_status status =
+      vxCopyTensorPatch(output_tensor, rank, view_start.data(), shape, strides,
+                        data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+  if (status != VX_SUCCESS) {
+    throw std::runtime_error("Failed to copy output data.");
+  }
+
+  return VX_SUCCESS;
+}
+
+// Processes the graph once. Returns VX_SUCCESS (as int) or throws
+// std::runtime_error if processing fails.
+int OVXExecutor::run() {
+  const vx_status process_status = vxProcessGraph(graph_);
+  if (process_status == VX_SUCCESS) {
+    return static_cast<int>(process_status);
+  }
+
+  throw std::runtime_error("Failed to run OpenVX graph.");
+}
+
+}  // namespace vsi::nbg_runner::vx
--- a/samples/nbg_runner_pybind/src/vx/ovx_executor.hpp
+++ b/samples/nbg_runner_pybind/src/vx/ovx_executor.hpp
@ -0,0 +1,110 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020-2024 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef VSI_NBG_RUNNER_VX_OVX_EXECUTOR_HPP_
+#define VSI_NBG_RUNNER_VX_OVX_EXECUTOR_HPP_
+
+#include <VX/vx.h>
+#include <VX/vx_api.h>
+#include <VX/vx_khr_nn.h>
+#include <VX/vx_types.h>
+
+#include <array>
+#include <filesystem>
+#include <vector>
+
+
+namespace vsi::nbg_runner::vx {
+
+namespace fs = std::filesystem;
+
+/**
+ * \brief Description of a single I/O tensor: rank, shape, element type and
+ * quantization.
+ *
+ * Every member has a default initializer so that a default-constructed
+ * instance never exposes indeterminate values (for example the shape entries
+ * at positions >= rank).
+ */
+struct OVXTensorInfo {
+  /** \brief Capacity of \ref shape, i.e. the largest rank that fits. */
+  static constexpr size_t kMaxRank = 6;
+
+  /** \brief Number of dimensions of the tensor. */
+  size_t rank = 0;
+  /** \brief Extent of each dimension; only the first \ref rank are used. */
+  std::array<size_t, kMaxRank> shape = {};
+  /** \brief Element type as a VX_TYPE_* value. */
+  vx_enum data_type = VX_TYPE_INVALID;
+  /** \brief Quantization scheme as a VX_QUANT_* value. */
+  vx_enum quant_type = VX_QUANT_NONE;
+  /** \brief Quantization parameters that go with \ref quant_type. */
+  vx_tensor_quant_param quant_param = {};
+};
+
+/**
+ * \brief Holds the OpenVX objects used to execute an NBG and exposes its
+ * input/output tensors.
+ *
+ * NOTE(review): the class stores raw OpenVX handles and declares a
+ * destructor, but copy operations are still implicitly generated. Confirm
+ * that instances are never copied, or delete the copy operations.
+ */
+class OVXExecutor {
+ public:
+  /** \brief Construct from an NBG held in memory (presumably copied). */
+  explicit OVXExecutor(const char* nbg_data, size_t nbg_size);
+  /** \brief Construct from an NBG file path. */
+  explicit OVXExecutor(const fs::path& nbg_path);
+
+  ~OVXExecutor();
+
+  /** \brief Class initialization. */
+  int init();
+
+  /** \brief I/O params query getters. */
+  [[nodiscard]] size_t get_num_inputs() const {
+    return input_tensors_infos_.size();
+  }
+  [[nodiscard]] size_t get_num_outputs() const {
+    return output_tensors_infos_.size();
+  }
+  /**
+   * \brief Description of the input tensor at \p index.
+   * \throws std::out_of_range if \p index is not a valid input index.
+   */
+  [[nodiscard]] OVXTensorInfo get_input_info(size_t index) const {
+    return input_tensors_infos_.at(index);
+  }
+  /**
+   * \brief Description of the output tensor at \p index.
+   * \throws std::out_of_range if \p index is not a valid output index.
+   */
+  [[nodiscard]] OVXTensorInfo get_output_info(size_t index) const {
+    return output_tensors_infos_.at(index);
+  }
+
+  /**
+   * \brief Copy host data into the input tensor at \p index.
+   *
+   * \p rank, \p shape and \p strides describe the host buffer \p data; the
+   * call throws when they do not match the tensor or when the copy fails.
+   */
+  int copy_to_input(size_t index, void* data, size_t rank,
+                    const size_t* shape, const size_t* strides);
+  /**
+   * \brief Copy the output tensor at \p index into the host buffer \p data.
+   *
+   * \p rank, \p shape and \p strides describe the host buffer; the call
+   * throws on an invalid index or when the copy fails.
+   */
+  int copy_from_output(size_t index, void* data, size_t rank,
+                       const size_t* shape, const size_t* strides);
+
+  /** \brief Process the graph; throws std::runtime_error on failure. */
+  int run();
+
+ private:
+  int query_nbg_io_infos();
+
+  // Handles start out null so that they never hold indeterminate values,
+  // whichever constructor path is taken.
+
+  /** \brief The OpenVX context for management of all OpenVX objects. */
+  vx_context context_ = nullptr;
+  /** \brief The OpenVX graph for execution. */
+  vx_graph graph_ = nullptr;
+  /** \brief The OpenVX NBG node. */
+  vx_node nbg_node_ = nullptr;
+  /** \brief The OpenVX NBG kernel. */
+  vx_kernel nbg_kernel_ = nullptr;
+  /** \brief The OpenVX input tensors params. */
+  std::vector<OVXTensorInfo> input_tensors_infos_;
+  /** \brief The OpenVX output tensors params. */
+  std::vector<OVXTensorInfo> output_tensors_infos_;
+  /** \brief The OpenVX input tensors. */
+  std::vector<vx_tensor> input_tensors_;
+  /** \brief The OpenVX output tensors. */
+  std::vector<vx_tensor> output_tensors_;
+
+  /** \brief The NBG buffer. */
+  std::vector<char> nbg_buffer_;
+};
+
+}  // namespace vsi::nbg_runner::vx
+
+#endif
--- a/samples/nbg_runner_pybind/src/vx/utils.hpp
+++ b/samples/nbg_runner_pybind/src/vx/utils.hpp
@ -0,0 +1,107 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020-2024 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef VSI_NBG_RUNNER_VX_UTILS_HPP_
+#define VSI_NBG_RUNNER_VX_UTILS_HPP_
+
+#include <VX/vx_types.h>
+
+#include <string_view>
+
+namespace vsi::nbg_runner::vx {
+
+/**
+ * \brief Size in bytes of one element of the given OpenVX data type.
+ *
+ * \param data_type A VX_TYPE_* value.
+ * \return 1, 2, 4 or 8 for the handled types, 0 for any other value.
+ */
+inline size_t get_vx_dtype_bytes(vx_enum data_type) {
+  size_t element_bytes = 0;
+
+  switch (data_type) {
+    case VX_TYPE_INT8:
+    case VX_TYPE_UINT8:
+    case VX_TYPE_BOOL8:
+    case VX_TYPE_CHAR:
+      element_bytes = 1;
+      break;
+
+    case VX_TYPE_INT16:
+    case VX_TYPE_UINT16:
+    case VX_TYPE_FLOAT16:
+    case VX_TYPE_BFLOAT16:
+      element_bytes = 2;
+      break;
+
+    case VX_TYPE_INT32:
+    case VX_TYPE_UINT32:
+    case VX_TYPE_FLOAT32:
+      element_bytes = 4;
+      break;
+
+    case VX_TYPE_INT64:
+    case VX_TYPE_UINT64:
+    case VX_TYPE_FLOAT64:
+      element_bytes = 8;
+      break;
+
+    default:
+      // Unhandled type: keep the 0 sentinel.
+      break;
+  }
+
+  return element_bytes;
+}
+
+/**
+ * \brief Short textual name of the given OpenVX data type.
+ *
+ * \param data_type A VX_TYPE_* value.
+ * \return A name such as "int8" or "float32", or "unknown" for any value
+ *         that is not handled here.
+ */
+inline std::string_view get_vx_dtype_str(vx_enum data_type) {
+  switch (data_type) {
+    case VX_TYPE_INT8:
+      return "int8";
+    case VX_TYPE_UINT8:
+      return "uint8";
+    case VX_TYPE_BOOL8:
+      return "bool";
+    case VX_TYPE_INT16:
+      return "int16";
+    case VX_TYPE_UINT16:
+      return "uint16";
+    case VX_TYPE_FLOAT16:
+      return "float16";
+    case VX_TYPE_BFLOAT16:
+      return "bfloat16";
+    case VX_TYPE_INT32:
+      return "int32";
+    case VX_TYPE_UINT32:
+      return "uint32";
+    case VX_TYPE_FLOAT32:
+      return "float32";
+    case VX_TYPE_INT64:
+      return "int64";
+    case VX_TYPE_UINT64:
+      return "uint64";
+    case VX_TYPE_FLOAT64:
+      // 64-bit float: previously mislabelled as "float32".
+      return "float64";
+    default:
+      return "unknown";
+  }
+}
+
+/**
+ * \brief Short textual name of the given OpenVX quantization type.
+ *
+ * \param quant_type A VX_QUANT_* value.
+ * \return "dfp", "affine" or "perchannel_affine" for the matching
+ *         quantization types; "none" for VX_QUANT_NONE and any other value.
+ */
+inline std::string_view get_vx_qtype_str(vx_enum quant_type) {
+  std::string_view qtype_name = "none";
+
+  switch (quant_type) {
+    case VX_QUANT_DYNAMIC_FIXED_POINT:
+      qtype_name = "dfp";
+      break;
+    case VX_QUANT_AFFINE_SCALE:
+      qtype_name = "affine";
+      break;
+    case VX_QUANT_AFFINE_SCALE_PER_CHANNEL:
+      qtype_name = "perchannel_affine";
+      break;
+    case VX_QUANT_NONE:
+    default:
+      // Unquantized and unrecognised values share the "none" label.
+      break;
+  }
+
+  return qtype_name;
+}
+
+}  // namespace vsi::nbg_runner::vx
+#endif