support virtual vip devices (#331)

This commit is contained in:
lucklee 2022-04-06 13:05:38 +08:00 committed by GitHub
parent 1ca89d2ffa
commit 70d2f410a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 37197 additions and 0 deletions

View File

@ -11,6 +11,7 @@ option(TIM_VX_USE_EXTERNAL_OVXLIB "Use external OVXLIB"
option(TIM_VX_BUILD_EXAMPLES "Build demos show general usage" OFF)
option(TIM_VX_ENABLE_VIPLITE "Enable lite driver api support" OFF)
option(TIM_VX_ENABLE_40BIT "Enable large memory support" OFF)
option(TIM_VX_ENABLE_PLATFORM "Enable multi devices support" OFF)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

View File

@ -0,0 +1,84 @@
/****************************************************************************
*
* Copyright (c) 2022 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef TIM_VX_NATIVE_H_
#define TIM_VX_NATIVE_H_
#include "tim/vx/platform/platform.h"
namespace tim {
namespace vx {
namespace platform {
// In-process ("native") VIP device. The concrete implementation lives in
// native_device_private.h; obtain instances via Enumerate().
class NativeDevice : public IDevice {
public:
~NativeDevice(){};
// Queue a compiled graph on this device; execution happens in Trigger().
virtual bool Submit(const std::shared_ptr<Graph>& graph) = 0;
// Run all queued graphs. NOTE(review): async path looks unimplemented -- confirm.
virtual bool Trigger(bool async = false, async_callback cb = NULL) = 0;
// Ask the device worker to exit.
virtual bool DeviceExit() = 0;
// Block until all submitted work has finished.
virtual void WaitDeviceIdle() = 0;
// Discover every VIP device reported by the driver context.
static std::vector<std::shared_ptr<IDevice>> Enumerate();
};
// IExecutable backed by a pre-compiled Network Binary Graph (NBG) buffer,
// wrapped in a tim::vx::ops::NBG node.
class NativeExecutable : public IExecutable{
public:
// nb_buf: compiled NBG bytes (copied); inputs/outputs: tensor counts of the
// source graph, used to size the NBG node's bindings.
NativeExecutable(const std::shared_ptr<IExecutor>& executor, const std::vector<char>& nb_buf, size_t inputs, size_t outputs);
~NativeExecutable(){};
// Bind a tensor handle as the next input of the NBG node.
void SetInput(const std::shared_ptr<ITensorHandle>& th) override;
// Bind a tensor handle as the next output of the NBG node.
void SetOutput(const std::shared_ptr<ITensorHandle>& th) override;
// Remote-execution hook; no-op in the native backend.
void GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) override;
// Queue this executable on its executor, ordered relative to |ref|.
bool Submit(const std::shared_ptr<IExecutable>& ref, bool after = true) override;
// Run the NBG synchronously on the executor's device.
bool Trigger(bool async = false) override;
// Create a tensor on the internal NBG graph and wrap it in a handle.
std::shared_ptr<ITensorHandle> AllocateTensor(const TensorSpec& tensor_spec);
// Compile (verify) the internal NBG graph.
bool Verify() override;
protected:
std::shared_ptr<tim::vx::ops::NBG> nb_node_;  // NBG op inside nb_graph_
std::vector<char> nb_buf_;  // owns the binary so nb_node_'s pointer stays valid
};
// Executor that compiles graphs to NBG and schedules them on one native device.
// enable_shared_from_this: Compile() hands a shared_ptr of itself to the
// executables it creates, so instances MUST be owned by a shared_ptr.
class NativeExecutor : public IExecutor, public std::enable_shared_from_this<NativeExecutor>{
public:
// Creates a fresh Context for compilation.
NativeExecutor(const std::shared_ptr<IDevice>& device);
// Uses a caller-provided Context.
NativeExecutor(const std::shared_ptr<IDevice>& device, const std::shared_ptr<Context>& context);
~NativeExecutor(){};
// Verify |executable| and insert it into the task list relative to |ref|.
bool Submit(const std::shared_ptr<IExecutable>& executable, const std::shared_ptr<IExecutable>& ref, bool after = true) override;
// Run all queued tasks in order, then wait for the device to go idle.
bool Trigger(bool async = false) override;
// Compile |graph| to an NBG targeting this executor's device.
std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph) override;
};
// Thin ITensorHandle over a tim::vx::Tensor owned by an NBG graph.
class NativeTensorHandle : public ITensorHandle {
public:
NativeTensorHandle(const std::shared_ptr<Tensor>& tensor);
// Copy host memory into the wrapped tensor.
bool CopyDataToTensor(const void* data, uint32_t size_in_bytes) override;
// Copy the wrapped tensor back to host memory (caller sizes the buffer).
bool CopyTensorToData(void* data) override;
};
} // namespace platform
} // namespace vx
} // namespace tim
#endif

View File

@ -0,0 +1,140 @@
/****************************************************************************
*
* Copyright (c) 2022 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef TIM_VX_PLATFORM_H_
#define TIM_VX_PLATFORM_H_
#include <memory>
#include <vector>
#include <functional>
#include <iostream>
#include "tim/vx/graph.h"
#include "tim/vx/tensor.h"
#include "tim/vx/context.h"
#include "tim/vx/ops/nbg.h"
namespace tim {
namespace vx {
class Graph;
class Context;
namespace ops{
class NBG;
}
namespace platform {
class IDevice;
class IExecutable;
class ExecutableSet;
class IExecutor;
class ITensorHandle;
std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph, const std::shared_ptr<IExecutor>& executor);
std::shared_ptr<IExecutable> CreateExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables);
// Abstract handle to one NPU/VIP device.
class IDevice {
public:
using device_id_t = uint32_t;
// Callback invoked by the driver when an async graph submission completes.
using async_callback = std::function<bool (const void*)>;
using data_t = const void*;
virtual ~IDevice(){};
// Queue a compiled graph for execution on this device.
virtual bool Submit(const std::shared_ptr<Graph>& graph) = 0;
// Execute everything queued by Submit().
virtual bool Trigger(bool async = false, async_callback cb = NULL) = 0;
// Index of this device as reported by the driver.
device_id_t Id() const;
// Block until all submitted work has finished.
virtual void WaitDeviceIdle() = 0;
// Ask the device worker to exit.
virtual bool DeviceExit() = 0;
protected:
device_id_t device_id_;
};
// Schedules executables onto a device, keeping an ordered pending-task list.
class IExecutor {
public:
// Tasks are held weakly: the caller keeps ownership of the executables.
using task = std::weak_ptr<IExecutable>;
virtual ~IExecutor(){};
// Insert |executable| into the task list before/after |ref|.
virtual bool Submit(const std::shared_ptr<IExecutable>& executable, const std::shared_ptr<IExecutable>& ref, bool after=true) = 0;
virtual bool Trigger(bool async = false) = 0; // todo: async=true
// Compile |graph| into a device-specific executable (NBG in the native backend).
virtual std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph) = 0;
// Device the tasks will run on.
virtual std::shared_ptr<IDevice> Device() const;
// Context used for compilation. NOTE(review): name is misspelled ("Contex")
// but it is public API -- renaming would break callers.
virtual std::shared_ptr<Context> Contex() const;
protected:
std::vector<task> tasks_;   // pending executables, in execution order
std::shared_ptr<IDevice> device_;
std::shared_ptr<Context> context_;
};
// A compiled unit of work (one NBG graph) that can be queued on an executor.
// enable_shared_from_this: Submit() re-queues `this` as a shared_ptr, so
// instances MUST be owned by a shared_ptr.
class IExecutable : public std::enable_shared_from_this<IExecutable>{
public:
virtual ~IExecutable(){};
// Bind input/output tensor handles to the compiled graph.
virtual void SetInput(const std::shared_ptr<ITensorHandle>& th) = 0;
virtual void SetOutput(const std::shared_ptr<ITensorHandle>& th) = 0;
virtual void GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) = 0; // for remote
// Queue this executable on its executor, ordered relative to |ref|.
virtual bool Submit(const std::shared_ptr<IExecutable>& ref, bool after = true) = 0;
virtual bool Trigger(bool async = false) = 0; // todo: async=true
// Compile/verify the underlying graph; must succeed before execution.
virtual bool Verify() = 0;
// The internal graph holding the compiled NBG node.
virtual std::shared_ptr<Graph> NBGraph() const;
// Create a tensor on the internal graph and wrap it in a handle.
virtual std::shared_ptr<ITensorHandle> AllocateTensor(const TensorSpec& tensor_spec) = 0;
// Owning executor (held weakly to avoid a reference cycle).
virtual std::shared_ptr<IExecutor> Executor() const;
protected:
std::weak_ptr<IExecutor> executor_;
std::shared_ptr<Context> context_;
std::shared_ptr<Graph> nb_graph_;
};
// Groups several executables so they can be submitted/triggered as one task.
class ExecutableSet : public IExecutable{
public:
ExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables);
// Tensor binding is done on the member executables; these are no-ops.
void SetInput(const std::shared_ptr<ITensorHandle>& th) override;
void SetOutput(const std::shared_ptr<ITensorHandle>& th) override;
void GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) override;
bool Submit(const std::shared_ptr<IExecutable>& ref, bool after = true) override;
// Submits every member's NBG graph to the device, then triggers once.
bool Trigger(bool async = false) override;
// Verifies every member executable.
bool Verify() override;
// Not supported for a set; returns an empty handle.
std::shared_ptr<ITensorHandle> AllocateTensor(const TensorSpec& tensor_spec) override;
std::vector<std::shared_ptr<IExecutable>> Executables() const;
protected:
std::vector<std::shared_ptr<IExecutable>> executables_;  // strong refs, run in order
};
// Abstract handle to a device tensor plus host<->device copy operations.
class ITensorHandle {
public:
virtual ~ITensorHandle(){};
// Copy host memory into the tensor.
virtual bool CopyDataToTensor(const void* data, uint32_t size_in_bytes) = 0;
// Copy the tensor into host memory; the caller must size |data| correctly.
virtual bool CopyTensorToData(void* data) = 0;
virtual std::shared_ptr<Tensor> GetTensor() const;
protected:
std::shared_ptr<Tensor> tensor_;
};
} // namespace platform
} // namespace vx
} // namespace tim
#endif

View File

@ -15,3 +15,7 @@ endif()
if(TIM_VX_ENABLE_NBG_PARSER)
add_subdirectory("nbg_runner")
endif()
if(TIM_VX_ENABLE_PLATFORM)
add_subdirectory("lenet_multi_device")
endif()

View File

@ -0,0 +1,13 @@
cc_test(
name = "lenet_asymu8_cc",
copts = [
"-Werror", "-std=c++14"
],
srcs = [
"lenet_asymu8.cc",
"lenet_asymu8_weights.h"
],
deps = [
"//:tim-vx_interface"
],
)

View File

@ -0,0 +1,12 @@
message("samples/lenet_multi_device")
set(TARGET_NAME "lenet_multi_device")
aux_source_directory(. ${TARGET_NAME}_SRCS)
add_executable(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
target_link_libraries(${TARGET_NAME} PRIVATE -Wl,--whole-archive tim-vx)
target_include_directories(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
${PROJECT_SOURCE_DIR}/include
)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,174 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <tuple>
#include <vector>
#include <assert.h>
#include <chrono>
#include "lenet.h"
#include "tim/vx/context.h"
#include "tim/vx/graph.h"
#include "tim/vx/platform/platform.h"
#include "tim/vx/platform/native.h"
// One 28x28 grayscale MNIST-style digit image (uint8 pixels, row-major),
// used as the LeNet sample input.
std::vector<uint8_t> input_data = {
    0,   0,   0,   0,   0,   0,   0,   0,   6,   0,   2,   0,   0,   8,   0,
    3,   0,   7,   0,   2,   0,   0,   0,   10,  0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   3,   1,   1,   0,   14,  0,   0,   3,   0,
    2,   4,   0,   0,   0,   3,   1,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   4,   3,   0,   0,   0,   5,   0,   4,   0,   0,
    0,   0,   10,  12,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   6,   5,   0,   2,   0,   9,   0,   12,  2,   0,   5,   1,   0,
    0,   2,   9,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    3,   0,   33,  0,   0,   155, 186, 55,  17,  22,  0,   0,   3,   9,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    2,   0,   167, 253, 255, 235, 255, 240, 134, 36,  0,   6,   1,   4,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   5,   6,   87,
    240, 251, 254, 254, 237, 255, 252, 191, 27,  0,   0,   5,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,   19,  226, 255, 235,
    255, 255, 254, 242, 255, 255, 68,  12,  0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   4,   1,   58,  254, 255, 158, 0,   2,
    47,  173, 253, 247, 255, 65,  4,   1,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   162, 240, 248, 92,  8,   0,   13,  0,
    88,  249, 244, 148, 0,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   17,  64,  244, 255, 210, 0,   0,   1,   2,   0,   52,  223,
    255, 223, 0,   11,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   144, 245, 255, 142, 0,   4,   9,   0,   6,   0,   37,  222, 226,
    42,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   73,
    255, 243, 104, 0,   0,   0,   0,   11,  0,   0,   0,   235, 242, 101, 4,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   133, 245, 226,
    12,  4,   15,  0,   0,   0,   0,   24,  0,   235, 246, 41,  0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   236, 245, 152, 0,   10,
    0,   0,   0,   0,   6,   0,   28,  227, 239, 1,   6,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   227, 240, 53,  4,   0,   0,   24,
    0,   1,   0,   8,   181, 249, 177, 0,   2,   0,   0,   0,   0,   4,   0,
    6,   1,   5,   0,   0,   87,  246, 219, 14,  0,   0,   2,   0,   10,  7,
    0,   134, 255, 249, 104, 4,   0,   0,   0,   0,   0,   8,   0,   3,   0,
    0,   0,   4,   89,  255, 228, 0,   11,  0,   8,   14,  0,   0,   100, 250,
    248, 236, 0,   0,   8,   0,   0,   0,   0,   5,   0,   2,   0,   0,   2,
    6,   68,  250, 228, 6,   6,   0,   0,   1,   0,   140, 240, 253, 238, 51,
    31,  0,   3,   0,   0,   0,   0,   0,   0,   5,   0,   0,   2,   0,   26,
    215, 255, 119, 0,   21,  1,   40,  156, 233, 244, 239, 103, 0,   6,   6,
    0,   0,   0,   0,   0,   0,   0,   5,   0,   0,   0,   0,   0,   225, 251,
    240, 141, 118, 139, 222, 244, 255, 249, 112, 17,  0,   0,   8,   3,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   84,  245, 255, 247,
    255, 249, 255, 255, 249, 132, 11,  0,   9,   3,   1,   1,   0,   0,   0,
    0,   2,   0,   0,   1,   0,   0,   6,   1,   0,   166, 236, 255, 255, 248,
    249, 248, 72,  0,   0,   16,  0,   16,  0,   4,   0,   0,   0,   0,   0,
    0,   0,   6,   0,   0,   4,   0,   0,   20,  106, 126, 188, 190, 112, 28,
    0,   21,  0,   1,   2,   0,   0,   3,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,   0,   0,
};
// Print the topNum highest-scoring entries of prob[0..outputCount) as
// "index: score" lines, preceded by a " --- TopN ---" banner.
template <typename T>
static void printTopN(const T* prob, int outputCount, int topNum) {
  // Pair each class index with its score, then rank by score (descending).
  std::vector<std::tuple<int, T>> ranked;
  ranked.reserve(outputCount);
  for (int idx = 0; idx < outputCount; ++idx) {
    ranked.emplace_back(idx, prob[idx]);
  }
  auto by_score_desc = [](const auto& lhs, const auto& rhs) {
    return std::get<1>(lhs) > std::get<1>(rhs);
  };
  std::sort(ranked.begin(), ranked.end(), by_score_desc);
  std::cout << " --- Top" << topNum << " ---" << std::endl;
  for (int rank = 0; rank < topNum; ++rank) {
    std::cout << std::setw(3) << std::get<0>(ranked[rank]) << ": " << std::fixed
              << std::setprecision(6) << std::get<1>(ranked[rank]) << std::endl;
  }
}
// Demo: run two LeNet graphs on the first native VIP device, first directly
// (per-executable Submit/Trigger) and then batched through an ExecutableSet.
int main(int argc, char** argv) {
  (void) argc, (void) argv;
  auto context0 = tim::vx::Context::Create();
  auto graph0 = lenet(context0);
  auto graph1 = lenet(context0);
  auto devices = tim::vx::platform::NativeDevice::Enumerate();
  auto device = devices[0];
  std::shared_ptr<tim::vx::platform::IExecutor> executor =
      std::make_shared<tim::vx::platform::NativeExecutor>(device);
  auto executable0 = tim::vx::platform::Compile(graph0, executor);  // compile to nbg
  auto input_handle0 = executable0->AllocateTensor(graph0->InputsTensor()[0]->GetSpec());
  auto output_handle0 = executable0->AllocateTensor(graph0->OutputsTensor()[0]->GetSpec());
  executable0->SetInput(input_handle0);
  executable0->SetOutput(output_handle0);
  input_handle0->CopyDataToTensor(input_data.data(), input_data.size());
  // Bug fix: Submit() used to be called inside assert(), so the call vanished
  // in NDEBUG builds. Perform it unconditionally and assert on the result.
  bool submitted0 = executable0->Submit(executable0);
  assert(submitted0);
  (void)submitted0;
  executable0->Trigger();
  auto executable1 = tim::vx::platform::Compile(graph1, executor);  // compile to nbg
  auto input_handle1 = executable1->AllocateTensor(graph1->InputsTensor()[0]->GetSpec());
  auto output_handle1 = executable1->AllocateTensor(graph1->OutputsTensor()[0]->GetSpec());
  executable1->SetInput(input_handle1);
  executable1->SetOutput(output_handle1);
  input_handle1->CopyDataToTensor(input_data.data(), input_data.size());
  bool submitted1 = executable1->Submit(executable0);  // run after executable0
  assert(submitted1);
  (void)submitted1;
  executable1->Trigger();
  // Same work again, scheduled through the executor and an ExecutableSet.
  executor->Submit(executable0, executable0);
  executor->Submit(executable1, executable0);
  std::vector<std::shared_ptr<tim::vx::platform::IExecutable>> executables0;
  executables0.push_back(executable0);
  executables0.push_back(executable1);
  auto executable_set0 = tim::vx::platform::CreateExecutableSet(executables0);
  executor->Submit(executable_set0, executable_set0);
  executor->Trigger();
  // Read back the input image (sanity check) and both outputs.
  std::vector<uint8_t> input_data0;
  input_data0.resize(28 * 28);
  if (!input_handle0->CopyTensorToData(input_data0.data())) {
    std::cout << "Copy input data fail." << std::endl;
    return -1;
  }
  printTopN(input_data0.data(), static_cast<int>(input_data0.size()), 5);
  std::vector<float> output_data;
  output_data.resize(1 * 10);
  if (!output_handle0->CopyTensorToData(output_data.data())) {
    std::cout << "Copy output data fail." << std::endl;
    return -1;
  }
  printTopN(output_data.data(), static_cast<int>(output_data.size()), 5);
  std::vector<float> output_data1;
  output_data1.resize(1 * 10);
  if (!output_handle1->CopyTensorToData(output_data1.data())) {
    std::cout << "Copy output data fail." << std::endl;
    return -1;
  }
  printTopN(output_data1.data(), static_cast<int>(output_data1.size()), 5);
  return 0;
}

View File

@ -0,0 +1,99 @@
#include <algorithm>
#include <iostream>
#include <vector>
#include "tim/vx/context.h"
#include "tim/vx/graph.h"
#include "tim/vx/operation.h"
#include "tim/vx/tensor.h"
#include "tim/vx/platform/native.h"
// Placeholder: this skeleton sample produces no real output to rank.
static void printTopN() {
}
// Skeleton sample demonstrating the platform API on a 6-graph pipeline.
// NOTE(review): g0..g5 and the TensorSpecs are never constructed here -- this
// file documents the intended call sequence, it is not runnable as-is.
int main(int argc, char** argv) {
  (void) argc, (void) argv;
  std::vector<uint8_t> input_data = {};
  auto context = tim::vx::Context::Create();
  // Pipeline topology:
  //       -->g1--g2-->
  //  g0-->|          |-->g5
  //       -->g3--g4-->
  std::shared_ptr<tim::vx::Graph> g0, g1, g2, g3, g4, g5;
  tim::vx::TensorSpec g0_input0, g0_output0, g1_output0, g2_output0, g3_output0, g4_output0, g5_output0;
  // query devices and get an executor for the first device
  auto devices = tim::vx::platform::NativeDevice::Enumerate();
  auto device = devices[0];
  std::shared_ptr<tim::vx::platform::IExecutor> executor = std::make_shared<tim::vx::platform::NativeExecutor> (device);
  // executable0
  auto executable0 = executor->Compile(g0); // compile to nbg
  auto input_handle = executable0->AllocateTensor(g0_input0);
  executable0->SetInput(input_handle); // set input handle
  input_handle->CopyDataToTensor(input_data.data(), input_data.size());
  executable0->SetOutput(executable0->AllocateTensor(g0_output0)); // set output handle
  // executable1
  auto executable1 = executor->Compile(g1); // compile to nbg
  executable1->SetInput(executable1->AllocateTensor(g0_output0)); // set input handle
  executable1->SetOutput(executable1->AllocateTensor(g1_output0)); // set output handle
  // executable2
  auto executable2 = executor->Compile(g2); // compile to nbg
  executable2->SetInput(executable2->AllocateTensor(g1_output0)); // set input handle
  executable2->SetOutput(executable2->AllocateTensor(g2_output0)); // set output handle
  // executable3
  auto executable3 = executor->Compile(g3); // compile to nbg
  executable3->SetInput(executable3->AllocateTensor(g2_output0)); // set input handle
  executable3->SetOutput(executable3->AllocateTensor(g3_output0)); // set output handle
  // executable4
  auto executable4 = executor->Compile(g4); // compile to nbg
  executable4->SetInput(executable4->AllocateTensor(g3_output0)); // set input handle
  executable4->SetOutput(executable4->AllocateTensor(g4_output0)); // set output handle
  // executable5 (joins both branches, so it takes two inputs)
  auto executable5 = executor->Compile(g5); // compile to nbg
  executable5->SetInput(executable5->AllocateTensor(g3_output0)); // set input handle
  executable5->SetInput(executable5->AllocateTensor(g4_output0)); // set input handle
  executable5->SetOutput(executable5->AllocateTensor(g5_output0)); // set output handle
  /* 1. one way to run: order individual executables relative to each other */
  executable0->Submit(executable0);
  executable1->Submit(executable0); // executable1 runs after executable0
  executable2->Submit(executable1);
  executable3->Submit(executable0);
  executable4->Submit(executable3);
  executable5->Submit(executable2); // executable5 runs after executable2
  executable5->Submit(executable4); // executable5 runs after executable4
  // trigger
  executor->Trigger(); // run all submitted executables
  /* 2. another way to run: group pipeline stages into ExecutableSets */
  // executable_set0
  std::vector<std::shared_ptr<tim::vx::platform::IExecutable>> executables0;
  executables0.push_back(executable0);
  auto executable_set0 = CreateExecutableSet(executables0);
  // executable_set1
  std::vector<std::shared_ptr<tim::vx::platform::IExecutable>> executables1;
  executables1.push_back(executable1);
  executables1.push_back(executable3);
  auto executable_set1 = CreateExecutableSet(executables1);
  // executable_set2
  std::vector<std::shared_ptr<tim::vx::platform::IExecutable>> executables2;
  executables2.push_back(executable2);
  executables2.push_back(executable4);
  auto executable_set2 = CreateExecutableSet(executables2);
  // executable_set3
  std::vector<std::shared_ptr<tim::vx::platform::IExecutable>> executables3;
  executables3.push_back(executable5);
  auto executable_set3 = CreateExecutableSet(executables3);
  // submit executable sets in stage order
  executable_set0->Submit(executable_set0);
  executable_set1->Submit(executable_set0);
  executable_set2->Submit(executable_set1);
  executable_set3->Submit(executable_set2);
  // trigger
  executor->Trigger(); // run all submitted executable sets
  printTopN();
  return 0;
}

View File

@ -45,6 +45,14 @@ if(TIM_VX_ENABLE_LAYOUT_INFER)
)
endif()
if(TIM_VX_ENABLE_PLATFORM)
    # Fix typo in status message ("paltform" -> "platform").
    message(STATUS "Using platform")
    aux_source_directory(./vx/platform PLATFORM_SRC)
    list(APPEND ${TARGET_NAME}_SRCS
        ${PLATFORM_SRC}
    )
endif()
foreach(src_file ${${TARGET_NAME}_SRCS})
if(${src_file} MATCHES ".*_test\.cc")
list(REMOVE_ITEM ${TARGET_NAME}_SRCS ${src_file})
@ -116,4 +124,9 @@ if(TIM_VX_ENABLE_TEST)
install(TARGETS unit_test DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR})
endif()
if(TIM_VX_ENABLE_PLATFORM)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx/platform
DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx)
endif()
add_subdirectory("utils")

View File

@ -0,0 +1,302 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "tim/vx/platform/native.h"
#include "native_device_private.h"
namespace tim {
namespace vx {
namespace platform {
// Convenience free function: compile |graph| with |executor|.
std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph,
                                     const std::shared_ptr<IExecutor>& executor) {
  auto compiled = executor->Compile(graph);
  return compiled;
}
std::shared_ptr<IExecutable> CreateExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables) {
ExecutableSet* executable_set = new ExecutableSet(executables);
std::shared_ptr<IExecutable> executable(executable_set);
return executable;
}
// Driver-assigned index of this device (set by the concrete implementation).
IDevice::device_id_t IDevice::Id() const {
return device_id_;
}
// Open the driver-level virtual device for index |id|.
NativeDeviceImpl::NativeDeviceImpl(device_id_t id) {
vip_device_ = std::make_unique<vip::IDevice> (id);
device_id_ = id;
}
// Queue the graph's underlying vsi_nn_graph_t; it is executed by Trigger().
bool NativeDeviceImpl::Submit(const std::shared_ptr<Graph>& graph) {
GraphImpl* graphimp = dynamic_cast<GraphImpl*> (graph.get()); // hack to downcast
vsi_graph_v_.push_back(graphimp->graph());
return true;
}
// Drain the queued graphs through the driver in FIFO order.
// Returns false if the queue was empty or any submission failed.
// Bug fix: the original returned only the LAST GraphSubmit status, so an
// earlier failure followed by a success reported overall success.
bool NativeDeviceImpl::Trigger(bool async, async_callback cb) {
  (void)async;  // async execution not implemented yet
  bool status = !vsi_graph_v_.empty();
  while (!vsi_graph_v_.empty()) {
    auto task = vsi_graph_v_.front();
    vsi_graph_v_.erase(vsi_graph_v_.begin());
    if (!vip_device_->GraphSubmit(task, cb, NULL)) {
      status = false;  // remember the failure but keep draining the queue
    }
  }
  return status;
}
// Block until the driver worker thread has finished all submitted work.
void NativeDeviceImpl::WaitDeviceIdle() {
vip_device_->WaitThreadIdle();
}
// Ask the driver worker thread to exit.
bool NativeDeviceImpl::DeviceExit() {
return vip_device_->ThreadExit();
}
std::vector<std::shared_ptr<IDevice>> NativeDevice::Enumerate() {
std::vector<std::shared_ptr<IDevice>> device_v;
device_id_t deviceCount = 0;
vsi_nn_context_t context;
context = vsi_nn_CreateContext();
vxQueryContext(context->c, VX_CONTEXT_DEVICE_COUNT_VIV, &deviceCount, sizeof(deviceCount));
std::cout<< "Device count = "<< deviceCount <<std::endl;
for (device_id_t i = 0; i < deviceCount; i++) {
IDevice* local_device = new NativeDeviceImpl(i);
std::shared_ptr<IDevice> local_device_sp(local_device);
device_v.push_back(local_device_sp);
}
vsi_nn_ReleaseContext(&context);
return device_v;
}
// Internal graph holding the compiled NBG node.
std::shared_ptr<Graph> IExecutable::NBGraph() const {
return nb_graph_;
}
// Owning executor. NOTE(review): on lock failure this logs and still returns
// the (null) pointer; callers dereference it unchecked -- confirm lifetime.
std::shared_ptr<IExecutor> IExecutable::Executor() const {
auto executor = executor_.lock();
if (!executor) {
std::cout<< "Executor unable to lock weak_ptr";
}
return executor;
}
// Build an executable around a compiled NBG: copy the binary into nb_buf_
// (the NBG node keeps a pointer into it) and create the NBG op on a fresh
// graph from the executor's context.
NativeExecutable::NativeExecutable(const std::shared_ptr<IExecutor>& executor, const std::vector<char>& nb_buf, size_t inputs, size_t outputs) {
executor_ = executor;
context_ = executor->Contex();
nb_graph_ = context_->CreateGraph();
nb_buf_ = nb_buf;
nb_node_ = nb_graph_->CreateOperation<tim::vx::ops::NBG>(nb_buf_.data(), inputs, outputs);
}
// Bind |th|'s tensor as the next input of the NBG node.
void NativeExecutable::SetInput(const std::shared_ptr<ITensorHandle>& th) {
nb_node_->BindInput(th->GetTensor());
}
// Bind |th|'s tensor as the next output of the NBG node.
void NativeExecutable::SetOutput(const std::shared_ptr<ITensorHandle>& th) {
nb_node_->BindOutput(th->GetTensor());
}
// Remote-execution hook ("for remote" in the interface); no-op natively.
void NativeExecutable::GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) {
(void)th;
}
// Queue this executable on its executor, ordered relative to |ref|.
bool NativeExecutable::Submit(const std::shared_ptr<IExecutable>& ref, bool after) {
  return Executor()->Submit(shared_from_this(), ref, after);
}
// Run this NBG synchronously on the executor's device: submit, trigger, wait.
bool NativeExecutable::Trigger(bool async) {
(void)async;
bool status = false;
auto device = Executor()->Device();
device->Submit(nb_graph_);
status = device->Trigger();
device->WaitDeviceIdle();
return status;
}
std::shared_ptr<ITensorHandle> NativeExecutable::AllocateTensor(const TensorSpec& tensor_spec) {
auto tensor = nb_graph_->CreateTensor(tensor_spec);
ITensorHandle* tensor_handle = new NativeTensorHandle(tensor);
std::shared_ptr<ITensorHandle> tensor_handle_sp (tensor_handle);
return tensor_handle_sp;
}
// Compile the internal NBG graph; must succeed before execution.
bool NativeExecutable::Verify() {
return nb_graph_->Compile();
}
// Group several executables; the set is scheduled on the first member's
// executor. NOTE(review): assumes |executables| is non-empty (indexes [0])
// -- confirm all callers guarantee this.
ExecutableSet::ExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables) {
executables_ = executables;
executor_ = executables[0]->Executor();
}
// Tensor binding is done on the member executables; no-op on the set.
void ExecutableSet::SetInput(const std::shared_ptr<ITensorHandle>& th) {
(void)th;
}
// Tensor binding is done on the member executables; no-op on the set.
void ExecutableSet::SetOutput(const std::shared_ptr<ITensorHandle>& th) {
(void)th;
}
// Remote-execution hook; no-op on the set.
void ExecutableSet::GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) {
(void)th;
}
// Queue this whole set as one task on its executor, ordered relative to |ref|.
bool ExecutableSet::Submit(const std::shared_ptr<IExecutable>& ref, bool after) {
  return Executor()->Submit(shared_from_this(), ref, after);
}
// Submit every member's NBG graph to the device, trigger once, wait for idle.
// Improvement: iterate by const reference -- the original copied a shared_ptr
// (atomic refcount inc/dec) on every iteration.
bool ExecutableSet::Trigger(bool async) {
  (void)async;
  auto device = Executor()->Device();
  for (const auto& executable : executables_) {
    device->Submit(executable->NBGraph());
  }
  bool status = device->Trigger();
  device->WaitDeviceIdle();
  return status;
}
// Not supported for a set -- allocate on the member executables instead.
// Always returns an empty handle.
std::shared_ptr<ITensorHandle> ExecutableSet::AllocateTensor(const TensorSpec& tensor_spec) {
std::shared_ptr<ITensorHandle> tensor_handle_sp;
(void) tensor_spec;
return tensor_handle_sp;
}
// Members of this set, in execution order.
std::vector<std::shared_ptr<IExecutable>> ExecutableSet::Executables() const {
return executables_;
}
bool ExecutableSet::Verify() {
bool status = false;
for ( auto executable : executables_ ) {
status = executable->Verify();
}
return status;
}
// Context used for compilation. NOTE(review): name is misspelled ("Contex")
// but is declared in the public header; renaming would break callers.
std::shared_ptr<Context> IExecutor::Contex() const {
return context_;
}
// Bind to |device| and create a fresh compilation context.
NativeExecutor::NativeExecutor(const std::shared_ptr<IDevice>& device) {
device_ = device;
context_ = Context::Create();
}
// Bind to |device| and reuse a caller-provided compilation context.
NativeExecutor::NativeExecutor(const std::shared_ptr<IDevice>& device, const std::shared_ptr<Context>& context) {
device_ = device;
context_ = context;
}
// Verify |executable| and insert it into the pending-task list:
// - executable == ref: append at the end.
// - otherwise: insert immediately after (after==true) or before (after==false)
//   the first task that matches |ref|.
// Returns false if verification fails or |ref| is not queued.
// Bug fix: when |ref| was not found, the original returned `success`, which
// was still true from the Verify() call -- the caller was told the task was
// queued while it had been silently dropped. Report failure instead.
bool NativeExecutor::Submit(const std::shared_ptr<IExecutable>& executable, const std::shared_ptr<IExecutable>& ref, bool after) {
  if (!executable->Verify()) {
    std::cout << "Executable NBG compile failed";
    return false;
  }
  if (executable == ref) {
    tasks_.push_back(executable);
    return true;
  }
  for (size_t i = 0; i < tasks_.size(); i++) {
    if (tasks_[i].lock() == ref) {
      // after==true -> slot i+1 (right after ref); else slot i (right before).
      tasks_.insert(tasks_.begin() + i + (after ? 1 : 0), executable);
      return true;
    }
  }
  return false;  // |ref| not queued; nothing inserted
}
// Run every queued task in FIFO order, then wait for the device to go idle.
// Bug fix: tasks are weak_ptrs; when lock() failed the original logged a
// message and then STILL called Trigger() through the null pointer (crash).
// Expired tasks are now skipped.
bool NativeExecutor::Trigger(bool async) {
  (void)async;  // async execution not implemented yet
  while (!tasks_.empty()) {
    auto task = tasks_.front();
    tasks_.erase(tasks_.begin());
    auto task_ = task.lock();
    if (!task_) {
      std::cout << "Task unable to lock weak_ptr";
      continue;  // executable already destroyed; skip instead of dereferencing
    }
    task_->Trigger();
  }
  device_->WaitDeviceIdle();
  return true;
}
std::shared_ptr<IExecutable> NativeExecutor::Compile(const std::shared_ptr<Graph>& graph) {
GraphImpl* graphimp= dynamic_cast<GraphImpl*> (graph.get()); // hack to downcast
IDevice::device_id_t id = device_->Id();
vxSetGraphAttribute(graphimp->graph()->g, VX_GRAPH_DEVICE_INDEX_VIV, (void*)(&id), sizeof(id));
size_t bin_size = -1;
graph->CompileToBinary(nullptr, &bin_size);
std::vector<char> nb_buf;
nb_buf.resize(bin_size);
size_t inputs = graph->InputsTensor().size();
size_t outputs = graph->OutputsTensor().size();
graph->CompileToBinary(nb_buf.data(), &bin_size);
std::shared_ptr<IExecutor> this_sp = shared_from_this();
IExecutable* executable = new NativeExecutable(this_sp, nb_buf, inputs, outputs);
std::shared_ptr<IExecutable> executable_sp(executable);
return executable_sp;
}
// Device this executor schedules work onto.
std::shared_ptr<IDevice> IExecutor::Device() const {
return device_;
}
// Underlying tim::vx tensor wrapped by this handle.
std::shared_ptr<Tensor> ITensorHandle::GetTensor() const {
return tensor_;
}
// Wrap an existing tensor (no ownership transfer beyond the shared_ptr).
NativeTensorHandle::NativeTensorHandle(const std::shared_ptr<Tensor>& tensor) {
tensor_ = tensor;
}
// Copy host memory into the wrapped tensor.
bool NativeTensorHandle::CopyDataToTensor(const void* data, uint32_t size_in_bytes) {
return tensor_->CopyDataToTensor(data, size_in_bytes);
}
// Copy the wrapped tensor into host memory; caller must size |data| correctly.
bool NativeTensorHandle::CopyTensorToData(void* data) {
return tensor_->CopyDataFromTensor(data);
}
} // namespace platform
} // namespace vx
} // namespace tim

View File

@ -0,0 +1,58 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef TIM_VX_NATIVE_DEVICE_PRIVATE_H_
#define TIM_VX_NATIVE_DEVICE_PRIVATE_H_
#include "tim/vx/platform/native.h"
#include "vip/virtual_device.h"
#include "graph_private.h"
namespace tim {
namespace vx {
class GraphImpl;
namespace platform {
// NativeDevice implementation backed by the VIP virtual-device driver API
// (vip/virtual_device.h).
class NativeDeviceImpl : public NativeDevice {
public:
NativeDeviceImpl(device_id_t id);
~NativeDeviceImpl(){};
// Queue the graph's underlying vsi_nn_graph_t (see platform.cc).
bool Submit(const std::shared_ptr<tim::vx::Graph>& graph) override;
// Drain the queued graphs through vip::IDevice::GraphSubmit.
bool Trigger(bool async = false, async_callback cb = NULL) override;
bool DeviceExit() override;
void WaitDeviceIdle() override;
protected:
std::unique_ptr<vip::IDevice> vip_device_;   // driver-level device handle
std::vector<vsi_nn_graph_t*> vsi_graph_v_;   // FIFO of graphs pending Trigger()
};
} // namespace platform
} // namespace vx
} // namespace tim
#endif /* TIM_VX_NATIVE_DEVICE_PRIVATE_H_*/