From 27890719b617ad3539446b71b36a322adce1afc8 Mon Sep 17 00:00:00 2001 From: liyuenan <37231553+liyuenan2333@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:51:23 +0800 Subject: [PATCH] Support remote platform by gRPC (#561) * Support remote platform by gRPC Signed-off-by: yuenan.li --- CMakeLists.txt | 17 +- README.md | 5 + cmake/gRPC.cmake | 27 ++ include/tim/vx/platform/grpc/grpc_remote.h | 99 +++++ include/tim/vx/platform/lite/lite_native.h | 96 +++++ include/tim/vx/platform/native.h | 32 +- include/tim/vx/platform/platform.h | 45 ++- samples/CMakeLists.txt | 6 + samples/grpc/CMakeLists.txt | 11 + samples/grpc/README.txt | 2 + samples/grpc/grpc_multi_device.cc | 87 +++++ samples/lite_multi_device/CMakeLists.txt | 13 + .../lite_multi_device/lite_multi_device.cc | 79 ++++ src/tim/CMakeLists.txt | 136 +++++-- src/tim/vx/platform/grpc/README.md | 51 +++ src/tim/vx/platform/grpc/grpc_platform.proto | 120 ++++++ .../vx/platform/grpc/grpc_platform_client.cc | 240 ++++++++++++ .../vx/platform/grpc/grpc_platform_client.h | 77 ++++ .../vx/platform/grpc/grpc_platform_server.cc | 348 ++++++++++++++++++ src/tim/vx/platform/grpc/grpc_remote.cc | 176 +++++++++ src/tim/vx/platform/lite/lite_native.cc | 312 ++++++++++++++++ src/tim/vx/platform/native.cc | 124 ++++--- 22 files changed, 1983 insertions(+), 120 deletions(-) create mode 100644 cmake/gRPC.cmake create mode 100644 include/tim/vx/platform/grpc/grpc_remote.h create mode 100644 include/tim/vx/platform/lite/lite_native.h create mode 100644 samples/grpc/CMakeLists.txt create mode 100644 samples/grpc/README.txt create mode 100644 samples/grpc/grpc_multi_device.cc create mode 100644 samples/lite_multi_device/CMakeLists.txt create mode 100644 samples/lite_multi_device/lite_multi_device.cc create mode 100644 src/tim/vx/platform/grpc/README.md create mode 100644 src/tim/vx/platform/grpc/grpc_platform.proto create mode 100644 src/tim/vx/platform/grpc/grpc_platform_client.cc create mode 100644 src/tim/vx/platform/grpc/grpc_platform_client.h create mode 100644 src/tim/vx/platform/grpc/grpc_platform_server.cc create mode 100644 src/tim/vx/platform/grpc/grpc_remote.cc create mode 100644 src/tim/vx/platform/lite/lite_native.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index ab9f6a1..1e388a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,8 @@ option(TIM_VX_BUILD_EXAMPLES "Build demos show general usage" option(TIM_VX_ENABLE_VIPLITE "Enable lite driver api support" OFF) option(TIM_VX_ENABLE_40BIT "Enable large memory support" OFF) option(TIM_VX_ENABLE_PLATFORM "Enable multi devices support" OFF) +option(TIM_VX_ENABLE_PLATFORM_LITE "Enable lite multi-device support" OFF) +option(TIM_VX_ENABLE_GRPC "Enable gRPC support" OFF) option(TIM_VX_DBG_ENABLE_TENSOR_HNDL "Enable built-in tensor from handle: use malloced memory instead of VideoMemory by kernel driver" ON) set(CMAKE_CXX_STANDARD 14) @@ -24,6 +26,10 @@ if(${TIM_VX_CODE_COVERAGE}) set(CMAKE_C_FLAGS "-g -O0 --coverage -fprofile-arcs -ftest-coverage") endif() +if(${TIM_VX_ENABLE_PLATFORM_LITE}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_PLATFORM_LITE") +endif() + if(${TIM_VX_DBG_ENABLE_TENSOR_HNDL}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_TENSOR_HNDL=1") else() @@ -69,10 +75,9 @@ endif() if(TIM_VX_ENABLE_TEST) include(FetchContent) FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.12.0 - ) + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.0)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) @@ -84,6 +89,10 @@ if(TIM_VX_ENABLE_TEST) endif() endif() +if(TIM_VX_ENABLE_GRPC) + include(cmake/gRPC.cmake) +endif() + add_subdirectory("src/tim") if(TIM_VX_BUILD_EXAMPLES) diff --git a/README.md b/README.md index a069977..5967a3f 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,11 @@ cmake options: |`EXTERNAL_VIV_SDK`| Give external vivante openvx driver libraries | Not set| |`TIM_VX_BUILD_EXAMPLES`| Build example applications | OFF | |`TIM_VX_ENABLE_40BIT` | Enable large memory (over 4G) support in NPU driver | OFF | +|`TIM_VX_ENABLE_PLATFORM` | Enable multi-device support | OFF | +|`TIM_VX_ENABLE_PLATFORM_LITE` | Enable lite multi-device support, only works when `TIM_VX_ENABLE_PLATFORM`=ON | OFF | +|`VIP_LITE_SDK` | Full path to the VIPLite SDK, required when `TIM_VX_ENABLE_PLATFORM_LITE`=ON | Not set | +|`TIM_VX_ENABLE_GRPC` | Enable gRPC support, only works when `TIM_VX_ENABLE_PLATFORM`=ON | OFF | +|`TIM_VX_DBG_ENABLE_TENSOR_HNDL` | Enable built-in tensor from handle | ON | ---- Run unit test: diff --git a/cmake/gRPC.cmake b/cmake/gRPC.cmake new file mode 100644 index 0000000..5d739e7 --- /dev/null +++ b/cmake/gRPC.cmake @@ -0,0 +1,27 @@ +find_package(Threads REQUIRED) + +# Find Protobuf installation +# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. +set(protobuf_MODULE_COMPATIBLE TRUE) +find_package(Protobuf CONFIG REQUIRED) +message(STATUS "Using protobuf ${Protobuf_VERSION}") + +set(PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) +set(GRPCPP_REFLECTION gRPC::grpc++_reflection) +if(CMAKE_CROSSCOMPILING) + find_program(PROTOBUF_PROTOC protoc) +else() + set(PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>) +endif() + +# Find gRPC installation +# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. +find_package(gRPC CONFIG REQUIRED) +message(STATUS "Using gRPC ${gRPC_VERSION}") + +set(GRPC_GRPCPP gRPC::grpc++) +if(CMAKE_CROSSCOMPILING) + find_program(GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) +else() + set(GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>) +endif() diff --git a/include/tim/vx/platform/grpc/grpc_remote.h b/include/tim/vx/platform/grpc/grpc_remote.h new file mode 100644 index 0000000..63d8e32 --- /dev/null +++ b/include/tim/vx/platform/grpc/grpc_remote.h @@ -0,0 +1,99 @@ +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE.
+* +*****************************************************************************/ +#ifndef TIM_VX_GRPC_REMOTE_H_ +#define TIM_VX_GRPC_REMOTE_H_ + +#include "tim/vx/platform/platform.h" + +namespace tim { +namespace vx { +namespace platform { + +class GRPCPlatformClient; + +class GRPCRemoteDevice : public IDevice { + public: + GRPCRemoteDevice(int32_t id, std::shared_ptr<GRPCPlatformClient> client); + bool Submit(const std::shared_ptr<Graph>& graph) override; + bool Trigger(bool async = false, async_callback cb = NULL) override; + bool DeviceExit() override; + void WaitDeviceIdle() override; + void RemoteReset() override; + static std::vector<std::shared_ptr<IDevice>> Enumerate( + const std::string& port); + + std::shared_ptr<GRPCPlatformClient> client_; +}; + +class GRPCRemoteExecutor : public IExecutor { + public: + GRPCRemoteExecutor(std::shared_ptr<IDevice> device); + bool Submit(const std::shared_ptr<IExecutable>& executable, + const std::shared_ptr<IExecutable>& ref, + bool after = true) override; + bool Trigger(bool async = false) override; + std::shared_ptr<IExecutable> Compile( + const std::shared_ptr<Graph>& graph) override; + int32_t Id() const; + + private: + int32_t executor_id_; + std::shared_ptr<IDevice> device_; +}; + +class GRPCRemoteExecutable : public IExecutable { + public: + GRPCRemoteExecutable(int32_t id, std::shared_ptr<IDevice> device); + void SetInput(const std::shared_ptr<ITensorHandle>& th) override; + void SetOutput(const std::shared_ptr<ITensorHandle>& th) override; + void GetOutput( + const std::vector<std::shared_ptr<ITensorHandle>>& th) override; + bool Submit(const std::shared_ptr<IExecutable>& ref, bool after) override; + bool Trigger(bool async) override; + bool Verify() override; + std::shared_ptr<ITensorHandle> AllocateTensor( + const TensorSpec& tensor_spec) override; + int32_t Id() const; + + private: + int32_t executable_id_; + std::shared_ptr<IDevice> device_; +}; + +class GRPCRemoteTensorHandle : public ITensorHandle { + public: + GRPCRemoteTensorHandle(int32_t id, std::shared_ptr<IDevice> device); + bool CopyDataToTensor(const void* data, uint32_t size_in_bytes) override; + bool CopyDataFromTensor(void* data) override; + int32_t Id() const; + + private: + int32_t tensor_id_; + std::shared_ptr<IDevice> device_; +}; + +} // namespace platform +} // namespace vx +} // namespace tim +#endif
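A minimal usage sketch of the proxy classes declared above, assuming a grpc_platform_server is already listening on the given address; graph construction and error handling are elided, and the complete runnable version is the grpc_multi_device sample later in this patch:

```cpp
// Sketch: each GRPCRemote* object only holds an integer id; the real
// device, executor, executable and tensors live in the server process.
auto devices = tim::vx::platform::GRPCRemoteDevice::Enumerate("0.0.0.0:50051");
auto executor =
    std::make_shared<tim::vx::platform::GRPCRemoteExecutor>(devices[0]);
auto executable = executor->Compile(graph);  // ships the compiled NBG
auto in = executable->AllocateTensor(input_spec);
auto out = executable->AllocateTensor(output_spec);
executable->SetInput(in);
executable->SetOutput(out);
in->CopyDataToTensor(input_data, input_bytes);
executable->Submit(executable);
executor->Trigger();        // runs on the remote device
out->CopyDataFromTensor(output_data);
devices[0]->RemoteReset();  // release the server-side state
```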
diff --git a/include/tim/vx/platform/lite/lite_native.h b/include/tim/vx/platform/lite/lite_native.h new file mode 100644 index 0000000..b83d5fe --- /dev/null +++ b/include/tim/vx/platform/lite/lite_native.h @@ -0,0 +1,96 @@ +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ +#ifndef TIM_VX_LITE_NATIVE_H_ +#define TIM_VX_LITE_NATIVE_H_ + +#include "tim/vx/platform/platform.h" +#include "vip_lite.h" +#include "nbg_linker.h" + +namespace tim { +namespace vx { +namespace platform { + +class LiteNativeExecutor + : public IExecutor, + public std::enable_shared_from_this<LiteNativeExecutor> { + public: + LiteNativeExecutor(const std::shared_ptr<IDevice>& device); + virtual ~LiteNativeExecutor(); + bool Submit(const std::shared_ptr<IExecutable>& executable, + const std::shared_ptr<IExecutable>& ref, + bool after = true) override; + bool Trigger(bool async = false) override; + std::shared_ptr<IExecutable> Compile( + const std::shared_ptr<Graph>& graph) override; + + private: + vip_task_descriptor_t* task_descriptor_; + vip_database database_; +}; + +class LiteNativeExecutable : public IExecutable { + public: + LiteNativeExecutable(const std::shared_ptr<IExecutor>& executor, + const std::vector<char>& nb_buf); + virtual ~LiteNativeExecutable(); + void SetInput(const std::shared_ptr<ITensorHandle>& th) override; + void SetOutput(const std::shared_ptr<ITensorHandle>& th) override; + void GetOutput( + const std::vector<std::shared_ptr<ITensorHandle>>& th) override; + bool Submit(const std::shared_ptr<IExecutable>& ref, bool after) override; + bool Trigger(bool async) override; + bool Verify() override; + std::shared_ptr<ITensorHandle> AllocateTensor( + const TensorSpec& tensor_spec) override; + + vip_network network_; + + private: + void SetBuffer(vip_memory_t* dst, gcvip_videomemory_t* src); + + int32_t input_count_; + int32_t output_count_; + + gcvip_videomemory_t* coeff_; + gcvip_videomemory_t* command_; + gcvip_videomemory_t* memory_pool_; + gcvip_videomemory_t* others_; + gcvip_videomemory_t* pre_command_; +}; + +class LiteNativeTensorHandle : public ITensorHandle { + public: + LiteNativeTensorHandle(const std::shared_ptr<Tensor>& tensor); + virtual ~LiteNativeTensorHandle(); + bool CopyDataToTensor(const void* data, uint32_t size_in_bytes) override; + bool CopyDataFromTensor(void* data) override; + + gcvip_videomemory_t* tensor_buffer_; +}; +} // namespace platform +} // namespace vx +} // namespace tim + +#endif \ No newline at end of file
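Both the lite executor above and the native executor below implement the IExecutor/IExecutable contract from platform.h, so compiled executables can also be grouped through CreateExecutableSet and driven as one unit. A short sketch under that assumption, where exe_a and exe_b are hypothetical names for executables already compiled on the same executor and already bound to their tensor handles:

```cpp
// Sketch: batching two compiled executables with CreateExecutableSet.
auto set = tim::vx::platform::CreateExecutableSet({exe_a, exe_b});
set->Submit(set);  // queue the whole set like a single executable
set->Trigger();    // run the members in their submission order
```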
diff --git a/include/tim/vx/platform/native.h b/include/tim/vx/platform/native.h index 18adb4c..cecf34a 100644 --- a/include/tim/vx/platform/native.h +++ b/include/tim/vx/platform/native.h @@ -38,36 +38,43 @@ class NativeDevice : public IDevice { virtual bool DeviceExit() = 0; virtual void WaitDeviceIdle() = 0; static std::vector<std::shared_ptr<IDevice>> Enumerate(); - }; -class NativeExecutable : public IExecutable{ +class NativeExecutable : public IExecutable { public: - NativeExecutable(const std::shared_ptr<IExecutor>& executor, const std::vector<char>& nb_buf, size_t inputs, size_t outputs); + NativeExecutable(const std::shared_ptr<IExecutor>& executor, + const std::vector<char>& nb_buf, size_t inputs, + size_t outputs); ~NativeExecutable(){}; void SetInput(const std::shared_ptr<ITensorHandle>& th) override; void SetOutput(const std::shared_ptr<ITensorHandle>& th) override; - void GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) override; - bool Submit(const std::shared_ptr<IExecutable>& ref, bool after = true) override; + void GetOutput( + const std::vector<std::shared_ptr<ITensorHandle>>& th) override; + bool Submit(const std::shared_ptr<IExecutable>& ref, + bool after = true) override; bool Trigger(bool async = false) override; - std::shared_ptr<ITensorHandle> AllocateTensor(const TensorSpec& tensor_spec) override; + std::shared_ptr<ITensorHandle> AllocateTensor( + const TensorSpec& tensor_spec) override; bool Verify() override; protected: std::shared_ptr<tim::vx::ops::NBG> nb_node_; std::vector<char> nb_buf_; - }; -class NativeExecutor : public IExecutor, public std::enable_shared_from_this<NativeExecutor>{ +class NativeExecutor : public IExecutor, + public std::enable_shared_from_this<NativeExecutor> { public: NativeExecutor(const std::shared_ptr<IDevice>& device); - NativeExecutor(const std::shared_ptr<IDevice>& device, const std::shared_ptr<Context>& context); + NativeExecutor(const std::shared_ptr<IDevice>& device, + const std::shared_ptr<Context>& context); ~NativeExecutor(){}; - bool Submit(const std::shared_ptr<IExecutable>& executable, const std::shared_ptr<IExecutable>& ref, bool after = true) override; + bool Submit(const std::shared_ptr<IExecutable>& executable, + const std::shared_ptr<IExecutable>& ref, + bool after = true) override; bool Trigger(bool async = false) override; - std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph) override; - + std::shared_ptr<IExecutable> Compile( + const std::shared_ptr<Graph>& graph) override; }; class NativeTensorHandle : public ITensorHandle { @@ -75,7 +82,6 @@ class NativeTensorHandle : public ITensorHandle { NativeTensorHandle(const std::shared_ptr<Tensor>& tensor); bool CopyDataToTensor(const void* data, uint32_t size_in_bytes) override; bool CopyDataFromTensor(void* data) override; - }; } // namespace platform diff --git a/include/tim/vx/platform/platform.h b/include/tim/vx/platform/platform.h index 9a3c853..e90bce7 100644 --- a/include/tim/vx/platform/platform.h +++ b/include/tim/vx/platform/platform.h @@ -39,7 +39,7 @@ namespace vx { class Graph; class Context; -namespace ops{ +namespace ops { class NBG; } @@ -51,13 +51,16 @@ class ExecutableSet; class IExecutor; class ITensorHandle; -std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph, const std::shared_ptr<IExecutor>& executor); -std::shared_ptr<ExecutableSet> CreateExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables); +std::shared_ptr<IExecutable> Compile( + const std::shared_ptr<Graph>& graph, + const std::shared_ptr<IExecutor>& executor); +std::shared_ptr<ExecutableSet> CreateExecutableSet( + const std::vector<std::shared_ptr<IExecutable>>& executables); class IDevice { public: using device_id_t = uint32_t; - using async_callback = std::function<bool (const void*)>; + using async_callback = std::function<bool(const void*)>; using data_t = const void*; virtual ~IDevice(){}; virtual bool Submit(const std::shared_ptr<Graph>& graph) = 0; @@ -65,6 +68,7 @@ class IDevice { device_id_t Id() const; virtual void WaitDeviceIdle() = 0; virtual bool DeviceExit() = 0; + virtual void RemoteReset(); protected: device_id_t device_id_; @@ -74,9 +78,12 @@ class IExecutor { public: using task = std::weak_ptr<IExecutable>; virtual ~IExecutor(){}; - virtual bool Submit(const std::shared_ptr<IExecutable>& executable, const std::shared_ptr<IExecutable>& ref, bool after=true) = 0; + virtual bool Submit(const std::shared_ptr<IExecutable>& executable, + const std::shared_ptr<IExecutable>& ref, + bool after = true) = 0; virtual bool Trigger(bool async = false) = 0; // todo: async=true - virtual std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph) = 0; + virtual std::shared_ptr<IExecutable> Compile( + const std::shared_ptr<Graph>& graph) = 0; virtual std::shared_ptr<IDevice> Device() const; virtual std::shared_ptr<Context> Contex() const; @@ -86,17 +93,20 @@ class IExecutor { std::shared_ptr<Context> context_; }; -class IExecutable : public std::enable_shared_from_this<IExecutable>{ +class IExecutable : public std::enable_shared_from_this<IExecutable> { public: virtual ~IExecutable(){}; virtual void SetInput(const std::shared_ptr<ITensorHandle>& th) = 0; virtual void SetOutput(const std::shared_ptr<ITensorHandle>& th) = 0; - virtual void GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) = 0; // for remote - virtual bool Submit(const std::shared_ptr<IExecutable>& ref, bool after = true) = 0; + virtual void GetOutput( + const std::vector<std::shared_ptr<ITensorHandle>>& th) = 0; // for remote + virtual bool Submit(const std::shared_ptr<IExecutable>& ref, + bool after = true) = 0; virtual bool Trigger(bool async = false) = 0; // todo: async=true virtual bool Verify() = 0; virtual std::shared_ptr<Graph> NBGraph() const; - virtual std::shared_ptr<ITensorHandle> AllocateTensor(const TensorSpec&
tensor_spec) = 0; + virtual std::shared_ptr<ITensorHandle> AllocateTensor( + const TensorSpec& tensor_spec) = 0; virtual std::shared_ptr<IExecutor> Executor() const; protected: @@ -105,21 +115,23 @@ class IExecutable : public std::enable_shared_from_this<IExecutable>{ std::shared_ptr<Graph> nb_graph_; }; -class ExecutableSet : public IExecutable{ +class ExecutableSet : public IExecutable { public: ExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables); void SetInput(const std::shared_ptr<ITensorHandle>& th) override; void SetOutput(const std::shared_ptr<ITensorHandle>& th) override; - void GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) override; - bool Submit(const std::shared_ptr<IExecutable>& ref, bool after = true) override; + void GetOutput( + const std::vector<std::shared_ptr<ITensorHandle>>& th) override; + bool Submit(const std::shared_ptr<IExecutable>& ref, + bool after = true) override; bool Trigger(bool async = false) override; bool Verify() override; - std::shared_ptr<ITensorHandle> AllocateTensor(const TensorSpec& tensor_spec) override; + std::shared_ptr<ITensorHandle> AllocateTensor( + const TensorSpec& tensor_spec) override; std::vector<std::shared_ptr<IExecutable>> Executables() const; - + protected: std::vector<std::shared_ptr<IExecutable>> executables_; - }; class ITensorHandle { @@ -131,7 +143,6 @@ class ITensorHandle { protected: std::shared_ptr<Tensor> tensor_; - }; } // namespace platform diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index e403dc8..919c0a5 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -20,4 +20,10 @@ endif() if(TIM_VX_ENABLE_PLATFORM) add_subdirectory("lenet_multi_device") add_subdirectory("multi_device") + if(${TIM_VX_ENABLE_PLATFORM_LITE}) + add_subdirectory("lite_multi_device") + endif() + if(TIM_VX_ENABLE_GRPC) + add_subdirectory("grpc") + endif() endif() diff --git a/samples/grpc/CMakeLists.txt b/samples/grpc/CMakeLists.txt new file mode 100644 index 0000000..326f275 --- /dev/null +++ b/samples/grpc/CMakeLists.txt @@ -0,0 +1,11 @@ +message("samples/grpc") + +set(TARGET_NAME "grpc_multi_device") + +add_executable(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/grpc_multi_device.cc) + +target_link_libraries(${TARGET_NAME} PRIVATE -Wl,--whole-archive tim-vx) +target_include_directories(${TARGET_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/include) + +install(TARGETS ${TARGET_NAME} ${TARGET_NAME} + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}) \ No newline at end of file diff --git a/samples/grpc/README.txt b/samples/grpc/README.txt new file mode 100644 index 0000000..d456382 --- /dev/null +++ b/samples/grpc/README.txt @@ -0,0 +1,2 @@ +Run grpc_multi_device with the server address and port, for example: +./grpc_multi_device 0.0.0.0:50051 \ No newline at end of file diff --git a/samples/grpc/grpc_multi_device.cc b/samples/grpc/grpc_multi_device.cc new file mode 100644 index 0000000..42efa24 --- /dev/null +++ b/samples/grpc/grpc_multi_device.cc @@ -0,0 +1,87 @@ + +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software.
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ +#include <cstdlib> +#include <iostream> +#include <vector> + +#include "tim/vx/context.h" +#include "tim/vx/graph.h" +#include "tim/vx/ops.h" +#include "tim/vx/types.h" +#include "tim/vx/platform/grpc/grpc_remote.h" + +int main(int argc, char** argv) { + if (argc < 2) { + std::cout << "error: need a server address to connect, e.g. 0.0.0.0:50051." << std::endl; + return -1; + } + // construct the tim-vx graph + auto ctx = tim::vx::Context::Create(); + auto graph = ctx->CreateGraph(); + + tim::vx::ShapeType io_shape({2, 2}); + tim::vx::TensorSpec input_spec(tim::vx::DataType::INT32, io_shape, + tim::vx::TensorAttribute::INPUT); + tim::vx::TensorSpec output_spec(tim::vx::DataType::INT32, io_shape, + tim::vx::TensorAttribute::OUTPUT); + auto input_t0 = graph->CreateTensor(input_spec); + auto input_t1 = graph->CreateTensor(input_spec); + auto output_t = graph->CreateTensor(output_spec); + + auto add = graph->CreateOperation<tim::vx::ops::Add>(); + (*add).BindInputs({input_t0, input_t1}).BindOutputs({output_t}); + + std::vector<int32_t> data_vec_i0({1, 2, 3, 4}); + std::vector<int32_t> data_vec_i1({4, 3, 2, 1}); + + // use gRPC with the platform remote API + std::string port(argv[1]); + auto devices = tim::vx::platform::GRPCRemoteDevice::Enumerate(port); + auto device = devices[0]; + auto executor = std::make_shared<tim::vx::platform::GRPCRemoteExecutor>(device); + auto executable = executor->Compile(graph); + auto input0_handle = executable->AllocateTensor(input_spec); + auto input1_handle = executable->AllocateTensor(input_spec); + auto output_handle = executable->AllocateTensor(output_spec); + executable->SetInput(input0_handle); + executable->SetInput(input1_handle); + executable->SetOutput(output_handle); + input0_handle->CopyDataToTensor(data_vec_i0.data(), + data_vec_i0.size() * sizeof(int32_t)); + input1_handle->CopyDataToTensor(data_vec_i1.data(), + data_vec_i1.size() * sizeof(int32_t)); + executable->Submit(executable); + executor->Trigger(); + + int* data = (int*)malloc(4 * sizeof(int)); + + output_handle->CopyDataFromTensor(data); + + // each output value should be "5" in this demo + for (int i = 0; i < 4; ++i) { + std::cout << "output value: " << data[i] << std::endl; + } + free(data); + + // important: reset the remote server state after use + device->RemoteReset(); + return 0; +}
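The sample above drives execution with the two-step Submit/Trigger pattern from platform.h: Submit(ref, after) only queues an executable relative to a reference executable (passing itself, as here, simply queues it), and Trigger() then launches everything queued on the executor. A hedged sketch of chaining two hypothetical executables, assuming the `after` flag orders relative to the reference as the signature suggests:

```cpp
// Sketch: run `post` after `pre` on the same executor; `after=false`
// would instead ask for ordering ahead of the reference executable.
pre->Submit(pre);
post->Submit(pre, true);  // queue post after pre
executor->Trigger();      // launch the whole queue
```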
diff --git a/samples/lite_multi_device/CMakeLists.txt b/samples/lite_multi_device/CMakeLists.txt new file mode 100644 index 0000000..0356eef --- /dev/null +++ b/samples/lite_multi_device/CMakeLists.txt @@ -0,0 +1,13 @@ +message("samples/lite_multi_device") + +set(TARGET_NAME "lite_multi_device") + +add_executable(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/lite_multi_device.cc) + +target_link_libraries(${TARGET_NAME} PRIVATE -Wl,--whole-archive tim-vx) +target_include_directories(${TARGET_NAME} PRIVATE + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/prebuilt-sdk/viplite/build/sdk/include) + +install(TARGETS ${TARGET_NAME} ${TARGET_NAME} + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}) \ No newline at end of file diff --git a/samples/lite_multi_device/lite_multi_device.cc b/samples/lite_multi_device/lite_multi_device.cc new file mode 100644 index 0000000..51aec07 --- /dev/null +++ b/samples/lite_multi_device/lite_multi_device.cc @@ -0,0 +1,79 @@ + +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ +#include <cstdlib> +#include <iostream> +#include <vector> + +#include "tim/vx/context.h" +#include "tim/vx/graph.h" +#include "tim/vx/ops.h" +#include "tim/vx/types.h" +#include "tim/vx/platform/native.h" +#include "tim/vx/platform/lite/lite_native.h" + +int main() { + // construct the tim-vx graph + auto ctx = tim::vx::Context::Create(); + auto graph = ctx->CreateGraph(); + + tim::vx::ShapeType io_shape({2, 2}); + tim::vx::TensorSpec input_spec(tim::vx::DataType::INT32, io_shape, + tim::vx::TensorAttribute::INPUT); + tim::vx::TensorSpec output_spec(tim::vx::DataType::INT32, io_shape, + tim::vx::TensorAttribute::OUTPUT); + auto input_t0 = graph->CreateTensor(input_spec); + auto input_t1 = graph->CreateTensor(input_spec); + auto output_t = graph->CreateTensor(output_spec); + + auto add = graph->CreateOperation<tim::vx::ops::Add>(); + (*add).BindInputs({input_t0, input_t1}).BindOutputs({output_t}); + + std::vector<int32_t> data_vec_i0({1, 2, 3, 4}); + std::vector<int32_t> data_vec_i1({4, 3, 2, 1}); + + auto devices = tim::vx::platform::NativeDevice::Enumerate(); + auto device = devices[0]; + auto executor = std::make_shared<tim::vx::platform::LiteNativeExecutor>(device); + auto executable = executor->Compile(graph); + auto input0_handle = executable->AllocateTensor(input_spec); + auto input1_handle = executable->AllocateTensor(input_spec); + auto output_handle = executable->AllocateTensor(output_spec); + executable->SetInput(input0_handle); + executable->SetInput(input1_handle); + executable->SetOutput(output_handle); + input0_handle->CopyDataToTensor(data_vec_i0.data(), + data_vec_i0.size() * sizeof(int32_t)); + input1_handle->CopyDataToTensor(data_vec_i1.data(), + data_vec_i1.size() * sizeof(int32_t)); + executable->Submit(executable); + executor->Trigger(); + + int* data = (int*)malloc(4 * sizeof(int)); + + output_handle->CopyDataFromTensor(data); + + // each output value should be "5" in this demo + for (int i = 0; i < 4; ++i) { + std::cout << "output value: " << data[i] << std::endl; + } + free(data); + return 0; +}
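Note that both samples index devices[0] without a guard; on a host without a visible NPU, Enumerate() presumably returns an empty vector, so a real application should check before dereferencing. A defensive variant of the lookup:

```cpp
// Defensive variant of the device lookup used by the samples.
auto devices = tim::vx::platform::NativeDevice::Enumerate();
if (devices.empty()) {
  std::cerr << "no NPU device available" << std::endl;
  return -1;
}
```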
diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt index b1dbc34..1172297 100644 --- a/src/tim/CMakeLists.txt +++ b/src/tim/CMakeLists.txt @@ -12,8 +12,7 @@ endif() set(${TARGET_NAME}_SRCS) list(APPEND ${TARGET_NAME}_SRCS ${VX_SRC} - ${OPS_SRC} -) + ${OPS_SRC}) if(${TIM_VX_USE_EXTERNAL_OVXLIB}) find_library(OVXLIB_LIB NAMES "ovxlib") @@ -41,16 +40,60 @@ if(TIM_VX_ENABLE_LAYOUT_INFER) list(APPEND ${TARGET_NAME}_SRCS ${LAYOUT_INFER_FRAMEWORK_SRCS} - ${LAYOUT_INFER_OP_SRCS} - ) + ${LAYOUT_INFER_OP_SRCS}) endif() if(TIM_VX_ENABLE_PLATFORM) - message(STATUS "Using paltform") + message(STATUS "Using platform") aux_source_directory(./vx/platform PLATFORM_SRC) list(APPEND ${TARGET_NAME}_SRCS - ${PLATFORM_SRC} - ) + ${PLATFORM_SRC}) + + if(${TIM_VX_ENABLE_PLATFORM_LITE}) + message(STATUS "Using lite platform") + list(APPEND ${TARGET_NAME}_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/vx/platform/lite/lite_native.cc) + if(NOT VIP_LITE_SDK) + message(FATAL_ERROR "Please set VIP_LITE_SDK when using the lite platform (TIM_VX_ENABLE_PLATFORM_LITE)") + endif() + list(APPEND EXTERNAL_LIBS + ${VIP_LITE_SDK}/drivers/libNBGlinker.so + ${VIP_LITE_SDK}/drivers/libVIPlite.so) + list(APPEND INC_DIRS ${VIP_LITE_SDK}/include) + endif() + + if(TIM_VX_ENABLE_GRPC) + list(APPEND ${TARGET_NAME}_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/vx/platform/grpc/grpc_platform_client.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/vx/platform/grpc/grpc_remote.cc") + + # Proto file + get_filename_component(gp_proto "${CMAKE_CURRENT_SOURCE_DIR}/vx/platform/grpc/grpc_platform.proto" ABSOLUTE) + get_filename_component(gp_proto_path "${gp_proto}" PATH) + + # Generate sources + set(gp_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/grpc_platform.pb.cc") + set(gp_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/grpc_platform.pb.h") + set(gp_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/grpc_platform.grpc.pb.cc") + set(gp_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/grpc_platform.grpc.pb.h") + add_custom_command( + OUTPUT "${gp_proto_srcs}" "${gp_proto_hdrs}" "${gp_grpc_srcs}" "${gp_grpc_hdrs}" + COMMAND ${PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${gp_proto_path}" + --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN_EXECUTABLE}" + "${gp_proto}" + DEPENDS "${gp_proto}") + + include_directories(${CMAKE_CURRENT_BINARY_DIR}) + + list(APPEND ${TARGET_NAME}_SRCS + ${gp_grpc_srcs} + ${gp_grpc_hdrs} + ${gp_proto_srcs} + ${gp_proto_hdrs}) + endif() endif() foreach(src_file ${${TARGET_NAME}_SRCS}) @@ -67,8 +110,7 @@ list(APPEND INC_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/vx ${CMAKE_CURRENT_SOURCE_DIR}/transform ${OVXLIB_INCLUDE_DIR} - ${OVXDRV_INCLUDE_DIRS} -) + ${OVXDRV_INCLUDE_DIRS}) if(${TIM_VX_ENABLE_VIPLITE}) aux_source_directory(./lite LITE_SRC) @@ -77,10 +119,9 @@ if(${TIM_VX_ENABLE_VIPLITE}) list(APPEND EXTERNAL_LIBS ${VIPLITE_DRV_LIBRARIES}) list(APPEND INC_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/lite - ${VIPLITE_DRV_INCLUDE_DIR} - ) + ${VIPLITE_DRV_INCLUDE_DIR}) endif() - +include_directories(${INC_DIRS}) # convert op list as compile flags so that we can implement compile compatable easier if(${TIM_VX_USE_EXTERNAL_OVXLIB}) file(STRINGS "${OVXLIB_INC}/interface/ops.def" ops_file_content) @@ -97,49 +138,88 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${op_as_flags}") add_library(${TARGET_NAME} ${${TARGET_NAME}_SRCS}) target_include_directories(${TARGET_NAME} PRIVATE ${INC_DIRS}) target_link_libraries(${TARGET_NAME} PUBLIC - -Wl,--no-whole-archive ${EXTERNAL_LIBS} ${OVXDRV_LIBRARIES}) + -Wl,--no-whole-archive ${OVXDRV_LIBRARIES} ${EXTERNAL_LIBS}) if(${TIM_VX_USE_EXTERNAL_OVXLIB}) #-Wl,--whole-archive should not applied to external library, but only for shared library - target_link_libraries(${TARGET_NAME} PUBLIC tim_internal) + target_link_libraries(${TARGET_NAME} PUBLIC
tim_internal) endif() -if (NOT CMAKE_INSTALL_LIBDIR) +if(TIM_VX_ENABLE_PLATFORM AND TIM_VX_ENABLE_GRPC) + target_link_libraries(${TARGET_NAME} PUBLIC + ${GRPCPP_REFLECTION} + ${GRPC_GRPCPP} + ${PROTOBUF_LIBPROTOBUF}) + + add_executable(grpc_platform_server + ${CMAKE_CURRENT_SOURCE_DIR}/vx/platform/grpc/grpc_platform_server.cc) + target_link_libraries(grpc_platform_server -Wl,--whole-archive ${TARGET_NAME}) + install(TARGETS grpc_platform_server grpc_platform_server + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}) +endif() +if(NOT CMAKE_INSTALL_LIBDIR) set(CMAKE_INSTALL_LIBDIR "lib") endif() +# Install install(TARGETS ${TARGET_NAME} ${TARGET_NAME} DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) -install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx - DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim) +install( + FILES + ${CMAKE_SOURCE_DIR}/include/tim/vx/builtin_op.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/compile_option.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/context.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/graph.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/operation.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/ops.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/tensor.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/types.h + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx) -install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/lite +install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx/ops + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx) + +if(TIM_VX_ENABLE_VIPLITE) + install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/lite DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim) +endif() if(TIM_VX_ENABLE_LAYOUT_INFER) - install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/transform - DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim) + install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/transform + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim) +endif() + +if(TIM_VX_ENABLE_PLATFORM) + install( + FILES + ${CMAKE_SOURCE_DIR}/include/tim/vx/platform/platform.h + ${CMAKE_SOURCE_DIR}/include/tim/vx/platform/native.h + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx/platform) + if(TIM_VX_ENABLE_PLATFORM_LITE) + install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx/platform/lite + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx/platform) + endif() + if(TIM_VX_ENABLE_GRPC) + install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx/platform/grpc + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx/platform) + endif() endif() if(TIM_VX_ENABLE_TEST) include(GoogleTest) add_executable(unit_test ${${TARGET_NAME}_TEST_SRCS}) - target_link_libraries(unit_test PRIVATE gtest gtest_main gmock gmock_main ${TARGET_NAME} ${OVXDRV_LIBRARIES}) + target_link_libraries(unit_test PRIVATE + -Wl,--whole-archive ${TARGET_NAME} + -Wl,--no-whole-archive gtest gtest_main gmock gmock_main ${OVXDRV_LIBRARIES}) target_include_directories(unit_test PRIVATE ${PROJECT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/vx ${OVXLIB_INCLUDE_DIR} - ${INC_DIRS} - ) + ${INC_DIRS}) install(TARGETS unit_test DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}) endif() -if(TIM_VX_ENABLE_PLATFORM) - install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/tim/vx/platform - DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/tim/vx) -endif() - add_subdirectory("utils") diff --git a/src/tim/vx/platform/grpc/README.md b/src/tim/vx/platform/grpc/README.md new file mode 100644
index 0000000..22e5dc3 --- /dev/null +++ b/src/tim/vx/platform/grpc/README.md @@ -0,0 +1,51 @@ +tim-vx uses gRPC to provide a remote service, by which you can deploy your model on a remote device. +## Build and run on host +1. Build and install gRPC, see [build gRPC C++](https://github.com/grpc/grpc/blob/master/BUILDING.md) +2. Build tim-vx with gRPC +```shell +$ cd ${tim_vx_root} +$ mkdir host_build && cd host_build +$ cmake .. \ + -DTIM_VX_ENABLE_PLATFORM=ON \ + -DTIM_VX_ENABLE_GRPC=ON \ + -DTIM_VX_BUILD_EXAMPLES=ON \ + -DCMAKE_PREFIX_PATH=${grpc_host_install_path} +$ make -j4 +$ make install +``` +3. Start server +```shell +$ export LD_LIBRARY_PATH=${tim_vx_root}/host_build/install/lib:${tim_vx_root}/prebuilt-sdk/x86_64_linux/lib:$LD_LIBRARY_PATH +$ cd ${tim_vx_root}/host_build/install/bin +$ ./grpc_platform_server 0.0.0.0:50051 +``` +4. Run demo + +Open a new terminal +```shell +$ export LD_LIBRARY_PATH=${tim_vx_root}/host_build/install/lib:${tim_vx_root}/prebuilt-sdk/x86_64_linux/lib:$LD_LIBRARY_PATH +$ cd ${tim_vx_root}/host_build/install/bin +$ ./grpc_multi_device 0.0.0.0:50051 +``` +## Build for device +1. Cross-compile gRPC, see [Cross-compile gRPC](https://github.com/grpc/grpc/blob/master/BUILDING.md#cross-compiling) + +Note: you should keep both gRPC installations, one for the host and one for the device. + +2. Build tim-vx with the host gRPC and the device gRPC +```shell +$ cd ${tim_vx_root} +$ mkdir device_build && cd device_build +$ cmake .. \ + -DTIM_VX_ENABLE_PLATFORM=ON \ + -DTIM_VX_ENABLE_GRPC=ON \ + -DTIM_VX_BUILD_EXAMPLES=ON \ + -DCMAKE_PREFIX_PATH=${grpc_host_install_path} \ + -DCMAKE_TOOLCHAIN_FILE=${path_to_tool_chain_file} \ + -DEXTERNAL_VIV_SDK=${tim_vx_root}/prebuilt-sdk/x86_64_linux \ + -DProtobuf_DIR=${grpc_device_install_path}/lib/cmake/protobuf \ + -DgRPC_DIR=${grpc_device_install_path}/lib/cmake/grpc \ + -Dabsl_DIR=${grpc_device_install_path}/lib/cmake/absl +$ make -j4 +$ make install +``` \ No newline at end of file diff --git a/src/tim/vx/platform/grpc/grpc_platform.proto b/src/tim/vx/platform/grpc/grpc_platform.proto new file mode 100644 index 0000000..75111f4 --- /dev/null +++ b/src/tim/vx/platform/grpc/grpc_platform.proto @@ -0,0 +1,120 @@ +syntax = "proto3"; + +package rpc; + +service GRPCPlatform { + rpc Enumerate(EmptyMsg) returns (DeviceCount) {} + + rpc CreateExecutor(Device) returns (Executor) {} + + rpc CreateExecutable(GraphInfo) returns (Executable) {} + + rpc AllocateTensor(TensorInfo) returns (Tensor) {} + + rpc SetInput(IOTensor) returns (Status) {} + + rpc SetOutput(IOTensor) returns (Status) {} + + rpc Submit(Executable) returns (Status) {} + + rpc Trigger(Executor) returns (Status) {} + + rpc CopyDataToTensor(TensorData) returns (Status) {} + + rpc CopyDataFromTensor(Tensor) returns (Data) {} + + rpc Clean(EmptyMsg) returns (Status) {} +} + +message EmptyMsg {} + +message DeviceCount { + int32 count = 1; +} + +message Device { + int32 device = 1; +} + +message Executor { + int32 executor = 1; +} + +message Executable { + int32 executable = 1; +} + +message Tensor { + int32 tensor = 1; +} + +message Data { + bytes data = 1; +} + +message GraphInfo { + int32 executor = 1; + int32 input_size = 2; + int32 output_size = 3; + bytes nbg = 4; +} + +enum DataType { + UNKNOWN = 0; + INT8 = 1; + UINT8 = 2; + INT16 = 3; + UINT16 = 4; + INT32 = 5; + UINT32 = 6; + FLOAT16 = 7; + FLOAT32 = 8; + INT64 = 9; + BOOL8 = 10; +} + +enum TensorAttr { + INVALID = 0; + INPUT = 1; + OUTPUT = 2; +} + +enum QuantType { + NONE = 0; + ASYMMETRIC = 1; + SYMMETRIC_PER_CHANNEL = 2; +} +
+message Quantization { + QuantType quant_type = 1; + int32 channel_dim = 2; + repeated int32 scales = 3; + repeated int32 zero_points = 4; +} + +message TensorSpec { + DataType data_type = 1; + repeated int32 shape = 2; + TensorAttr tensor_attr = 3; + Quantization quant = 4; + +} + +message TensorInfo { + int32 executable = 1; + TensorSpec tensor_spec = 2; +} + +message IOTensor { + int32 tensor = 1; + int32 executable = 2; +} + +message TensorData { + int32 tensor = 1; + bytes data = 2; +} + +message Status { + bool status = 1; +} \ No newline at end of file diff --git a/src/tim/vx/platform/grpc/grpc_platform_client.cc b/src/tim/vx/platform/grpc/grpc_platform_client.cc new file mode 100644 index 0000000..e80a414 --- /dev/null +++ b/src/tim/vx/platform/grpc/grpc_platform_client.cc @@ -0,0 +1,240 @@ +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+* +*****************************************************************************/ +#include "grpc_platform_client.h" + +#include <cstring> + +namespace { +::rpc::DataType MapDataType(tim::vx::DataType type) { + ::rpc::DataType rpc_type; + switch (type) { + case tim::vx::DataType::FLOAT32: + rpc_type = ::rpc::DataType::FLOAT32; + break; + case tim::vx::DataType::FLOAT16: + rpc_type = ::rpc::DataType::FLOAT16; + break; + case tim::vx::DataType::INT64: + rpc_type = ::rpc::DataType::INT64; + break; + case tim::vx::DataType::INT32: + rpc_type = ::rpc::DataType::INT32; + break; + case tim::vx::DataType::INT16: + rpc_type = ::rpc::DataType::INT16; + break; + case tim::vx::DataType::INT8: + rpc_type = ::rpc::DataType::INT8; + break; + case tim::vx::DataType::UINT32: + rpc_type = ::rpc::DataType::UINT32; + break; + case tim::vx::DataType::UINT16: + rpc_type = ::rpc::DataType::UINT16; + break; + case tim::vx::DataType::UINT8: + rpc_type = ::rpc::DataType::UINT8; + break; + case tim::vx::DataType::BOOL8: + rpc_type = ::rpc::DataType::BOOL8; + break; + default: + std::cout << "unknown tim vx data type" << std::endl; + assert(false); + } + return rpc_type; +} + +::rpc::TensorAttr MapTensorAttr(tim::vx::TensorAttribute attr) { + ::rpc::TensorAttr rpc_attr; + switch (attr) { + case tim::vx::TensorAttribute::INPUT: + rpc_attr = ::rpc::TensorAttr::INPUT; + break; + case tim::vx::TensorAttribute::OUTPUT: + rpc_attr = ::rpc::TensorAttr::OUTPUT; + break; + default: + std::cout << "invalid tim vx tensor attr" << std::endl; + assert(false); + } + return rpc_attr; +} + +::rpc::QuantType MapQuantType(tim::vx::QuantType quant) { + ::rpc::QuantType rpc_quant; + switch (quant) { + case tim::vx::QuantType::NONE: + rpc_quant = ::rpc::QuantType::NONE; + break; + case tim::vx::QuantType::ASYMMETRIC: + rpc_quant = ::rpc::QuantType::ASYMMETRIC; + break; + case tim::vx::QuantType::SYMMETRIC_PER_CHANNEL: + rpc_quant = ::rpc::QuantType::SYMMETRIC_PER_CHANNEL; + break; + default: + std::cout << "invalid tim vx quant type" << std::endl; + assert(false); + } + return rpc_quant; +} + +} // namespace +namespace tim { +namespace vx { +namespace platform { +int32_t GRPCPlatformClient::Enumerate() { + ::grpc::ClientContext context; + ::rpc::EmptyMsg emsg; + ::rpc::DeviceCount device_count; + stub_->Enumerate(&context, emsg, &device_count); + + return device_count.count(); +} + +int32_t GRPCPlatformClient::CreateExecutor(int32_t device) { + ::grpc::ClientContext context; + ::rpc::Device device_msg; + device_msg.set_device(device); + ::rpc::Executor executor_msg; + stub_->CreateExecutor(&context, device_msg, &executor_msg); + + return executor_msg.executor(); +} + +int32_t GRPCPlatformClient::CreateExecutable(int32_t executor, + const std::vector<char>& nbg, + int32_t input_size, + int32_t output_size) { + ::grpc::ClientContext context; + ::rpc::GraphInfo graph_info_msg; + graph_info_msg.set_executor(executor); + graph_info_msg.set_input_size(input_size); + graph_info_msg.set_output_size(output_size); + std::string nbg_str(nbg.data(), nbg.size()); + graph_info_msg.set_nbg(nbg_str); + ::rpc::Executable executable_msg; + stub_->CreateExecutable(&context, graph_info_msg, &executable_msg); + + return executable_msg.executable(); +} + +int32_t GRPCPlatformClient::AllocateTensor(int32_t executable, + const tim::vx::TensorSpec& spec) { + ::grpc::ClientContext context; + ::rpc::TensorInfo tensor_info_msg; + ::rpc::Tensor tensor_msg; + tensor_info_msg.set_executable(executable); + tensor_info_msg.mutable_tensor_spec()->set_data_type( + MapDataType(spec.datatype_)); +
tensor_info_msg.mutable_tensor_spec()->set_tensor_attr( + MapTensorAttr(spec.attr_)); + tensor_info_msg.mutable_tensor_spec()->mutable_quant()->set_quant_type( + MapQuantType(spec.quantization_.Type())); + for (uint32_t s : spec.shape_) { + tensor_info_msg.mutable_tensor_spec()->add_shape(s); + } + + stub_->AllocateTensor(&context, tensor_info_msg, &tensor_msg); + return tensor_msg.tensor(); +} + +bool GRPCPlatformClient::SetInput(int32_t executable, int32_t tensor) { + ::grpc::ClientContext context; + ::rpc::IOTensor in_tensor_msg; + ::rpc::Status status_msg; + in_tensor_msg.set_executable(executable); + in_tensor_msg.set_tensor(tensor); + + stub_->SetInput(&context, in_tensor_msg, &status_msg); + return status_msg.status(); +} + +bool GRPCPlatformClient::SetOutput(int32_t executable, int32_t tensor) { + ::grpc::ClientContext context; + ::rpc::IOTensor out_tensor_msg; + ::rpc::Status status_msg; + out_tensor_msg.set_executable(executable); + out_tensor_msg.set_tensor(tensor); + + stub_->SetOutput(&context, out_tensor_msg, &status_msg); + return status_msg.status(); +} + +bool GRPCPlatformClient::Submit(int32_t executable) { + ::grpc::ClientContext context; + ::rpc::Executable executable_msg; + ::rpc::Status status_msg; + executable_msg.set_executable(executable); + + stub_->Submit(&context, executable_msg, &status_msg); + return status_msg.status(); +} + +bool GRPCPlatformClient::Trigger(int32_t executor) { + ::grpc::ClientContext context; + ::rpc::Executor executor_msg; + ::rpc::Status status_msg; + executor_msg.set_executor(executor); + + stub_->Trigger(&context, executor_msg, &status_msg); + return status_msg.status(); +} + +bool GRPCPlatformClient::CopyDataToTensor(int32_t tensor, const void* data, + int32_t length) { + ::grpc::ClientContext context; + ::rpc::TensorData tensor_data_msg; + ::rpc::Status status_msg; + tensor_data_msg.set_tensor(tensor); + std::string data_str(reinterpret_cast<const char*>(data), length); + tensor_data_msg.set_data(data_str); + + stub_->CopyDataToTensor(&context, tensor_data_msg, &status_msg); + return status_msg.status(); +} + +bool GRPCPlatformClient::CopyDataFromTensor(int32_t tensor, void* data) { + ::grpc::ClientContext context; + ::rpc::Tensor tensor_msg; + ::rpc::Data data_msg; + tensor_msg.set_tensor(tensor); + + stub_->CopyDataFromTensor(&context, tensor_msg, &data_msg); + std::string data_str = data_msg.data(); + memcpy(data, data_str.data(), data_str.size()); + return (data != nullptr); +} + +void GRPCPlatformClient::Clean() { + ::grpc::ClientContext context; + ::rpc::EmptyMsg emsg; + ::rpc::Status status_msg; + + stub_->Clean(&context, emsg, &status_msg); +} + +} // namespace platform +} // namespace vx +} // namespace tim diff --git a/src/tim/vx/platform/grpc/grpc_platform_client.h b/src/tim/vx/platform/grpc/grpc_platform_client.h new file mode 100644 index 0000000..f29ebed --- /dev/null +++ b/src/tim/vx/platform/grpc/grpc_platform_client.h @@ -0,0 +1,77 @@ +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +*
+* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ +#ifndef _GRPC_PLATFORM_CLIENT_ +#define _GRPC_PLATFORM_CLIENT_ + +#include <grpcpp/grpcpp.h> +#include <iostream> +#include <memory> +#include <string> +#include <vector> + +#include "tim/vx/context.h" +#include "tim/vx/graph.h" +#include "tim/vx/ops.h" +#include "tim/vx/types.h" +#include "grpc_platform.grpc.pb.h" + +namespace tim { +namespace vx { +namespace platform { +class GRPCPlatformClient { + public: + GRPCPlatformClient(const std::string& port) + : stub_(rpc::GRPCPlatform::NewStub( + grpc::CreateChannel(port, grpc::InsecureChannelCredentials()))) {} + + int32_t Enumerate(); + + int32_t CreateExecutor(int32_t device); + + int32_t CreateExecutable(int32_t executor, const std::vector<char>& nbg, + int32_t input_size, int32_t output_size); + + int32_t AllocateTensor(int32_t executable, const tim::vx::TensorSpec& spec); + + bool SetInput(int32_t executable, int32_t tensor); + + bool SetOutput(int32_t executable, int32_t tensor); + + bool Submit(int32_t executable); + + bool Trigger(int32_t executor); + + bool CopyDataToTensor(int32_t tensor, const void* data, int32_t length); + + bool CopyDataFromTensor(int32_t tensor, void* data); + + void Clean(); + + private: + std::unique_ptr<rpc::GRPCPlatform::Stub> stub_; +}; +} // namespace platform +} // namespace vx +} // namespace tim +#endif \ No newline at end of file
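GRPCPlatformClient maps the GRPCPlatform service one call per method, and every remote object is addressed by the integer id the server hands back. A minimal sketch of the full call sequence; the nbg buffer, the specs, and the data pointers are hypothetical names, and error checking is omitted:

```cpp
// Sketch: the id-based wire protocol behind the GRPCRemote* proxies,
// assuming `nbg` holds a compiled network binary graph (NBG).
tim::vx::platform::GRPCPlatformClient client("0.0.0.0:50051");
int32_t device_count = client.Enumerate();     // devices on the server
int32_t executor = client.CreateExecutor(0);   // executor on device 0
int32_t executable = client.CreateExecutable(executor, nbg, /*inputs=*/1,
                                             /*outputs=*/1);
int32_t in = client.AllocateTensor(executable, input_spec);
int32_t out = client.AllocateTensor(executable, output_spec);
client.SetInput(executable, in);
client.SetOutput(executable, out);
client.CopyDataToTensor(in, in_data, in_bytes);
client.Submit(executable);
client.Trigger(executor);
client.CopyDataFromTensor(out, out_data);
client.Clean();  // drop all executor/executable/tensor state on the server
```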
diff --git a/src/tim/vx/platform/grpc/grpc_platform_server.cc b/src/tim/vx/platform/grpc/grpc_platform_server.cc new file mode 100644 index 0000000..943fff2 --- /dev/null +++ b/src/tim/vx/platform/grpc/grpc_platform_server.cc @@ -0,0 +1,348 @@ +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ +#include <cstring> +#include <grpcpp/grpcpp.h> +#include <iostream> +#include <memory> +#include <unordered_map> +#include <vector> + +#include "grpc_platform.grpc.pb.h" +#include "tim/vx/platform/native.h" +#include "vsi_nn_pub.h" +#ifdef ENABLE_PLATFORM_LITE +#include "tim/vx/platform/lite/lite_native.h" +#endif + +std::unordered_map<int32_t, std::shared_ptr<tim::vx::platform::IDevice>> + device_table; +std::unordered_map<int32_t, std::shared_ptr<tim::vx::platform::IExecutor>> + executor_table; +std::vector<std::shared_ptr<tim::vx::platform::IExecutable>> executable_table; +std::vector<std::shared_ptr<tim::vx::platform::ITensorHandle>> tensor_table; + +namespace { +tim::vx::DataType MapDataType(::rpc::DataType type) { + tim::vx::DataType vx_type; + switch (type) { + case ::rpc::DataType::FLOAT32: + vx_type = tim::vx::DataType::FLOAT32; + break; + case ::rpc::DataType::FLOAT16: + vx_type = tim::vx::DataType::FLOAT16; + break; + case ::rpc::DataType::INT64: + vx_type = tim::vx::DataType::INT64; + break; + case ::rpc::DataType::INT32: + vx_type = tim::vx::DataType::INT32; + break; + case ::rpc::DataType::INT16: + vx_type = tim::vx::DataType::INT16; + break; + case ::rpc::DataType::INT8: + vx_type = tim::vx::DataType::INT8; + break; + case ::rpc::DataType::UINT32: + vx_type = tim::vx::DataType::UINT32; + break; + case ::rpc::DataType::UINT16: + vx_type = tim::vx::DataType::UINT16; + break; + case ::rpc::DataType::UINT8: + vx_type = tim::vx::DataType::UINT8; + break; + case ::rpc::DataType::BOOL8: + vx_type = tim::vx::DataType::BOOL8; + break; + default: + std::cout << "unknown data type" << std::endl; + assert(false); + } + return vx_type; +} + +tim::vx::TensorAttribute MapTensorAttr(::rpc::TensorAttr attr) { + tim::vx::TensorAttribute vx_attr; + switch (attr) { + case ::rpc::TensorAttr::INPUT: + vx_attr = tim::vx::TensorAttribute::INPUT; + break; + case ::rpc::TensorAttr::OUTPUT: + vx_attr = tim::vx::TensorAttribute::OUTPUT; + break; + default: + std::cout << "invalid tensor attr" << std::endl; + assert(false); + } + return vx_attr; +} + +tim::vx::QuantType MapQuantType(::rpc::QuantType quant) { + tim::vx::QuantType vx_quant; + switch (quant) { + case ::rpc::QuantType::NONE: + vx_quant = tim::vx::QuantType::NONE; + break; + case ::rpc::QuantType::ASYMMETRIC: + vx_quant = tim::vx::QuantType::ASYMMETRIC; + break; + case ::rpc::QuantType::SYMMETRIC_PER_CHANNEL: + vx_quant = tim::vx::QuantType::SYMMETRIC_PER_CHANNEL; + break; + default: + std::cout << "invalid quant type" << std::endl; + assert(false); + } + return vx_quant; +} +} // namespace + +class GRPCPlatformService final : public ::rpc::GRPCPlatform::Service { + public: + ::grpc::Status Enumerate(::grpc::ServerContext* context, + const ::rpc::EmptyMsg* request, + ::rpc::DeviceCount* response) override { + VSILOGD("------ Calling gRPC Enumerate ------"); + (void)context; + (void)request; + auto devices = tim::vx::platform::NativeDevice::Enumerate(); + response->set_count(devices.size()); + for (int i = 0; i < static_cast<int>(devices.size()); ++i) { + device_table.insert({i, devices[i]}); + } + return ::grpc::Status::OK; + } + + ::grpc::Status CreateExecutor(::grpc::ServerContext* context, + const ::rpc::Device* request, + ::rpc::Executor* response) override { + VSILOGD("------ Calling gRPC CreateExecutor ------"); + (void)context; + int32_t id = request->device(); + auto device = device_table[id]; +#ifdef ENABLE_PLATFORM_LITE + auto executor = + std::make_shared<tim::vx::platform::LiteNativeExecutor>(device); +#else + auto executor = std::make_shared<tim::vx::platform::NativeExecutor>(device); +#endif + executor_table.insert({id, executor}); + response->set_executor(id); + return ::grpc::Status::OK; + } + + ::grpc::Status CreateExecutable(::grpc::ServerContext* context, + const ::rpc::GraphInfo* request, +
::rpc::Executable* response) override { + VSILOGD("------ Calling gRPC CreateExecutable ------"); + (void)context; + int32_t id = request->executor(); + auto executor = executor_table[id]; + std::string nbg_str = request->nbg(); + std::vector<char> nbg_vec(nbg_str.size()); + memcpy(nbg_vec.data(), nbg_str.data(), nbg_str.size()); +#ifdef ENABLE_PLATFORM_LITE + auto executable = std::make_shared<tim::vx::platform::LiteNativeExecutable>( + executor, nbg_vec); +#else + int32_t input_size = request->input_size(); + int32_t output_size = request->output_size(); + auto executable = std::make_shared<tim::vx::platform::NativeExecutable>( + executor, nbg_vec, input_size, output_size); +#endif + executable_table.push_back(executable); + response->set_executable(executable_table.size() - 1); + return ::grpc::Status::OK; + } + + ::grpc::Status AllocateTensor(::grpc::ServerContext* context, + const ::rpc::TensorInfo* request, + ::rpc::Tensor* response) override { + VSILOGD("------ Calling gRPC AllocateTensor ------"); + (void)context; + int32_t id = request->executable(); + auto executable = executable_table[id]; + tim::vx::DataType data_type = + MapDataType(request->tensor_spec().data_type()); + tim::vx::TensorAttribute tensor_attr = + MapTensorAttr(request->tensor_spec().tensor_attr()); + tim::vx::QuantType quant_type = + MapQuantType(request->tensor_spec().quant().quant_type()); + auto shape = request->tensor_spec().shape(); + tim::vx::ShapeType vx_shape(shape.size()); + for (int i = 0; i < shape.size(); ++i) vx_shape[i] = shape[i]; + tim::vx::TensorSpec tensor_spec; + if (quant_type == tim::vx::QuantType::NONE) { + tensor_spec = tim::vx::TensorSpec(data_type, vx_shape, tensor_attr); + } else { + tim::vx::Quantization quantization; + quantization.SetType(quant_type); + quantization.SetChannelDim(request->tensor_spec().quant().channel_dim()); + auto scales = request->tensor_spec().quant().scales(); + auto zero_points = request->tensor_spec().quant().zero_points(); + std::vector<float> vx_scales(scales.size()); + std::vector<int32_t> vx_zero_points(zero_points.size()); + for (int i = 0; i < scales.size(); ++i) vx_scales[i] = scales[i]; + for (int i = 0; i < zero_points.size(); ++i) { + vx_zero_points[i] = zero_points[i]; + } + quantization.SetScales(vx_scales); + quantization.SetZeroPoints(vx_zero_points); + + tensor_spec = + tim::vx::TensorSpec(data_type, vx_shape, tensor_attr, quantization); + } + + auto tensor_handle = executable->AllocateTensor(tensor_spec); + tensor_table.push_back(tensor_handle); + response->set_tensor(tensor_table.size() - 1); + + return ::grpc::Status::OK; + } + + ::grpc::Status SetInput(::grpc::ServerContext* context, + const ::rpc::IOTensor* request, + ::rpc::Status* response) override { + VSILOGD("------ Calling gRPC SetInput ------"); + (void)context; + int32_t tensor_id = request->tensor(); + int32_t executable_id = request->executable(); + auto executable = executable_table[executable_id]; + auto tensor_handle = tensor_table[tensor_id]; + if (tensor_handle->GetTensor()->GetSpec().attr_ != + tim::vx::TensorAttribute::INPUT) { + VSILOGE("You are setting a non-input tensor as graph input"); + } + executable->SetInput(tensor_handle); + response->set_status(true); + return ::grpc::Status::OK; + } + + ::grpc::Status SetOutput(::grpc::ServerContext* context, + const ::rpc::IOTensor* request, + ::rpc::Status* response) override { + VSILOGD("------ Calling gRPC SetOutput ------"); + (void)context; + int32_t tensor_id = request->tensor(); + int32_t executable_id = request->executable(); + auto executable = executable_table[executable_id]; + auto tensor_handle =
+    if (tensor_handle->GetTensor()->GetSpec().attr_ !=
+        tim::vx::TensorAttribute::OUTPUT) {
+      VSILOGE("You are setting a non-output tensor as graph output");
+    }
+    executable->SetOutput(tensor_handle);
+    response->set_status(true);
+    return ::grpc::Status::OK;
+  }
+
+  ::grpc::Status Submit(::grpc::ServerContext* context,
+                        const ::rpc::Executable* request,
+                        ::rpc::Status* response) override {
+    VSILOGD("------ Calling gRPC Submit ------");
+    (void)context;
+    int32_t id = request->executable();
+    auto executable = executable_table[id];
+    executable->Submit(executable);
+    response->set_status(true);
+    return ::grpc::Status::OK;
+  }
+
+  ::grpc::Status Trigger(::grpc::ServerContext* context,
+                         const ::rpc::Executor* request,
+                         ::rpc::Status* response) override {
+    VSILOGD("------ Calling gRPC Trigger ------");
+    (void)context;
+    int32_t id = request->executor();
+    auto executor = executor_table[id];
+    executor->Trigger();
+    response->set_status(true);
+    return ::grpc::Status::OK;
+  }
+
+  ::grpc::Status CopyDataToTensor(::grpc::ServerContext* context,
+                                  const ::rpc::TensorData* request,
+                                  ::rpc::Status* response) override {
+    VSILOGD("------ Calling gRPC CopyDataToTensor ------");
+    (void)context;
+    int32_t id = request->tensor();
+    auto tensor_handle = tensor_table[id];
+    std::string data_str = request->data();
+    bool status =
+        tensor_handle->CopyDataToTensor(data_str.data(), data_str.size());
+    response->set_status(status);
+    return ::grpc::Status::OK;
+  }
+
+  ::grpc::Status CopyDataFromTensor(::grpc::ServerContext* context,
+                                    const ::rpc::Tensor* request,
+                                    ::rpc::Data* response) override {
+    VSILOGD("------ Calling gRPC CopyDataFromTensor ------");
+    (void)context;
+    int32_t id = request->tensor();
+    auto tensor_handle = tensor_table[id];
+    size_t data_size = tensor_handle->GetTensor()->GetSpec().GetByteSize();
+    void* ptr = malloc(data_size);
+    bool status = tensor_handle->CopyDataFromTensor(ptr);
+    if (!status) {
+      VSILOGE("CopyDataFromTensor failed");
+      free(ptr);
+      return ::grpc::Status::CANCELLED;
+    }
+    std::string data_str(reinterpret_cast<char*>(ptr), data_size);
+    response->set_data(std::move(data_str));
+    free(ptr);
+    return ::grpc::Status::OK;
+  }
+
+  ::grpc::Status Clean(::grpc::ServerContext* context,
+                       const ::rpc::EmptyMsg* request,
+                       ::rpc::Status* response) override {
+    VSILOGD("------ Calling gRPC Clean ------");
+    (void)context;
+    (void)request;
+    executor_table.clear();
+    executable_table.clear();
+    tensor_table.clear();
+    response->set_status(true);
+    return ::grpc::Status::OK;
+  }
+};
+
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    std::cout << "error: need a listening address (host:port)." 
<< std::endl; + return -1; + } + std::string port(argv[1]); + GRPCPlatformService service; + ::grpc::ServerBuilder builder; + builder.AddListeningPort(port, grpc::InsecureServerCredentials()); + builder.RegisterService(&service); + std::unique_ptr<::grpc::Server> server(builder.BuildAndStart()); + std::cout << "Server listening on " << port << std::endl; + server->Wait(); + + return 0; +} \ No newline at end of file diff --git a/src/tim/vx/platform/grpc/grpc_remote.cc b/src/tim/vx/platform/grpc/grpc_remote.cc new file mode 100644 index 0000000..5bba14d --- /dev/null +++ b/src/tim/vx/platform/grpc/grpc_remote.cc @@ -0,0 +1,176 @@ +/**************************************************************************** +* +* Copyright (c) 2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+*
+*****************************************************************************/
+#include "tim/vx/platform/grpc/grpc_remote.h"
+
+#include "tim/vx/platform/platform.h"
+#include "grpc_platform_client.h"
+
+namespace tim {
+namespace vx {
+namespace platform {
+
+std::vector<std::shared_ptr<IDevice>> GRPCRemoteDevice::Enumerate(
+    const std::string& port) {
+  auto client = std::make_shared<GRPCPlatformClient>(port);
+  int32_t count = client->Enumerate();
+  std::vector<std::shared_ptr<IDevice>> devices;
+  for (int i = 0; i < count; ++i) {
+    devices.push_back(std::make_shared<GRPCRemoteDevice>(i, client));
+  }
+  return devices;
+}
+
+GRPCRemoteDevice::GRPCRemoteDevice(int32_t id,
+                                   std::shared_ptr<GRPCPlatformClient> client)
+    : client_(client) {
+  device_id_ = id;
+}
+
+bool GRPCRemoteDevice::Submit(const std::shared_ptr<Graph>& graph) {
+  (void)graph;
+  return false;
+}
+
+bool GRPCRemoteDevice::Trigger(bool async, async_callback cb) {
+  (void)async;
+  (void)cb;
+  return false;
+}
+
+bool GRPCRemoteDevice::DeviceExit() { return false; }
+
+void GRPCRemoteDevice::WaitDeviceIdle() {}
+
+void GRPCRemoteDevice::RemoteReset() { client_->Clean(); }
+
+GRPCRemoteExecutor::GRPCRemoteExecutor(std::shared_ptr<IDevice> device)
+    : device_(device) {
+  executor_id_ = std::dynamic_pointer_cast<GRPCRemoteDevice>(device)
+                     ->client_->CreateExecutor(device->Id());
+}
+
+bool GRPCRemoteExecutor::Submit(const std::shared_ptr<IExecutable>& executable,
+                                const std::shared_ptr<IExecutable>& ref,
+                                bool after) {
+  (void)executable;
+  (void)ref;
+  (void)after;
+  return false;
+}
+
+bool GRPCRemoteExecutor::Trigger(bool async) {
+  (void)async;
+  return std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)
+      ->client_->Trigger(executor_id_);
+}
+
+std::shared_ptr<IExecutable> GRPCRemoteExecutor::Compile(
+    const std::shared_ptr<Graph>& graph) {
+  size_t inputs_num = graph->InputsTensor().size();
+  size_t outputs_num = graph->OutputsTensor().size();
+  size_t nbg_size = -1;
+
+  graph->CompileToBinary(nullptr, &nbg_size);
+  std::vector<char> nbg_buf(nbg_size);
+  graph->CompileToBinary(nbg_buf.data(), &nbg_size);
+
+  int32_t executable_id =
+      std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)
+          ->client_->CreateExecutable(executor_id_, nbg_buf, inputs_num,
+                                      outputs_num);
+
+  return std::make_shared<GRPCRemoteExecutable>(executable_id, device_);
+}
+
+int32_t GRPCRemoteExecutor::Id() const { return executor_id_; }
+
+GRPCRemoteExecutable::GRPCRemoteExecutable(int32_t id,
+                                           std::shared_ptr<IDevice> device)
+    : executable_id_(id), device_(device) {}
+
+void GRPCRemoteExecutable::SetInput(const std::shared_ptr<ITensorHandle>& th) {
+  int32_t tensor_id =
+      std::dynamic_pointer_cast<GRPCRemoteTensorHandle>(th)->Id();
+  std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)->client_->SetInput(
+      executable_id_, tensor_id);
+}
+
+void GRPCRemoteExecutable::SetOutput(
+    const std::shared_ptr<ITensorHandle>& th) {
+  int32_t tensor_id =
+      std::dynamic_pointer_cast<GRPCRemoteTensorHandle>(th)->Id();
+  std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)->client_->SetOutput(
+      executable_id_, tensor_id);
+}
+
+void GRPCRemoteExecutable::GetOutput(
+    const std::vector<std::shared_ptr<ITensorHandle>>& th) {
+  (void)th;
+}
+
+bool GRPCRemoteExecutable::Submit(const std::shared_ptr<IExecutable>& ref,
+                                  bool after) {
+  (void)after;
+  int32_t executable_id =
+      std::dynamic_pointer_cast<GRPCRemoteExecutable>(ref)->Id();
+  return std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)
+      ->client_->Submit(executable_id);
+}
+
+bool GRPCRemoteExecutable::Trigger(bool async) {
+  (void)async;
+  return false;
+}
+
+bool GRPCRemoteExecutable::Verify() { return false; }
+
+std::shared_ptr<ITensorHandle> GRPCRemoteExecutable::AllocateTensor(
+    const TensorSpec& tensor_spec) {
+  int32_t tensor_id =
+      std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)
+          ->client_->AllocateTensor(executable_id_, tensor_spec);
+
+  return std::make_shared<GRPCRemoteTensorHandle>(tensor_id, device_);
+}
+
+int32_t GRPCRemoteExecutable::Id() const { return executable_id_; }
+
+GRPCRemoteTensorHandle::GRPCRemoteTensorHandle(int32_t id,
+                                               std::shared_ptr<IDevice> device)
+    : tensor_id_(id), device_(device) {}
+
+bool GRPCRemoteTensorHandle::CopyDataToTensor(const void* data,
+                                              uint32_t size_in_bytes) {
+  return std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)
+      ->client_->CopyDataToTensor(tensor_id_, data, size_in_bytes);
+}
+
+bool GRPCRemoteTensorHandle::CopyDataFromTensor(void* data) {
+  return std::dynamic_pointer_cast<GRPCRemoteDevice>(device_)
+      ->client_->CopyDataFromTensor(tensor_id_, data);
+}
+
+int32_t GRPCRemoteTensorHandle::Id() const { return tensor_id_; }
+}  // namespace platform
+}  // namespace vx
+}  // namespace tim
diff --git a/src/tim/vx/platform/lite/lite_native.cc b/src/tim/vx/platform/lite/lite_native.cc
new file mode 100644
index 0000000..137823b
--- /dev/null
+++ b/src/tim/vx/platform/lite/lite_native.cc
@@ -0,0 +1,312 @@
+/****************************************************************************
+*
+* Copyright (c) 2023 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE. 
+*
+*****************************************************************************/
+#include "tim/vx/platform/lite/lite_native.h"
+
+#include <cassert>
+#include <cstring>
+
+#include "tim/vx/graph.h"
+#include "graph_private.h"
+#include "vsi_nn_pub.h"
+
+namespace tim {
+namespace vx {
+namespace platform {
+LiteNativeExecutor::LiteNativeExecutor(
+    const std::shared_ptr<IDevice>& device) {
+  device_ = device;
+  context_ = Context::Create();
+  database_ = VIP_NULL;
+
+  vip_init();
+  vip_query_database(&database_);
+  nbg_linker_init(database_);
+}
+
+LiteNativeExecutor::~LiteNativeExecutor() {
+  nbg_destroy_task(task_descriptor_);
+  nbg_linker_destroy();
+  vip_destroy();
+}
+
+bool LiteNativeExecutor::Submit(const std::shared_ptr<IExecutable>& executable,
+                                const std::shared_ptr<IExecutable>& ref,
+                                bool after) {
+  bool success = false;
+  if (executable == ref) {
+    tasks_.push_back(executable);
+    return true;
+  }
+  // Insert the executable immediately before or after `ref` in the task list.
+  for (size_t i = 0; i < tasks_.size(); i++) {
+    if (tasks_[i].lock() == ref) {
+      if (after == true) {
+        tasks_.insert(tasks_.begin() + i + 1, executable);
+        success = true;
+        break;
+      } else {
+        tasks_.insert(tasks_.begin() + i, executable);
+        success = true;
+        break;
+      }
+    }
+  }
+  return success;
+}
+
+bool LiteNativeExecutor::Trigger(bool async) {
+  (void)async;
+  vip_status_e status = VIP_SUCCESS;
+  std::vector<vip_network> networks;
+  for (auto exe : tasks_) {
+    auto task = exe.lock();
+    task->Verify();
+    vip_network& network =
+        std::dynamic_pointer_cast<LiteNativeExecutable>(task)->network_;
+    networks.push_back(std::move(network));
+  }
+  status =
+      nbg_create_task(networks.size(), networks.data(), &task_descriptor_);
+  if (status != VIP_SUCCESS) {
+    VSILOGE("create task descriptor failed");
+    return false;
+  }
+  status = vip_trigger_task(task_descriptor_);
+  if (status != VIP_SUCCESS) {
+    VSILOGE("trigger task descriptor failed");
+    return false;
+  }
+  status = vip_wait_task(task_descriptor_);
+  if (status != VIP_SUCCESS) {
+    VSILOGE("wait task descriptor failed");
+    nbg_gen_capture(networks.size(), networks.data());
+    return false;
+  }
+  return true;
+}
+
+std::shared_ptr<IExecutable> LiteNativeExecutor::Compile(
+    const std::shared_ptr<Graph>& graph) {
+  GraphImpl* graphimp = dynamic_cast<GraphImpl*>(graph.get());
+  IDevice::device_id_t id = device_->Id();
+  vxSetGraphAttribute(graphimp->graph()->g, VX_GRAPH_DEVICE_INDEX_VIV,
+                      (void*)(&id), sizeof(id));
+  size_t bin_size = -1;
+  graph->CompileToBinary(nullptr, &bin_size);
+  std::vector<char> nb_buf;
+  nb_buf.resize(bin_size);
+  graph->CompileToBinary(nb_buf.data(), &bin_size);
+  return std::make_shared<LiteNativeExecutable>(shared_from_this(), nb_buf);
+}
+
+LiteNativeExecutable::LiteNativeExecutable(
+    const std::shared_ptr<IExecutor>& executor,
+    const std::vector<char>& nb_buf) {
+  executor_ = executor;
+  context_ = executor->Contex();
+  nb_graph_ = context_->CreateGraph();
+  nbg_create_network(nb_buf.data(), nb_buf.size(),
+                     VIP_CREATE_NETWORK_FROM_MEMORY, &network_);
+  input_count_ = 0;
+  output_count_ = 0;
+  coeff_ = nullptr;
+  command_ = nullptr;
+  memory_pool_ = nullptr;
+  others_ = nullptr;
+  pre_command_ = nullptr;
+
+  /* prepare vip network */
+  vip_status_e status = VIP_SUCCESS;
+  nbg_network_memory_size_t buffer_size;
+  nbg_network_memory_buffer_t buffer;
+  vip_memory_t coeff_buffer;
+  vip_memory_t cmd_buffer;
+  vip_memory_t pre_cmd_buffer;
+  vip_memory_t pool_buffer;
+  vip_memory_t others_buffer;
+  nbg_query_network(network_, VIP_NETWORK_PROP_MEMORY_SIZE, &buffer_size);
+
+  vip_allocate_videomemory(buffer_size.coeff, &coeff_);
+  vip_allocate_videomemory(buffer_size.command, &command_);
+  vip_allocate_videomemory(buffer_size.memory_pool, &memory_pool_);
+  vip_allocate_videomemory(buffer_size.others, &others_);
+  vip_allocate_videomemory(buffer_size.pre_command, &pre_command_);
+
+  SetBuffer(&coeff_buffer, coeff_);
+  SetBuffer(&cmd_buffer, command_);
+  SetBuffer(&pre_cmd_buffer, pre_command_);
+  SetBuffer(&pool_buffer, memory_pool_);
+  SetBuffer(&others_buffer, others_);
+
+  buffer.coeff = &coeff_buffer;
+  buffer.command = &cmd_buffer;
+  buffer.memory_pool = &pool_buffer;
+  buffer.others = &others_buffer;
+  buffer.pre_command = &pre_cmd_buffer;
+  buffer.dma_command = nullptr;
+  status = nbg_prepare_network(network_, &buffer);
+
+  vip_flush_videomemory(coeff_, VIP_BUFFER_OPER_TYPE_FLUSH);
+  vip_flush_videomemory(command_, VIP_BUFFER_OPER_TYPE_FLUSH);
+  vip_flush_videomemory(pre_command_, VIP_BUFFER_OPER_TYPE_FLUSH);
+  vip_flush_videomemory(memory_pool_, VIP_BUFFER_OPER_TYPE_FLUSH);
+  vip_flush_videomemory(others_, VIP_BUFFER_OPER_TYPE_FLUSH);
+
+  if (status != VIP_SUCCESS) {
+    VSILOGE("failed to prepare network");
+    assert(false);
+  }
+}
+
+LiteNativeExecutable::~LiteNativeExecutable() {
+  nbg_finish_network(network_);
+  nbg_destroy_network(network_);
+  if (coeff_) {
+    vip_free_videomemory(coeff_);
+    coeff_ = nullptr;
+  }
+  if (command_) {
+    vip_free_videomemory(command_);
+    command_ = nullptr;
+  }
+  if (memory_pool_) {
+    vip_free_videomemory(memory_pool_);
+    memory_pool_ = nullptr;
+  }
+  if (others_) {
+    vip_free_videomemory(others_);
+    others_ = nullptr;
+  }
+  if (pre_command_) {
+    vip_free_videomemory(pre_command_);
+    pre_command_ = nullptr;
+  }
+}
+
+void LiteNativeExecutable::SetInput(const std::shared_ptr<ITensorHandle>& th) {
+  vip_status_e status = VIP_SUCCESS;
+  gcvip_videomemory_t* mem =
+      std::dynamic_pointer_cast<LiteNativeTensorHandle>(th)->tensor_buffer_;
+  vip_memory_t buffer;
+  SetBuffer(&buffer, mem);
+
+  status = nbg_set_input(network_, input_count_, &buffer);
+  if (status != VIP_SUCCESS) {
+    VSILOGE("failed to set input: %d", input_count_);
+    assert(false);
+  }
+  ++input_count_;
+}
+
+void LiteNativeExecutable::SetOutput(
+    const std::shared_ptr<ITensorHandle>& th) {
+  vip_status_e status = VIP_SUCCESS;
+  gcvip_videomemory_t* mem =
+      std::dynamic_pointer_cast<LiteNativeTensorHandle>(th)->tensor_buffer_;
+  vip_memory_t buffer;
+  SetBuffer(&buffer, mem);
+
+  status = nbg_set_output(network_, output_count_, &buffer);
+  if (status != VIP_SUCCESS) {
+    VSILOGE("failed to set output: %d", output_count_);
+    assert(false);
+  }
+  ++output_count_;
+}
+
+void LiteNativeExecutable::GetOutput(
+    const std::vector<std::shared_ptr<ITensorHandle>>& th) {
+  (void)th;
+}
+
+bool LiteNativeExecutable::Submit(const std::shared_ptr<IExecutable>& ref,
+                                  bool after) {
+  bool status = false;
+  std::shared_ptr<IExecutable> executable = shared_from_this();
+  status = Executor()->Submit(executable, ref, after);
+  return status;
+}
+
+bool LiteNativeExecutable::Trigger(bool async) {
+  (void)async;
+  return false;
+}
+
+bool LiteNativeExecutable::Verify() {
+  int32_t input_count = 0;
+  nbg_query_network(network_, VIP_NETWORK_PROP_INPUT_COUNT, &input_count);
+  if (input_count != input_count_) {
+    VSILOGE("input count mismatch, required: %d, provided: %d", input_count,
+            input_count_);
+    return false;
+  }
+  int32_t output_count = 0;
+  nbg_query_network(network_, VIP_NETWORK_PROP_OUTPUT_COUNT, &output_count);
+  if (output_count != output_count_) {
+    VSILOGE("output count mismatch, required: %d, provided: %d", output_count,
+            output_count_);
+    return false;
+  }
+
+  return true;
+}
+
+std::shared_ptr<ITensorHandle> LiteNativeExecutable::AllocateTensor(
+    const TensorSpec& tensor_spec) {
+  auto tensor = nb_graph_->CreateTensor(tensor_spec);
+  return std::make_shared<LiteNativeTensorHandle>(tensor);
+}
+
+void LiteNativeExecutable::SetBuffer(vip_memory_t* dst,
+                                     gcvip_videomemory_t* src) {
+  if (dst && src) {
+    dst->cpu_logical = src->cpu_logical;
+    dst->npu_physical = src->npu_physical;
+    dst->size = src->size;
+  }
+}
+
+LiteNativeTensorHandle::LiteNativeTensorHandle(
+    const std::shared_ptr<Tensor>& tensor) {
+  tensor_ = tensor;
+  uint32_t size = tensor->GetSpec().GetByteSize();
+  vip_allocate_videomemory(size, &tensor_buffer_);
+}
+
+LiteNativeTensorHandle::~LiteNativeTensorHandle() {
+  if (tensor_buffer_) {
+    vip_free_videomemory(tensor_buffer_);
+    tensor_buffer_ = nullptr;
+  }
+}
+
+bool LiteNativeTensorHandle::CopyDataToTensor(const void* data,
+                                              uint32_t size_in_bytes) {
+  memcpy(tensor_buffer_->cpu_logical, data, size_in_bytes);
+  return true;
+}
+
+bool LiteNativeTensorHandle::CopyDataFromTensor(void* data) {
+  memcpy(data, tensor_buffer_->cpu_logical, tensor_buffer_->size);
+  return true;
+}
+
+}  // namespace platform
+}  // namespace vx
+}  // namespace tim
diff --git a/src/tim/vx/platform/native.cc b/src/tim/vx/platform/native.cc
index 45e352e..86c2ab2 100644
--- a/src/tim/vx/platform/native.cc
+++ b/src/tim/vx/platform/native.cc
@@ -28,33 +28,37 @@ namespace tim {
 namespace vx {
 namespace platform {
 
-std::shared_ptr<IExecutable> Compile(const std::shared_ptr<Graph>& graph, const std::shared_ptr<IExecutor>& executor) {
+std::shared_ptr<IExecutable> Compile(
+    const std::shared_ptr<Graph>& graph,
+    const std::shared_ptr<IExecutor>& executor) {
   return executor->Compile(graph);
 }
 
-std::shared_ptr<IExecutable> CreateExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables) {
+std::shared_ptr<IExecutable> CreateExecutableSet(
+    const std::vector<std::shared_ptr<IExecutable>>& executables) {
   ExecutableSet* executable_set = new ExecutableSet(executables);
   std::shared_ptr<IExecutable> executable(executable_set);
   return executable;
 }
 
-IDevice::device_id_t IDevice::Id() const {
-  return device_id_;
-}
+IDevice::device_id_t IDevice::Id() const { return device_id_; }
+
+void IDevice::RemoteReset() {}
 
 NativeDeviceImpl::NativeDeviceImpl(device_id_t id) {
-  vip_device_ = std::make_unique<vip::IDevice> (id);
+  vip_device_ = std::make_unique<vip::IDevice>(id);
   device_id_ = id;
 }
 
 bool NativeDeviceImpl::Submit(const std::shared_ptr<Graph>& graph) {
-  GraphImpl* graphimp = dynamic_cast<GraphImpl*> (graph.get()); // hack to downcast
+  GraphImpl* graphimp =
+      dynamic_cast<GraphImpl*>(graph.get());  // hack to downcast
   vsi_graph_v_.push_back(graphimp->graph());
   return true;
 }
 
 bool NativeDeviceImpl::Trigger(bool async, async_callback cb) {
-// extract graph from tasks
+  // extract graph from tasks
   (void)async;
   bool status = false;
   while (!vsi_graph_v_.empty()) {
@@ -65,21 +69,18 @@ bool NativeDeviceImpl::Trigger(bool async, async_callback cb) {
   return status;
 }
 
-void NativeDeviceImpl::WaitDeviceIdle() {
-  vip_device_->WaitThreadIdle();
-}
+void NativeDeviceImpl::WaitDeviceIdle() { vip_device_->WaitThreadIdle(); }
 
-bool NativeDeviceImpl::DeviceExit() {
-  return vip_device_->ThreadExit();
-}
+bool NativeDeviceImpl::DeviceExit() { return vip_device_->ThreadExit(); }
 
 std::vector<std::shared_ptr<IDevice>> NativeDevice::Enumerate() {
   std::vector<std::shared_ptr<IDevice>> device_v;
   device_id_t deviceCount = 0;
   vsi_nn_context_t context;
   context = vsi_nn_CreateContext();
-  vxQueryContext(context->c, VX_CONTEXT_DEVICE_COUNT_VIV, &deviceCount, sizeof(deviceCount));
-  std::cout<< "Device count = "<< deviceCount <<std::endl;
+  vxQueryContext(context->c, VX_CONTEXT_DEVICE_COUNT_VIV, &deviceCount,
+                 sizeof(deviceCount));
+  std::cout << "Device count = " << deviceCount << std::endl;
   for (device_id_t i = 0; i < deviceCount; i++) {
     IDevice* local_device = new NativeDeviceImpl(i);
     std::shared_ptr<IDevice> local_device_sp(local_device);
@@ -89,24 +90,25 @@ std::vector<std::shared_ptr<IDevice>> NativeDevice::Enumerate() {
   return device_v;
 }
 
-std::shared_ptr<Graph> IExecutable::NBGraph() const {
-  return nb_graph_;
-}
+std::shared_ptr<Graph> IExecutable::NBGraph() const { return nb_graph_; }
 
 std::shared_ptr<IExecutor> IExecutable::Executor() const {
   auto executor = executor_.lock();
   if (!executor) {
-    std::cout<< "Executor unable to lock weak_ptr";
+    std::cout << "Executor unable to lock weak_ptr";
   }
   return executor;
 }
 
-NativeExecutable::NativeExecutable(const std::shared_ptr<IExecutor>& executor, const std::vector<char>& nb_buf, size_t inputs, size_t outputs) {
+NativeExecutable::NativeExecutable(const std::shared_ptr<IExecutor>& executor,
+                                   const std::vector<char>& nb_buf,
+                                   size_t inputs, size_t outputs) {
   executor_ = executor;
   context_ = executor->Contex();
   nb_graph_ = context_->CreateGraph();
   nb_buf_ = nb_buf;
-  nb_node_ = nb_graph_->CreateOperation<tim::vx::ops::NBG>(nb_buf_.data(), inputs, outputs);
+  nb_node_ = nb_graph_->CreateOperation<tim::vx::ops::NBG>(nb_buf_.data(),
+                                                           inputs, outputs);
 }
 
 void NativeExecutable::SetInput(const std::shared_ptr<ITensorHandle>& th) {
@@ -117,11 +119,13 @@ void NativeExecutable::SetOutput(const std::shared_ptr<ITensorHandle>& th) {
   nb_node_->BindOutput(th->GetTensor());
 }
 
-void NativeExecutable::GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) {
+void NativeExecutable::GetOutput(
+    const std::vector<std::shared_ptr<ITensorHandle>>& th) {
   (void)th;
 }
 
-bool NativeExecutable::Submit(const std::shared_ptr<IExecutable>& ref, bool after) {
+bool NativeExecutable::Submit(const std::shared_ptr<IExecutable>& ref,
+                              bool after) {
   bool status = false;
   std::shared_ptr<IExecutable> executable = shared_from_this();
   status = Executor()->Submit(executable, ref, after);
@@ -138,18 +142,18 @@ bool NativeExecutable::Trigger(bool async) {
   return status;
 }
 
-std::shared_ptr<ITensorHandle> NativeExecutable::AllocateTensor(const TensorSpec& tensor_spec) {
+std::shared_ptr<ITensorHandle> NativeExecutable::AllocateTensor(
+    const TensorSpec& tensor_spec) {
   auto tensor = nb_graph_->CreateTensor(tensor_spec);
   ITensorHandle* tensor_handle = new NativeTensorHandle(tensor);
-  std::shared_ptr<ITensorHandle> tensor_handle_sp (tensor_handle);
+  std::shared_ptr<ITensorHandle> tensor_handle_sp(tensor_handle);
   return tensor_handle_sp;
 }
 
-bool NativeExecutable::Verify() {
-  return nb_graph_->Compile();
-}
+bool NativeExecutable::Verify() { return nb_graph_->Compile(); }
 
-ExecutableSet::ExecutableSet(const std::vector<std::shared_ptr<IExecutable>>& executables) {
+ExecutableSet::ExecutableSet(
+    const std::vector<std::shared_ptr<IExecutable>>& executables) {
   executables_ = executables;
   executor_ = executables[0]->Executor();
 }
@@ -162,11 +166,13 @@ void ExecutableSet::SetOutput(const std::shared_ptr<ITensorHandle>& th) {
   (void)th;
 }
 
-void ExecutableSet::GetOutput(const std::vector<std::shared_ptr<ITensorHandle>>& th) {
+void ExecutableSet::GetOutput(
+    const std::vector<std::shared_ptr<ITensorHandle>>& th) {
   (void)th;
 }
 
-bool ExecutableSet::Submit(const std::shared_ptr<IExecutable>& ref, bool after) {
+bool ExecutableSet::Submit(const std::shared_ptr<IExecutable>& ref,
+                           bool after) {
   bool status = false;
   std::shared_ptr<IExecutable> executable = shared_from_this();
   status = Executor()->Submit(executable, ref, after);
@@ -177,7 +183,7 @@ bool ExecutableSet::Trigger(bool async) {
   (void)async;
   bool status = false;
   auto device = Executor()->Device();
-  for ( auto executable : executables_ ) {
+  for (auto executable : executables_) {
     device->Submit(executable->NBGraph());
   }
   status = device->Trigger();
@@ -185,9 +191,10 @@ bool ExecutableSet::Trigger(bool async) {
   return status;
 }
 
-std::shared_ptr<ITensorHandle> ExecutableSet::AllocateTensor(const TensorSpec& tensor_spec) {
+std::shared_ptr<ITensorHandle> ExecutableSet::AllocateTensor(
+    const TensorSpec& tensor_spec) {
  std::shared_ptr<ITensorHandle> tensor_handle_sp;
-  (void) tensor_spec;
+  (void)tensor_spec;
   return tensor_handle_sp;
 }
 
@@ -197,31 +204,32 @@ std::vector<std::shared_ptr<IExecutable>> ExecutableSet::Executables() const {
 
 bool ExecutableSet::Verify() {
   bool status = false;
-  for ( auto executable : executables_ ) {
+  for (auto executable : executables_) {
     status = executable->Verify();
   }
   return status;
 }
 
-std::shared_ptr<Context> IExecutor::Contex() const {
-  return context_;
-}
+std::shared_ptr<Context> IExecutor::Contex() const { return context_; }
 
 NativeExecutor::NativeExecutor(const std::shared_ptr<IDevice>& device) {
   device_ = device;
   context_ = Context::Create();
 }
 
-NativeExecutor::NativeExecutor(const std::shared_ptr<IDevice>& device, const std::shared_ptr<Context>& context) {
+NativeExecutor::NativeExecutor(const std::shared_ptr<IDevice>& device,
+                               const std::shared_ptr<Context>& context) {
   device_ = device;
   context_ = context;
 }
 
-bool NativeExecutor::Submit(const std::shared_ptr<IExecutable>& executable, const std::shared_ptr<IExecutable>& ref, bool after) {
+bool NativeExecutor::Submit(const std::shared_ptr<IExecutable>& executable,
+                            const std::shared_ptr<IExecutable>& ref,
+                            bool after) {
   bool success = false;
   success = executable->Verify();
   if (success == false) {
-    std::cout<<"Executable NBG compile failed";
+    std::cout << "Executable NBG compile failed";
    return false;
   }
   if (executable == ref) {
@@ -231,12 +239,11 @@ bool NativeExecutor::Submit(const std::shared_ptr<IExecutable>& executable, cons
   for (size_t i = 0; i < tasks_.size(); i++) {
     if (tasks_[i].lock() == ref) {
       if (after == true) {
-        tasks_.insert(tasks_.begin()+i+1, executable);
+        tasks_.insert(tasks_.begin() + i + 1, executable);
         success = true;
         break;
-      }
-      else{
-        tasks_.insert(tasks_.begin()+i, executable);
+      } else {
+        tasks_.insert(tasks_.begin() + i, executable);
         success = true;
         break;
       }
@@ -252,7 +259,7 @@ bool NativeExecutor::Trigger(bool async) {
     tasks_.erase(tasks_.begin());
     auto task_ = task.lock();
     if (!task_) {
-      std::cout<< "Task unable to lock weak_ptr";
+      std::cout << "Task unable to lock weak_ptr";
     }
     task_->Trigger();
   }
@@ -260,10 +267,13 @@ bool NativeExecutor::Trigger(bool async) {
   return true;
 }
 
-std::shared_ptr<IExecutable> NativeExecutor::Compile(const std::shared_ptr<Graph>& graph) {
-  GraphImpl* graphimp= dynamic_cast<GraphImpl*> (graph.get()); // hack to downcast
+std::shared_ptr<IExecutable> NativeExecutor::Compile(
+    const std::shared_ptr<Graph>& graph) {
+  GraphImpl* graphimp =
+      dynamic_cast<GraphImpl*>(graph.get());  // hack to downcast
   IDevice::device_id_t id = device_->Id();
-  vxSetGraphAttribute(graphimp->graph()->g, VX_GRAPH_DEVICE_INDEX_VIV, (void*)(&id), sizeof(id));
+  vxSetGraphAttribute(graphimp->graph()->g, VX_GRAPH_DEVICE_INDEX_VIV,
+                      (void*)(&id), sizeof(id));
   size_t bin_size = -1;
   graph->CompileToBinary(nullptr, &bin_size);
   std::vector<char> nb_buf;
@@ -272,24 +282,22 @@ std::shared_ptr<IExecutable> NativeExecutor::Compile(const std::shared_ptr<Grap
   nb_buf.resize(bin_size);
   size_t inputs = graph->InputsTensor().size();
   size_t outputs = graph->OutputsTensor().size();
   graph->CompileToBinary(nb_buf.data(), &bin_size);
   std::shared_ptr<IExecutor> this_sp = shared_from_this();
-  IExecutable* executable = new NativeExecutable(this_sp, nb_buf, inputs, outputs);
+  IExecutable* executable =
+      new NativeExecutable(this_sp, nb_buf, inputs, outputs);
   std::shared_ptr<IExecutable> executable_sp(executable);
   return executable_sp;
 }
 
-std::shared_ptr<IDevice> IExecutor::Device() const {
-  return device_;
-}
+std::shared_ptr<IDevice> IExecutor::Device() const { return device_; }
 
-std::shared_ptr<Tensor> ITensorHandle::GetTensor() const {
-  return tensor_;
-}
+std::shared_ptr<Tensor> ITensorHandle::GetTensor() const { return tensor_; }
 
 NativeTensorHandle::NativeTensorHandle(const std::shared_ptr<Tensor>& tensor) {
   tensor_ = tensor;
 }
 
-bool NativeTensorHandle::CopyDataToTensor(const void* data, uint32_t size_in_bytes) {
+bool NativeTensorHandle::CopyDataToTensor(const void* data,
+                                          uint32_t size_in_bytes) {
   return tensor_->CopyDataToTensor(data, size_in_bytes);
 }
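
For reference, the client-side flow that the service above implements can be
exercised roughly as follows. This is a minimal sketch, not part of the patch
(the shipped sample is samples/grpc/grpc_multi_device.cc): the address
"localhost:50051", the Add graph, and the tensor shapes are illustrative
assumptions; the classes and calls are the ones added in grpc_remote.cc.

#include <memory>
#include <vector>

#include "tim/vx/context.h"
#include "tim/vx/graph.h"
#include "tim/vx/ops/elementwise.h"
#include "tim/vx/tensor.h"
#include "tim/vx/platform/grpc/grpc_remote.h"

int main() {
  // Each id-returning RPC (Enumerate / CreateExecutor / CreateExecutable /
  // AllocateTensor) is hidden behind the GRPCRemote* wrappers; the client
  // only ever sees integer handles into the server-side tables.
  auto devices =
      tim::vx::platform::GRPCRemoteDevice::Enumerate("localhost:50051");
  auto executor =
      std::make_shared<tim::vx::platform::GRPCRemoteExecutor>(devices[0]);

  // Build a graph locally; Compile() serializes it to an NBG and ships the
  // binary to the server via CreateExecutable.
  auto ctx = tim::vx::Context::Create();
  auto graph = ctx->CreateGraph();
  tim::vx::ShapeType shape({2, 2});
  tim::vx::TensorSpec in_spec(tim::vx::DataType::FLOAT32, shape,
                              tim::vx::TensorAttribute::INPUT);
  tim::vx::TensorSpec out_spec(tim::vx::DataType::FLOAT32, shape,
                               tim::vx::TensorAttribute::OUTPUT);
  auto in0 = graph->CreateTensor(in_spec);
  auto in1 = graph->CreateTensor(in_spec);
  auto out = graph->CreateTensor(out_spec);
  auto add = graph->CreateOperation<tim::vx::ops::Add>();
  (*add).BindInputs({in0, in1}).BindOutputs({out});

  auto executable = executor->Compile(graph);

  // Remote tensors are allocated on the server; data crosses the wire only
  // through the CopyDataToTensor / CopyDataFromTensor RPCs.
  auto in0_handle = executable->AllocateTensor(in_spec);
  auto in1_handle = executable->AllocateTensor(in_spec);
  auto out_handle = executable->AllocateTensor(out_spec);
  executable->SetInput(in0_handle);
  executable->SetInput(in1_handle);
  executable->SetOutput(out_handle);

  std::vector<float> a = {1, 2, 3, 4}, b = {5, 6, 7, 8}, c(4);
  in0_handle->CopyDataToTensor(a.data(), a.size() * sizeof(float));
  in1_handle->CopyDataToTensor(b.data(), b.size() * sizeof(float));

  executable->Submit(executable, true);  // enqueue relative to itself
  executor->Trigger();                   // run queued tasks on the server
  out_handle->CopyDataFromTensor(c.data());
  return 0;
}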