// TIM-VX/src/tim/vx/platform/lite/lite_native.cc

/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "tim/vx/platform/lite/lite_native.h"
#include <cassert>
#include "tim/vx/graph.h"
#include "graph_private.h"
#include "vsi_nn_pub.h"
namespace tim {
namespace vx {
namespace platform {
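
// LiteNativeExecutor drives one or more compiled executables on a lite VIP
// device. The constructor brings up the lite driver (vip_init), queries the
// hardware database, and initializes the NBG linker with it; the destructor
// tears these down in reverse order.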
LiteNativeExecutor::LiteNativeExecutor(const std::shared_ptr<IDevice>& device) {
  device_ = device;
  context_ = Context::Create();
  database_ = VIP_NULL;
  vip_init();
  vip_query_database(&database_);
  nbg_linker_init(database_);
}

LiteNativeExecutor::~LiteNativeExecutor() {
  // Note: task_descriptor_ is created by Trigger(); tearing it down here
  // assumes Trigger() has been called before destruction.
  nbg_destroy_task(task_descriptor_);
  nbg_linker_destroy();
  vip_destroy();
}
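
// Queue `executable` for execution relative to `ref`: if the two are the
// same object it is simply appended, otherwise it is inserted immediately
// before or after `ref` depending on `after`. Returns false when `ref` is
// not already in the queue.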
bool LiteNativeExecutor::Submit(const std::shared_ptr<IExecutable>& executable,
                                const std::shared_ptr<IExecutable>& ref,
                                bool after) {
  bool success = false;
  if (executable == ref) {
    tasks_.push_back(executable);
    return true;
  }
  for (size_t i = 0; i < tasks_.size(); i++) {
    if (tasks_[i].lock() == ref) {
      // Insert immediately after or before the reference executable.
      if (after) {
        tasks_.insert(tasks_.begin() + i + 1, executable);
      } else {
        tasks_.insert(tasks_.begin() + i, executable);
      }
      success = true;
      break;
    }
  }
  return success;
}
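
// Run every queued executable as a single lite task: verify each one,
// collect the underlying vip_network handles, build one task descriptor,
// then trigger it and wait for the hardware to finish. On a wait failure,
// nbg_gen_capture is invoked (presumably to dump capture data for
// debugging) before returning false.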
bool LiteNativeExecutor::Trigger(bool async) {
  (void)async;
  vip_status_e status = VIP_SUCCESS;
  std::vector<vip_network> networks;
  for (auto& exe : tasks_) {
    auto task = exe.lock();
    if (!task || !task->Verify()) {
      VSILOGE("executable in task queue is expired or failed verification");
      return false;
    }
    // Copy the network handle; the executable keeps ownership of it.
    networks.push_back(
        std::dynamic_pointer_cast<LiteNativeExecutable>(task)->network_);
  }
  status = nbg_create_task(networks.size(), networks.data(), &task_descriptor_);
  if (status != VIP_SUCCESS) {
    VSILOGE("create task descriptor fail");
    return false;
  }
  status = vip_trigger_task(task_descriptor_);
  if (status != VIP_SUCCESS) {
    VSILOGE("trigger task descriptor fail");
    return false;
  }
  status = vip_wait_task(task_descriptor_);
  if (status != VIP_SUCCESS) {
    VSILOGE("wait task descriptor fail");
    nbg_gen_capture(networks.size(), networks.data());
    return false;
  }
  return true;
}
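
// Compile a TIM-VX graph into a Network Binary Graph (NBG) for this
// executor's device. The graph is first bound to the device index, then
// CompileToBinary is called twice: once with a null buffer to query the
// binary size, and once more to fill the buffer that backs the executable.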
std::shared_ptr<IExecutable> LiteNativeExecutor::Compile(
    const std::shared_ptr<Graph>& graph) {
  GraphImpl* graphimp = dynamic_cast<GraphImpl*>(graph.get());
  IDevice::device_id_t id = device_->Id();
  vxSetGraphAttribute(graphimp->graph()->g, VX_GRAPH_DEVICE_INDEX_VIV,
                      (void*)(&id), sizeof(id));
  // First call queries the required NBG size, second call fills the buffer.
  size_t bin_size = 0;
  graph->CompileToBinary(nullptr, &bin_size);
  std::vector<char> nb_buf;
  nb_buf.resize(bin_size);
  graph->CompileToBinary(nb_buf.data(), &bin_size);
  return std::make_shared<LiteNativeExecutable>(shared_from_this(), nb_buf);
}
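
// LiteNativeExecutable wraps a vip_network created from an NBG buffer.
// Construction allocates video memory for every region the network reports
// it needs (coefficients, command buffer, pre-command buffer, memory pool,
// and "others"), hands those buffers to nbg_prepare_network, and flushes
// them so the prepared contents are visible to the NPU.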
LiteNativeExecutable::LiteNativeExecutable(
    const std::shared_ptr<IExecutor>& executor,
    const std::vector<char>& nb_buf) {
  executor_ = executor;
  context_ = executor->Contex();
  nb_graph_ = context_->CreateGraph();
  nbg_create_network(nb_buf.data(), nb_buf.size(),
                     VIP_CREATE_NETWORK_FROM_MEMORY, &network_);
  input_count_ = 0;
  output_count_ = 0;
  coeff_ = nullptr;
  command_ = nullptr;
  memory_pool_ = nullptr;
  others_ = nullptr;
  pre_command_ = nullptr;
  /* prepare vip network */
  vip_status_e status = VIP_SUCCESS;
  nbg_network_memory_size_t buffer_size;
  nbg_network_memory_buffer_t buffer;
  vip_memory_t coeff_buffer;
  vip_memory_t cmd_buffer;
  vip_memory_t pre_cmd_buffer;
  vip_memory_t pool_buffer;
  vip_memory_t others_buffer;
  // Query how much video memory each region of the network needs, then
  // allocate and describe those regions for nbg_prepare_network.
  nbg_query_network(network_, VIP_NETWORK_PROP_MEMORY_SIZE, &buffer_size);
  vip_allocate_videomemory(buffer_size.coeff, &coeff_);
  vip_allocate_videomemory(buffer_size.command, &command_);
  vip_allocate_videomemory(buffer_size.memory_pool, &memory_pool_);
  vip_allocate_videomemory(buffer_size.others, &others_);
  vip_allocate_videomemory(buffer_size.pre_command, &pre_command_);
  SetBuffer(&coeff_buffer, coeff_);
  SetBuffer(&cmd_buffer, command_);
  SetBuffer(&pre_cmd_buffer, pre_command_);
  SetBuffer(&pool_buffer, memory_pool_);
  SetBuffer(&others_buffer, others_);
  buffer.coeff = &coeff_buffer;
  buffer.command = &cmd_buffer;
  buffer.memory_pool = &pool_buffer;
  buffer.others = &others_buffer;
  buffer.pre_command = &pre_cmd_buffer;
  buffer.dma_command = nullptr;
  status = nbg_prepare_network(network_, &buffer);
  if (status != VIP_SUCCESS) {
    VSILOGE("failed to prepare network");
    assert(false);
  }
  // Flush the prepared buffers so their contents reach the NPU.
  vip_flush_videomemory(coeff_, VIP_BUFFER_OPER_TYPE_FLUSH);
  vip_flush_videomemory(command_, VIP_BUFFER_OPER_TYPE_FLUSH);
  vip_flush_videomemory(pre_command_, VIP_BUFFER_OPER_TYPE_FLUSH);
  vip_flush_videomemory(memory_pool_, VIP_BUFFER_OPER_TYPE_FLUSH);
  vip_flush_videomemory(others_, VIP_BUFFER_OPER_TYPE_FLUSH);
}

LiteNativeExecutable::~LiteNativeExecutable() {
  nbg_finish_network(network_);
  nbg_destroy_network(network_);
  if (coeff_) {
    vip_free_videomemory(coeff_);
    coeff_ = nullptr;
  }
  if (command_) {
    vip_free_videomemory(command_);
    command_ = nullptr;
  }
  if (memory_pool_) {
    vip_free_videomemory(memory_pool_);
    memory_pool_ = nullptr;
  }
  if (others_) {
    vip_free_videomemory(others_);
    others_ = nullptr;
  }
  if (pre_command_) {
    vip_free_videomemory(pre_command_);
    pre_command_ = nullptr;
  }
}
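
// Bind a tensor handle's video memory as the next network input (SetInput)
// or output (SetOutput). Bindings are indexed in call order; input_count_
// and output_count_ track the next index.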
void LiteNativeExecutable::SetInput(const std::shared_ptr<ITensorHandle>& th) {
  vip_status_e status = VIP_SUCCESS;
  gcvip_videomemory_t* mem =
      std::dynamic_pointer_cast<LiteNativeTensorHandle>(th)->tensor_buffer_;
  vip_memory_t buffer;
  SetBuffer(&buffer, mem);
  status = nbg_set_input(network_, input_count_, &buffer);
  if (status != VIP_SUCCESS) {
    VSILOGE("failed to set input: %d", input_count_);
    assert(false);
  }
  ++input_count_;
}

void LiteNativeExecutable::SetOutput(const std::shared_ptr<ITensorHandle>& th) {
  vip_status_e status = VIP_SUCCESS;
  gcvip_videomemory_t* mem =
      std::dynamic_pointer_cast<LiteNativeTensorHandle>(th)->tensor_buffer_;
  vip_memory_t buffer;
  SetBuffer(&buffer, mem);
  status = nbg_set_output(network_, output_count_, &buffer);
  if (status != VIP_SUCCESS) {
    VSILOGE("failed to set output: %d", output_count_);
    assert(false);
  }
  ++output_count_;
}
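
// GetOutput is intentionally a no-op here: results are read back directly
// from each output handle's video memory via CopyDataFromTensor.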
void LiteNativeExecutable::GetOutput(
    const std::vector<std::shared_ptr<ITensorHandle>>& th) {
  (void)th;
}

bool LiteNativeExecutable::Submit(const std::shared_ptr<IExecutable>& ref,
                                  bool after) {
  bool status = false;
  std::shared_ptr<IExecutable> executable = shared_from_this();
  status = Executor()->Submit(executable, ref, after);
  return status;
}

bool LiteNativeExecutable::Trigger(bool async) {
  // Standalone triggering is not supported; executables run through
  // LiteNativeExecutor::Trigger().
  (void)async;
  return false;
}
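
// Check that the number of inputs/outputs bound through SetInput/SetOutput
// matches what the NBG network expects.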
bool LiteNativeExecutable::Verify() {
  int32_t input_count = 0;
  nbg_query_network(network_, VIP_NETWORK_PROP_INPUT_COUNT, &input_count);
  if (input_count != input_count_) {
    VSILOGE("input count mismatch, required: %d, provided: %d", input_count,
            input_count_);
    return false;
  }
  int32_t output_count = 0;
  nbg_query_network(network_, VIP_NETWORK_PROP_OUTPUT_COUNT, &output_count);
  if (output_count != output_count_) {
    VSILOGE("output count mismatch, required: %d, provided: %d", output_count,
            output_count_);
    return false;
  }
  return true;
}
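
// Tensor handles allocated here own a block of VIP video memory sized to
// the tensor spec; SetBuffer copies the CPU/NPU addresses and size of such
// a block into the vip_memory_t view consumed by the NBG runtime calls.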
std::shared_ptr<ITensorHandle> LiteNativeExecutable::AllocateTensor(
    const TensorSpec& tensor_spec) {
  auto tensor = nb_graph_->CreateTensor(tensor_spec);
  return std::make_shared<LiteNativeTensorHandle>(tensor);
}

void LiteNativeExecutable::SetBuffer(vip_memory_t* dst,
                                     gcvip_videomemory_t* src) {
  if (dst && src) {
    dst->cpu_logical = src->cpu_logical;
    dst->npu_physical = src->npu_physical;
    dst->size = src->size;
  }
}
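
// A LiteNativeTensorHandle pairs a TIM-VX tensor with a video-memory block
// of the tensor's byte size; the block is freed when the handle is destroyed.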
LiteNativeTensorHandle::LiteNativeTensorHandle(
    const std::shared_ptr<Tensor>& tensor) {
  tensor_ = tensor;
  uint32_t size = tensor->GetSpec().GetByteSize();
  vip_allocate_videomemory(size, &tensor_buffer_);
}

LiteNativeTensorHandle::~LiteNativeTensorHandle() {
  if (tensor_buffer_) {
    vip_free_videomemory(tensor_buffer_);
    tensor_buffer_ = nullptr;
  }
}
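
// Host<->device data movement goes through the CPU-visible mapping
// (cpu_logical) of the tensor's video memory. Any cache maintenance needed
// for the NPU to observe freshly written data is assumed to be handled by
// the driver.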
bool LiteNativeTensorHandle::CopyDataToTensor(const void* data,
                                              uint32_t size_in_bytes) {
  // Caller must ensure size_in_bytes does not exceed tensor_buffer_->size.
  memcpy(tensor_buffer_->cpu_logical, data, size_in_bytes);
  return true;
}

bool LiteNativeTensorHandle::CopyDataFromTensor(void* data) {
  memcpy(data, tensor_buffer_->cpu_logical, tensor_buffer_->size);
  return true;
}

} // namespace platform
} // namespace vx
} // namespace tim