/****************************************************************************
*
*    Copyright (c) 2020-2023 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "tim/vx/graph.h"

#include <algorithm>
#ifdef ENABLE_TENSOR_CACHE
#include <openssl/evp.h>
#include <openssl/md5.h>
#endif

#include "context_private.h"
#include "graph_private.h"
#include "op_impl.h"
#include "tensor_private.h"
#include "tim/vx/context.h"
#include "tim/vx/ops/nbg.h"
#include "tim/vx/compile_option.h"
#include "vsi_nn_pub.h"

namespace tim {
namespace vx {

#ifdef ENABLE_TENSOR_CACHE
#define MD5_SECRET_LEN_16 (16)
#define MD5_BYTE_STRING_LEN (4)

// Computes the 32-character uppercase hexadecimal MD5 digest of `src` via the
// OpenSSL EVP interface. Returns an empty string if any EVP step fails.
const std::string calculateMd5Secret32(const std::string& src) {
  std::string md5String;
  EVP_MD_CTX* mdctx;
  const EVP_MD* md;
  uint32_t md_len;
  unsigned char md_value[MD5_SECRET_LEN_16] = {0};
  char tmp[MD5_BYTE_STRING_LEN] = {0};

  md = EVP_md5();
  if (md == NULL) {
    printf("Unknown EVP_md5 message.");
    return md5String;
  }
  mdctx = EVP_MD_CTX_new();
  if (!EVP_DigestInit_ex(mdctx, md, NULL)) {
    printf("EVP_MD_CTX initialization failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;  // Bail out instead of reusing the freed context below.
  }
  if (!EVP_DigestUpdate(mdctx, src.c_str(), src.size())) {
    printf("EVP_MD_CTX update failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  if (!EVP_DigestFinal_ex(mdctx, md_value, &md_len)) {
    printf("EVP_MD_CTX finalization failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  EVP_MD_CTX_free(mdctx);

  for (int i = 0; i < MD5_SECRET_LEN_16; ++i) {
    memset(tmp, 0x00, sizeof(tmp));
    snprintf(tmp, sizeof(tmp), "%02X", md_value[i]);
    md5String += tmp;
  }
  return md5String;
}
#endif

const std::vector<std::shared_ptr<Tensor>> Graph::GetConstantInputs() const {
  std::vector<std::shared_ptr<Tensor>> const_inputs;
  for (auto op : op_vector_) {
    auto const_i = op->ConstantInputsTensor();
    const_inputs.insert(const_inputs.end(), const_i.begin(), const_i.end());
  }
  return const_inputs;
}

GraphImpl::GraphImpl(ContextImpl* context, const CompileOption& options)
    : context_(context),
      graph_(vsi_nn_CreateGraph(context_->context(), 0, 0)),
      tensor_placeholder_(nullptr),
      not_consumed_input_cnt_(0),
      not_consumed_output_cnt_(0),
      options_(options) {}

GraphImpl::~GraphImpl() { vsi_nn_ReleaseGraph(&graph_); }

#ifdef ENABLE_TENSOR_CACHE
std::map<std::string, std::shared_ptr<Tensor>>& GraphImpl::GetTensorCacheMap() {
  return cached_tensor_;
}

// Derives the cache key for a constant tensor: the MD5 digest of its raw
// bytes (capped at the first 512 bytes for large payloads).
const std::string GraphImpl::CalculateCacheKey(const TensorSpec& spec,
                                               const void* data) {
  std::string md5_key;
  uint32_t data_size = 1;
  for (auto it = spec.shape_.begin(); it != spec.shape_.end(); ++it) {
    data_size *= *it;
  }
  switch (spec.datatype_) {
    case DataType::INT16:
    case DataType::UINT16:
    case DataType::FLOAT16:
      data_size *= 2;
      break;
    case DataType::INT32:
    case DataType::UINT32:
    case DataType::FLOAT32:
      data_size *= 4;
      break;
    case DataType::INT64:
      data_size *= 8;
      break;
    default:
      break;
  }
  if (data_size < 512) {
    md5_key = calculateMd5Secret32(std::string((const char*)data, data_size));
  } else {
    md5_key = calculateMd5Secret32(
        std::string((const char*)data, 512));  // Hash only the first 512 bytes.
  }
  return md5_key;
}
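// A minimal sketch of how the key derivation above behaves, assuming a
// hypothetical 2x2 FLOAT32 constant (illustrative comment only, not built):
//
//   float values[4] = {0.f, 1.f, 2.f, 3.f};  // 4 elements * 4 bytes = 16 bytes
//   // data_size = 16 < 512, so the entire payload is hashed:
//   // md5_key == calculateMd5Secret32(std::string((const char*)values, 16));
//
// Payloads of 512 bytes or more are keyed by their first 512 bytes only, so
// two constants sharing a 512-byte prefix map to the same key; only the
// quantization comparison in GetTensorFromCache below distinguishes them.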
std::shared_ptr<Tensor> GraphImpl::GetTensorFromCache(const TensorSpec& spec,
                                                      const void* data) {
  std::shared_ptr<Tensor> tensor;
  std::string md5_key = CalculateCacheKey(spec, data);
  // Reuse a cached tensor only if both the data hash and the quantization
  // parameters match; otherwise create a fresh tensor and cache it.
  if (GetTensorCacheMap().find(md5_key) != GetTensorCacheMap().end() &&
      GetTensorCacheMap()[md5_key]->GetQuantization().Scales() ==
          spec.quantization_.Scales() &&
      GetTensorCacheMap()[md5_key]->GetQuantization().ZeroPoints() ==
          spec.quantization_.ZeroPoints()) {
    tensor = GetTensorCacheMap()[md5_key];
  } else {
    tensor = std::make_shared<TensorImpl>(this, spec, data);
    GetTensorCacheMap()[md5_key] = tensor;
  }
  return tensor;
}
#endif

void GraphImpl::SetCompileOption(const CompileOption& new_options) {
  options_ = new_options;
}

vsi_nn_graph_t* GraphImpl::graph() { return graph_; }

void GraphImpl::AddInput(vsi_nn_tensor_id_t id) {
  if (inputs_.end() == std::find(inputs_.begin(), inputs_.end(), id)) {
    inputs_.push_back(id);
  }
}

void GraphImpl::AddOutput(vsi_nn_tensor_id_t id) {
  if (outputs_.end() == std::find(outputs_.begin(), outputs_.end(), id)) {
    outputs_.push_back(id);
  }
}

void GraphImpl::AddInput(const std::shared_ptr<Tensor>& tensor) {
  if (inputs_tensor_.end() ==
      std::find(inputs_tensor_.begin(), inputs_tensor_.end(), tensor)) {
    inputs_tensor_.push_back(tensor);
  }
}

void GraphImpl::AddOutput(const std::shared_ptr<Tensor>& tensor) {
  if (outputs_tensor_.end() ==
      std::find(outputs_tensor_.begin(), outputs_tensor_.end(), tensor)) {
    outputs_tensor_.push_back(tensor);
  }
}

const std::vector<std::shared_ptr<Tensor>> GraphImpl::InputsTensor() const {
  return inputs_tensor_;
}

const std::vector<std::shared_ptr<Tensor>> GraphImpl::OutputsTensor() const {
  return outputs_tensor_;
}

void GraphImpl::UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
                                         const Operation* op) {
  for (const auto& added_op : op_vector_) {
    if (added_op.get() == op) {
      tensor_consumers_[tensor].push_back(added_op);
    }
  }
}

void GraphImpl::RenewTensorConsumersMap(
    const std::shared_ptr<Tensor>& org_tensor,
    const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) {
  auto exist_op = std::find_if(
      op_vector_.begin(), op_vector_.end(),
      [op](std::shared_ptr<Operation> oper) { return oper.get() == op; });
  if (exist_op == op_vector_.end()) {
    return;  // The given op cannot be found.
  } else {
    auto consumer_to_remove = tensor_consumers_.find(org_tensor);
    if (consumer_to_remove != tensor_consumers_.end())
      tensor_consumers_.erase(consumer_to_remove);
    tensor_consumers_[dst_tensor].push_back(*exist_op);
  }
}

void GraphImpl::UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                        const Operation* op) {
  for (const auto& added_op : op_vector_) {
    if (added_op.get() == op) {
      tensor_producer_[tensor] = added_op;
    }
  }
}

const std::vector<std::shared_ptr<Operation>> GraphImpl::GetConsumersOp(
    std::shared_ptr<Tensor> tensor) const {
  auto consumers = tensor_consumers_.find(tensor);
  if (tensor_consumers_.end() != consumers) {
    return consumers->second;
  } else {
    VSILOGD("Tensor has no consumers, may be graph output.");
    return {};
  }
}

std::shared_ptr<Operation> GraphImpl::GetProducerOp(
    std::shared_ptr<Tensor> tensor) {
  auto producer = tensor_producer_.find(tensor);
  if (tensor_producer_.end() != producer) {
    return producer->second;
  } else {
    VSILOGD("Tensor has no producer, may be graph input.");
    return {};
  }
}

void GraphImpl::PrintGraph() const { vsi_nn_PrintGraph(this->graph_); }
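// A minimal sketch of one-hop dataflow traversal with the lookups above
// (illustrative comment only; `graph` and `tensor` are hypothetical values):
//
//   std::shared_ptr<Operation> producer = graph->GetProducerOp(tensor);
//   for (const auto& consumer : graph->GetConsumersOp(tensor)) {
//     // visit every operation that reads `tensor`
//   }
//
// Both lookups degrade gracefully: a graph input has no producer and a graph
// output typically has no consumers, in which case an empty result is
// returned and a debug message is logged.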
std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                const void* data) {
#ifdef ENABLE_TENSOR_CACHE
  if (spec.attr_ & TensorAttribute::CONSTANT && data != NULL) {
    return GetTensorFromCache(spec, data);
  }
#endif
  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                const DmaBufferDesc& dmafd) {
  auto tensor = std::make_shared<TensorImpl>(this, spec, dmafd);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateIOTensor(const TensorSpec& spec,
                                                  void* data) {
  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateTensorPlaceHolder() {
  if (!tensor_placeholder_) {
    tensor_placeholder_ = std::make_shared<TensorPlaceholder>(this);
  }
  return tensor_placeholder_;
}

bool GraphImpl::Setup() {
  bool status = true;
  auto major = vsi_nn_GetVersionMajor();
  auto minor = vsi_nn_GetVersionMinor();
  auto patch = vsi_nn_GetVersionPatch();
  vsi_nn_SetGraphVersion(graph_, major, minor, patch);

  bool is_fast_mode = options_.isRelaxMode();
  if (is_fast_mode) {
    VSILOGW("Important notice: float model will execute in bfloat16 mode, "
            "which gives better performance but lower precision.");
  }
  vsi_nn_SetGraphFastMode(graph_, is_fast_mode);
#if defined(ENABLE_PLATFORM)
  auto id = options_.getDeviceId();
  vxSetGraphAttribute(graph_->g, VX_GRAPH_DEVICE_INDEX_VIV, (void*)(&id),
                      sizeof(id));
#endif
  std::call_once(setio_once_, [&status, this]() {
    status = (vsi_nn_SetGraphInputs(this->graph_, this->inputs_.data(),
                                    this->inputs_.size()) &&
              vsi_nn_SetGraphOutputs(this->graph_, this->outputs_.data(),
                                     this->outputs_.size()));
  });
  std::call_once(setup_once_, [&status, this]() {
    status = (VSI_SUCCESS == vsi_nn_SetupGraph(this->graph_, true));
  });
  return status;
}

bool GraphImpl::Compile() {
  bool status = true;
  // A tensor can bind to several operations, so these counters are heuristic.
  if (not_consumed_input_cnt_ > 0) {
    VSILOGW("Graph has a free input: an INPUT tensor may have been created "
            "but not consumed.");
  }
  if (not_consumed_output_cnt_ != 0) {
    VSILOGW("Graph has a free output: an OUTPUT tensor may have been created "
            "but not consumed.");
  }
  status = Setup();
  std::call_once(verify_graph_once_, [&status, this]() {
    status = (VSI_SUCCESS == vsi_nn_VerifyGraph(this->graph_));
  });
  return status;
}

bool GraphImpl::CompileToBinary(void* buf, size_t* size) {
  return ((Setup()) && (VSI_SUCCESS == vsi_nn_GenerateNBG(graph_, buf, size)));
}

bool GraphImpl::Run() {
  return ((Compile()) && (VSI_SUCCESS == vsi_nn_RunGraph(graph_)));
}

}  // namespace vx
}  // namespace tim
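// A minimal end-to-end sketch of the public API implemented in this file,
// assuming the elementwise Add operation; shapes, tensor names, and bindings
// are hypothetical (illustrative comment only, not built):
//
//   #include "tim/vx/context.h"
//   #include "tim/vx/graph.h"
//   #include "tim/vx/ops/elementwise.h"
//
//   auto ctx = tim::vx::Context::Create();
//   auto graph = ctx->CreateGraph();
//   tim::vx::ShapeType shape({1, 2, 2, 1});
//   tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT32, shape,
//                                  tim::vx::TensorAttribute::INPUT);
//   tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, shape,
//                                   tim::vx::TensorAttribute::OUTPUT);
//   auto a = graph->CreateTensor(input_spec);     // registered as graph input
//   auto b = graph->CreateTensor(input_spec);
//   auto out = graph->CreateTensor(output_spec);  // registered as graph output
//   auto add = graph->CreateOperation<tim::vx::ops::Add>();
//   (*add).BindInputs({a, b}).BindOutputs({out});
//   bool ok = graph->Compile() && graph->Run();   // Setup + Verify, then run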