/****************************************************************************
*
*    Copyright (c) 2020-2023 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "tim/vx/graph.h"

#include <algorithm>

#ifdef ENABLE_TENSOR_CACHE
#include <openssl/evp.h>
#include <cstring>
#endif

#include "context_private.h"
#include "graph_private.h"
#include "op_impl.h"
#include "tensor_private.h"
#include "tim/vx/context.h"
#include "tim/vx/ops/nbg.h"
#include "tim/vx/compile_option.h"
#include "vsi_nn_pub.h"

namespace tim {
namespace vx {

#ifdef ENABLE_TENSOR_CACHE
#define MD5_SECRET_LEN_16 (16)
#define MD5_BYTE_STRING_LEN (4)
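
// Hashes `src` with OpenSSL's EVP MD5 interface and returns the 16-byte
// digest as a 32-character uppercase hex string (e.g. "D41D8CD9..."), used
// below as the tensor-cache key. On any EVP failure it logs the error and
// returns an empty string.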
const std::string calculateMd5Secret32(const std::string& src) {
  std::string md5String;
  EVP_MD_CTX* mdctx;
  const EVP_MD* md;
  uint32_t md_len;
  unsigned char md_value[MD5_SECRET_LEN_16] = {0};
  char tmp[MD5_BYTE_STRING_LEN] = {0};

  md = EVP_md5();
  if (md == NULL) {
    VSILOGE("EVP_md5 message digest is not available.");
    return md5String;
  }
  mdctx = EVP_MD_CTX_new();
  if (mdctx == NULL) {
    VSILOGE("EVP_MD_CTX allocation failed.");
    return md5String;
  }
  if (!EVP_DigestInit_ex(mdctx, md, NULL)) {
    VSILOGE("EVP_MD_CTX initialization failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;  // Return early: the context has already been freed.
  }
  if (!EVP_DigestUpdate(mdctx, src.c_str(), src.size())) {
    VSILOGE("EVP_MD_CTX update failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  if (!EVP_DigestFinal_ex(mdctx, md_value, &md_len)) {
    VSILOGE("EVP_MD_CTX finalization failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  EVP_MD_CTX_free(mdctx);

  // Hex-encode the 16 digest bytes, two characters per byte.
  for (int i = 0; i < MD5_SECRET_LEN_16; ++i) {
    memset(tmp, 0x00, sizeof(tmp));
    snprintf(tmp, sizeof(tmp), "%02X", md_value[i]);
    md5String += tmp;
  }
  return md5String;
}
#endif
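
// Walks every operation recorded on the graph and concatenates the constant
// input tensors each op reports, preserving op order; duplicates are not
// filtered here.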
const std::vector<std::shared_ptr<Tensor>> Graph::GetConstantInputs() const {
  std::vector<std::shared_ptr<Tensor>> const_inputs;
  for (auto op : op_vector_) {
    auto const_i = op->ConstantInputsTensor();
    const_inputs.insert(const_inputs.end(), const_i.begin(), const_i.end());
  }
  return const_inputs;
}

GraphImpl::GraphImpl(ContextImpl* context, const CompileOption& options)
    : context_(context),
      graph_(vsi_nn_CreateGraph(context_->context(), 0, 0)),
      tensor_placeholder_(nullptr),
      not_consumed_input_cnt_(0),
      not_consumed_output_cnt_(0),
      options_(options) {}

GraphImpl::~GraphImpl() { vsi_nn_ReleaseGraph(&graph_); }

#ifdef ENABLE_TENSOR_CACHE
std::map<std::string, std::shared_ptr<tim::vx::Tensor>>&
GraphImpl::GetTensorCacheMap() {
  return cached_tensor_;
}
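
// Derives the tensor-cache key by MD5-hashing the constant data itself: the
// element count from spec.shape_ is scaled by the element size to get the
// byte size, and at most the first 512 bytes are hashed. The key encodes only
// raw bytes, so two constants sharing a 512-byte prefix map to the same key;
// the quantization check in GetTensorFromCache() below guards against reusing
// a tensor whose quantization does not match.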
const std::string GraphImpl::CalculateCacheKey(const TensorSpec& spec,
                                               const void* data) {
  std::string md5_key;
  uint32_t data_size = 1;
  for (auto it = spec.shape_.begin(); it != spec.shape_.end(); ++it) {
    data_size *= *it;
  }
  // Scale the element count by the element size in bytes; 1-byte types
  // (e.g. INT8/UINT8) fall through to the default branch unchanged.
  switch (spec.datatype_) {
    case DataType::INT16:
    case DataType::UINT16:
    case DataType::FLOAT16:
      data_size *= 2;
      break;
    case DataType::INT32:
    case DataType::UINT32:
    case DataType::FLOAT32:
      data_size *= 4;
      break;
    case DataType::INT64:
      data_size *= 8;
      break;
    default:
      break;
  }
  if (data_size < 512) {
    md5_key = calculateMd5Secret32(
        std::string(static_cast<const char*>(data), data_size));
  } else {
    // Hash only the first 512 bytes to bound the cost for large constants.
    md5_key = calculateMd5Secret32(
        std::string(static_cast<const char*>(data), 512));
  }
  return md5_key;
}
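
// Returns a cached constant tensor when both the MD5 key and the quantization
// parameters (scales and zero points) match; otherwise creates a new
// TensorImpl and records it under the key, overwriting any previous entry
// whose quantization differed.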
std::shared_ptr<Tensor> GraphImpl::GetTensorFromCache(const TensorSpec& spec,
                                                      const void* data) {
  std::shared_ptr<tim::vx::Tensor> tensor;
  std::string md5_key = CalculateCacheKey(spec, data);
  auto cached = GetTensorCacheMap().find(md5_key);
  if (cached != GetTensorCacheMap().end() &&
      cached->second->GetQuantization().Scales() ==
          spec.quantization_.Scales() &&
      cached->second->GetQuantization().ZeroPoints() ==
          spec.quantization_.ZeroPoints()) {
    tensor = cached->second;
  } else {
    tensor = std::make_shared<TensorImpl>(this, spec, data);
    GetTensorCacheMap()[md5_key] = tensor;
  }
  return tensor;
}
#endif

void GraphImpl::SetCompileOption(const CompileOption& new_options) {
  options_ = new_options;
}

vsi_nn_graph_t* GraphImpl::graph() { return graph_; }

void GraphImpl::AddInput(vsi_nn_tensor_id_t id) {
  if (inputs_.end() == std::find(inputs_.begin(), inputs_.end(), id)) {
    inputs_.push_back(id);
  }
}

void GraphImpl::AddOutput(vsi_nn_tensor_id_t id) {
  if (outputs_.end() == std::find(outputs_.begin(), outputs_.end(), id)) {
    outputs_.push_back(id);
  }
}

void GraphImpl::AddInput(const std::shared_ptr<Tensor>& tensor) {
  if (inputs_tensor_.end() ==
      std::find(inputs_tensor_.begin(), inputs_tensor_.end(), tensor)) {
    inputs_tensor_.push_back(tensor);
  }
}

void GraphImpl::AddOutput(const std::shared_ptr<Tensor>& tensor) {
  if (outputs_tensor_.end() ==
      std::find(outputs_tensor_.begin(), outputs_tensor_.end(), tensor)) {
    outputs_tensor_.push_back(tensor);
  }
}

const std::vector<std::shared_ptr<Tensor>> GraphImpl::InputsTensor() const {
  return inputs_tensor_;
}

const std::vector<std::shared_ptr<Tensor>> GraphImpl::OutputsTensor() const {
  return outputs_tensor_;
}
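
// The next three helpers maintain the tensor-to-op bookkeeping used for graph
// transformations: tensor_consumers_ maps a tensor to every op that reads it,
// tensor_producer_ to the single op that writes it. An op is recorded only if
// it has already been added to op_vector_.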
void GraphImpl::UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
                                         const Operation* op) {
  for (const auto& added_op : op_vector_) {
    if (added_op.get() == op) {
      tensor_consumers_[tensor].push_back(added_op);
    }
  }
}

void GraphImpl::RenewTensorConsumersMap(
    const std::shared_ptr<Tensor>& org_tensor,
    const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) {
  auto exist_op = std::find_if(
      op_vector_.begin(), op_vector_.end(),
      [op](std::shared_ptr<Operation> oper) { return oper.get() == op; });
  if (exist_op == op_vector_.end()) {
    return;  // The given op has not been added to this graph.
  } else {
    // Drop all consumers recorded for the original tensor, then register the
    // op as a consumer of the replacement tensor.
    auto consumer_to_remove = tensor_consumers_.find(org_tensor);
    if (consumer_to_remove != tensor_consumers_.end())
      tensor_consumers_.erase(consumer_to_remove);
    tensor_consumers_[dst_tensor].push_back(*exist_op);
  }
}

void GraphImpl::UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                        const Operation* op) {
  for (const auto& added_op : op_vector_) {
    if (added_op.get() == op) {
      tensor_producer_[tensor] = added_op;
    }
  }
}

const std::vector<std::shared_ptr<Operation>> GraphImpl::GetConsumersOp(
    std::shared_ptr<Tensor> tensor) const {
  auto consumers = tensor_consumers_.find(tensor);
  if (tensor_consumers_.end() != consumers) {
    return consumers->second;
  } else {
    VSILOGD("Tensor has no consumers; it may be a graph output.");
    return {};
  }
}

std::shared_ptr<Operation> GraphImpl::GetProducerOp(
    std::shared_ptr<Tensor> tensor) {
  auto producer = tensor_producer_.find(tensor);
  if (tensor_producer_.end() != producer) {
    return producer->second;
  } else {
    VSILOGD("Tensor has no producer; it may be a graph input.");
    return {};
  }
}

void GraphImpl::PrintGraph() const { vsi_nn_PrintGraph(this->graph_); }
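
// The three factory functions below create a TensorImpl and, when the spec
// carries the INPUT or OUTPUT attribute, register the tensor (and its ovxlib
// id) as a graph input/output. A minimal usage sketch following the public
// TIM-VX API; the shape and variable names are illustrative only:
//
//   auto ctx = tim::vx::Context::Create();
//   auto graph = ctx->CreateGraph();
//   tim::vx::ShapeType shape({1, 2, 2, 1});
//   tim::vx::TensorSpec spec(tim::vx::DataType::FLOAT32, shape,
//                            tim::vx::TensorAttribute::INPUT);
//   auto input = graph->CreateTensor(spec);
//
// With ENABLE_TENSOR_CACHE, constant tensors with non-null data are served
// from the MD5-keyed cache above instead of being recreated.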
std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                const void* data) {
#ifdef ENABLE_TENSOR_CACHE
  if ((spec.attr_ & TensorAttribute::CONSTANT) && data != NULL) {
    return GetTensorFromCache(spec, data);
  }
#endif
  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                const DmaBufferDesc& dmafd) {
  auto tensor = std::make_shared<TensorImpl>(this, spec, dmafd);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateIOTensor(const TensorSpec& spec,
                                                  void* data) {
  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}
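
// Lazily creates a single graph-wide placeholder tensor; repeated calls
// return the same shared instance.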
std::shared_ptr<Tensor> GraphImpl::CreateTensorPlaceHolder() {
  if (!tensor_placeholder_) {
    tensor_placeholder_ = std::make_shared<TensorPlaceholder>(this);
  }

  return tensor_placeholder_;
}
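
// Stamps the ovxlib version on the graph, applies relaxed (bfloat16) mode and
// the target device index, then binds the graph inputs/outputs and runs
// vsi_nn_SetupGraph exactly once. Note the std::call_once guards: on repeated
// calls the guarded work is skipped and `status` keeps its initial true, so
// only the first call's result is meaningful.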
bool GraphImpl::Setup() {
  bool status = true;

  auto major = vsi_nn_GetVersionMajor();
  auto minor = vsi_nn_GetVersionMinor();
  auto patch = vsi_nn_GetVersionPatch();

  vsi_nn_SetGraphVersion(graph_, major, minor, patch);

  bool is_fast_mode = options_.isRelaxMode();
  if (is_fast_mode) {
    VSILOGW("Important notice: float model executed in bfloat16 "
            "mode, which gives better performance but lower precision");
  }
  vsi_nn_SetGraphFastMode(graph_, is_fast_mode);

#if defined(ENABLE_PLATFORM)
  auto id = options_.getDeviceId();
  vxSetGraphAttribute(graph_->g, VX_GRAPH_DEVICE_INDEX_VIV, (void*)(&id),
                      sizeof(id));
#endif

  std::call_once(setio_once_, [&status, this]() {
    status = (vsi_nn_SetGraphInputs(this->graph_, this->inputs_.data(),
                                    this->inputs_.size()) &&
              vsi_nn_SetGraphOutputs(this->graph_, this->outputs_.data(),
                                     this->outputs_.size()));
  });

  std::call_once(setup_once_, [&status, this]() {
    status = (VSI_SUCCESS == vsi_nn_SetupGraph(this->graph_, true));
  });
  return status;
}

bool GraphImpl::Compile() {
  bool status = true;
  if (not_consumed_input_cnt_ > 0) {
    // A tensor can bind to several operations, so this is a warning only.
    VSILOGW("Graph has a free input: an INPUT tensor was created but never "
            "consumed by an operation.");
  }
  if (not_consumed_output_cnt_ != 0) {
    VSILOGW("Graph has a free output: an OUTPUT tensor was created but never "
            "connected to an operation.");
  }
  status = Setup();
  std::call_once(verify_graph_once_, [&status, this]() {
    status = (VSI_SUCCESS == vsi_nn_VerifyGraph(this->graph_));
  });

  return status;
}
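
// Serializes the compiled graph to a Network Binary Graph (NBG). The usual
// pattern in the TIM-VX samples (a sketch, assuming vsi_nn_GenerateNBG
// reports the required size when buf is null) is two calls:
//
//   size_t bin_size = 0;
//   graph->CompileToBinary(nullptr, &bin_size);     // query the size
//   std::vector<char> nbg(bin_size);
//   graph->CompileToBinary(nbg.data(), &bin_size);  // write the binary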
bool GraphImpl::CompileToBinary(void* buf, size_t* size) {
  return ((Setup()) && (VSI_SUCCESS == vsi_nn_GenerateNBG(graph_, buf, size)));
}

bool GraphImpl::Run() {
  return ((Compile()) && (VSI_SUCCESS == vsi_nn_RunGraph(graph_)));
}
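
// Typical end-to-end flow, as an illustrative sketch using the public Tensor
// API (in_data/out_data are hypothetical user buffers):
//
//   input->CopyDataToTensor(in_data, in_bytes);
//   if (graph->Run()) {
//     output->CopyDataFromTensor(out_data);
//   }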

} // namespace vx
} // namespace tim