// TIM-VX/src/tim/vx/graph.cc

/****************************************************************************
*
* Copyright (c) 2020-2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "tim/vx/graph.h"
#include <algorithm>
#ifdef ENABLE_TENSOR_CACHE
#include <openssl/evp.h>
#include <cstring>
#endif
#include "context_private.h"
#include "graph_private.h"
#include "op_impl.h"
#include "tensor_private.h"
#include "tim/vx/context.h"
#include "tim/vx/ops/nbg.h"
#include "tim/vx/compile_option.h"
#include "vsi_nn_pub.h"

namespace tim {
namespace vx {

#ifdef ENABLE_TENSOR_CACHE
#define MD5_SECRET_LEN_16 (16)
#define MD5_BYTE_STRING_LEN (4)
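
// A minimal usage sketch of the helper below (hypothetical input): hashing
// the string "abc" yields the standard MD5 test vector, rendered uppercase
// by the "%02X" format:
//   calculateMd5Secret32("abc") == "900150983CD24FB0D6963F7D28E17F72"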
// Computes the 32-character uppercase hex MD5 digest of `src` via the
// OpenSSL EVP interface. Returns an empty string if any EVP call fails.
const std::string calculateMd5Secret32(const std::string& src) {
  std::string md5String;
  uint32_t md_len = 0;
  unsigned char md_value[MD5_SECRET_LEN_16] = {0};
  char tmp[MD5_BYTE_STRING_LEN] = {0};

  const EVP_MD* md = EVP_md5();
  if (md == NULL) {
    printf("Unknown EVP_md5 message digest.");
    return md5String;
  }
  EVP_MD_CTX* mdctx = EVP_MD_CTX_new();
  if (mdctx == NULL) {
    printf("EVP_MD_CTX allocation failed.");
    return md5String;
  }
  // Bail out early on failure so the freed context is never reused.
  if (!EVP_DigestInit_ex(mdctx, md, NULL)) {
    printf("EVP_MD_CTX initialization failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  if (!EVP_DigestUpdate(mdctx, src.c_str(), src.size())) {
    printf("EVP_MD_CTX update failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  if (!EVP_DigestFinal_ex(mdctx, md_value, &md_len)) {
    printf("EVP_MD_CTX finalization failed.");
    EVP_MD_CTX_free(mdctx);
    return md5String;
  }
  EVP_MD_CTX_free(mdctx);

  // Render each digest byte as two uppercase hex characters.
  for (int i = 0; i < MD5_SECRET_LEN_16; ++i) {
    memset(tmp, 0x00, sizeof(tmp));
    snprintf(tmp, sizeof(tmp), "%02X", md_value[i]);
    md5String += tmp;
  }
  return md5String;
}
#endif

// Collects the constant input tensors of every operation in the graph.
const std::vector<std::shared_ptr<Tensor>> Graph::GetConstantInputs() const {
  std::vector<std::shared_ptr<Tensor>> const_inputs;
  for (auto op : op_vector_) {
    auto const_i = op->ConstantInputsTensor();
    const_inputs.insert(const_inputs.end(), const_i.begin(), const_i.end());
  }
  return const_inputs;
}

GraphImpl::GraphImpl(ContextImpl* context, const CompileOption& options)
    : context_(context),
      graph_(vsi_nn_CreateGraph(context_->context(), 0, 0)),
      tensor_placeholder_(nullptr),
      not_consumed_input_cnt_(0),
      not_consumed_output_cnt_(0),
      options_(options) {}

GraphImpl::~GraphImpl() { vsi_nn_ReleaseGraph(&graph_); }

#ifdef ENABLE_TENSOR_CACHE
std::map<std::string, std::shared_ptr<tim::vx::Tensor>>&
GraphImpl::GetTensorCacheMap() {
  return cached_tensor_;
}

const std::string GraphImpl::CalculateCacheKey(const TensorSpec& spec,
                                               const void* data) {
  std::string md5_key;
  uint32_t data_size = 1;
  for (auto it = spec.shape_.begin(); it != spec.shape_.end(); ++it) {
    data_size *= *it;
  }
  // Scale the element count by the element size in bytes; 8-bit types fall
  // through to the default of one byte per element.
  switch (spec.datatype_) {
    case DataType::INT16:
    case DataType::UINT16:
    case DataType::FLOAT16:
      data_size *= 2;
      break;
    case DataType::INT32:
    case DataType::UINT32:
    case DataType::FLOAT32:
      data_size *= 4;
      break;
    case DataType::INT64:
      data_size *= 8;
      break;
    default:
      break;
  }
  // Hash at most the first 512 bytes of the constant data.
  if (data_size < 512) {
    md5_key = calculateMd5Secret32(std::string((const char*)data, data_size));
  } else {
    md5_key = calculateMd5Secret32(std::string((const char*)data, 512));
  }
  return md5_key;
}
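
// Worked example (hypothetical spec): a CONSTANT tensor of shape {256} with
// DataType::FLOAT32 has data_size = 256 * 4 = 1024 bytes, so only its first
// 512 bytes feed the MD5 key; two large constants differing only beyond byte
// 512 would share a key and are only distinguished by the quantization
// comparison in GetTensorFromCache below.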

std::shared_ptr<Tensor> GraphImpl::GetTensorFromCache(const TensorSpec& spec,
                                                      const void* data) {
  std::shared_ptr<tim::vx::Tensor> tensor;
  std::string md5_key = CalculateCacheKey(spec, data);
  auto cached = GetTensorCacheMap().find(md5_key);
  // Reuse a cached tensor only if both the data hash and the quantization
  // parameters match; otherwise create a fresh tensor and cache it.
  if (cached != GetTensorCacheMap().end() &&
      cached->second->GetQuantization().Scales() ==
          spec.quantization_.Scales() &&
      cached->second->GetQuantization().ZeroPoints() ==
          spec.quantization_.ZeroPoints()) {
    tensor = cached->second;
  } else {
    tensor = std::make_shared<TensorImpl>(this, spec, data);
    GetTensorCacheMap()[md5_key] = tensor;
  }
  return tensor;
}
#endif

void GraphImpl::SetCompileOption(const CompileOption& new_options) {
  options_ = new_options;
}

vsi_nn_graph_t* GraphImpl::graph() { return graph_; }

void GraphImpl::AddInput(vsi_nn_tensor_id_t id) {
  if (inputs_.end() == std::find(inputs_.begin(), inputs_.end(), id)) {
    inputs_.push_back(id);
  }
}

void GraphImpl::AddOutput(vsi_nn_tensor_id_t id) {
  if (outputs_.end() == std::find(outputs_.begin(), outputs_.end(), id)) {
    outputs_.push_back(id);
  }
}

void GraphImpl::AddInput(const std::shared_ptr<Tensor>& tensor) {
  if (inputs_tensor_.end() ==
      std::find(inputs_tensor_.begin(), inputs_tensor_.end(), tensor)) {
    inputs_tensor_.push_back(tensor);
  }
}

void GraphImpl::AddOutput(const std::shared_ptr<Tensor>& tensor) {
  if (outputs_tensor_.end() ==
      std::find(outputs_tensor_.begin(), outputs_tensor_.end(), tensor)) {
    outputs_tensor_.push_back(tensor);
  }
}

const std::vector<std::shared_ptr<Tensor>> GraphImpl::InputsTensor() const {
  return inputs_tensor_;
}

const std::vector<std::shared_ptr<Tensor>> GraphImpl::OutputsTensor() const {
  return outputs_tensor_;
}

// Registers `op` as a consumer of `tensor`, provided the op has already been
// added to this graph.
void GraphImpl::UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
                                         const Operation* op) {
  for (const auto& added_op : op_vector_) {
    if (added_op.get() == op) {
      tensor_consumers_[tensor].push_back(added_op);
    }
  }
}

// Moves the consumer bookkeeping for `op` from org_tensor to dst_tensor.
void GraphImpl::RenewTensorConsumersMap(
    const std::shared_ptr<Tensor>& org_tensor,
    const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) {
  auto exist_op = std::find_if(
      op_vector_.begin(), op_vector_.end(),
      [op](std::shared_ptr<Operation> oper) { return oper.get() == op; });
  if (exist_op == op_vector_.end()) {
    return;  // The given op has not been added to this graph.
  }
  auto consumer_to_remove = tensor_consumers_.find(org_tensor);
  if (consumer_to_remove != tensor_consumers_.end()) {
    tensor_consumers_.erase(consumer_to_remove);
  }
  tensor_consumers_[dst_tensor].push_back(*exist_op);
}

void GraphImpl::UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                        const Operation* op) {
  for (const auto& added_op : op_vector_) {
    if (added_op.get() == op) {
      tensor_producer_[tensor] = added_op;
    }
  }
}

const std::vector<std::shared_ptr<Operation>> GraphImpl::GetConsumersOp(
    std::shared_ptr<Tensor> tensor) const {
  auto consumers = tensor_consumers_.find(tensor);
  if (tensor_consumers_.end() != consumers) {
    return consumers->second;
  } else {
    VSILOGD("Tensor has no consumers, may be graph output.");
    return {};
  }
}

std::shared_ptr<Operation> GraphImpl::GetProducerOp(
    std::shared_ptr<Tensor> tensor) {
  auto producer = tensor_producer_.find(tensor);
  if (tensor_producer_.end() != producer) {
    return producer->second;
  } else {
    VSILOGD("Tensor has no producer, may be graph input.");
    return {};
  }
}
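
// A minimal sketch of the bookkeeping above (hypothetical ops): after a conv
// op registers tensor t as its output and a relu op registers t as an input,
// GetProducerOp(t) returns the conv op and GetConsumersOp(t) returns {relu}.
// Graph inputs have no producer and graph outputs have no consumers, which
// is why both lookups tolerate a miss.
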
void GraphImpl::PrintGraph() const { vsi_nn_PrintGraph(this->graph_); }

std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                const void* data) {
#ifdef ENABLE_TENSOR_CACHE
  // Constant tensors with host data are deduplicated through the cache.
  if (spec.attr_ & TensorAttribute::CONSTANT && data != NULL) {
    return GetTensorFromCache(spec, data);
  }
#endif
  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                const DmaBufferDesc& dmafd) {
  auto tensor = std::make_shared<TensorImpl>(this, spec, dmafd);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}

std::shared_ptr<Tensor> GraphImpl::CreateIOTensor(const TensorSpec& spec,
                                                  void* data) {
  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
  if (spec.attr_ & TensorAttribute::INPUT) {
    this->AddInput(tensor);
    this->AddInput(tensor->GetId());
    this->ProduceInput();
  }
  if (spec.attr_ & TensorAttribute::OUTPUT) {
    this->AddOutput(tensor);
    this->AddOutput(tensor->GetId());
    this->ProduceOutput();
  }
  return tensor;
}
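
// A minimal usage sketch (shape and variable names are illustrative): tensors
// carrying the INPUT or OUTPUT attribute are registered with the graph
// automatically by the factory methods above.
//   tim::vx::TensorSpec spec(tim::vx::DataType::FLOAT32,
//                            tim::vx::ShapeType({1, 2, 2, 1}),
//                            tim::vx::TensorAttribute::INPUT);
//   auto input = graph->CreateTensor(spec);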

std::shared_ptr<Tensor> GraphImpl::CreateTensorPlaceHolder() {
  if (!tensor_placeholder_) {
    tensor_placeholder_ = std::make_shared<TensorPlaceholder>(this);
  }
  return tensor_placeholder_;
}

bool GraphImpl::Setup() {
  bool status = true;
  auto major = vsi_nn_GetVersionMajor();
  auto minor = vsi_nn_GetVersionMinor();
  auto patch = vsi_nn_GetVersionPatch();
  vsi_nn_SetGraphVersion(graph_, major, minor, patch);
  bool is_fast_mode = options_.isRelaxMode();
  if (is_fast_mode) {
    VSILOGW("Important notice: float model executed in bfloat16 mode, "
            "which gives better performance but lower precision");
  }
  vsi_nn_SetGraphFastMode(graph_, is_fast_mode);
#if defined(ENABLE_PLATFORM)
  auto id = options_.getDeviceId();
  vxSetGraphAttribute(graph_->g, VX_GRAPH_DEVICE_INDEX_VIV, (void*)(&id),
                      sizeof(id));
#endif
  // The graph I/O binding and vsi_nn_SetupGraph each run exactly once, even
  // if Setup() is invoked repeatedly; combine the results so an earlier
  // failure is not masked by a later success.
  std::call_once(setio_once_, [&status, this]() {
    status = (vsi_nn_SetGraphInputs(this->graph_, this->inputs_.data(),
                                    this->inputs_.size()) &&
              vsi_nn_SetGraphOutputs(this->graph_, this->outputs_.data(),
                                     this->outputs_.size()));
  });
  std::call_once(setup_once_, [&status, this]() {
    bool setup_ok = (VSI_SUCCESS == vsi_nn_SetupGraph(this->graph_, true));
    status = status && setup_ok;
  });
  return status;
}

bool GraphImpl::Compile() {
  bool status = true;
  if (not_consumed_input_cnt_ > 0) {
    // A tensor can bind to more than one operation.
    VSILOGW("Graph has free input: an INPUT tensor may have been created "
            "but never consumed.");
  }
  if (not_consumed_output_cnt_ != 0) {
    VSILOGW("Graph has free output: an OUTPUT tensor may have been created "
            "but never produced.");
  }
  status = Setup();
  // Verification runs only once; keep an earlier Setup() failure visible.
  std::call_once(verify_graph_once_, [&status, this]() {
    bool verified = (VSI_SUCCESS == vsi_nn_VerifyGraph(this->graph_));
    status = status && verified;
  });
  return status;
}

bool GraphImpl::CompileToBinary(void* buf, size_t* size) {
  // Builds the graph, then serializes it as a Network Binary Graph (NBG).
  return ((Setup()) && (VSI_SUCCESS == vsi_nn_GenerateNBG(graph_, buf, size)));
}

bool GraphImpl::Run() {
  return ((Compile()) && (VSI_SUCCESS == vsi_nn_RunGraph(graph_)));
}
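
// End-to-end usage sketch (names are illustrative, mirroring the public
// TIM-VX API): Compile() is idempotent thanks to the call_once guards above,
// so Run() may be invoked repeatedly after a single verification pass.
//   auto ctx = tim::vx::Context::Create();
//   auto graph = ctx->CreateGraph();
//   /* ... create tensors and operations, bind I/O ... */
//   graph->Compile();  // Setup + VerifyGraph, each executed once
//   graph->Run();      // vsi_nn_RunGraph on the verified graph
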
} // namespace vx
} // namespace tim