diff --git a/cmake/openssl.patch b/cmake/openssl.patch index 8687114..fb6e9cd 100644 --- a/cmake/openssl.patch +++ b/cmake/openssl.patch @@ -61,9 +61,18 @@ index 22ab3cc..00bf90d 100644 + DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) endif() diff --git a/cmake/BuildOpenSSL.cmake b/cmake/BuildOpenSSL.cmake -index e31f4bb..a5c5f72 100644 +index e31f4bb..e5e99a6 100644 --- a/cmake/BuildOpenSSL.cmake +++ b/cmake/BuildOpenSSL.cmake +@@ -29,7 +29,7 @@ include(ExternalProject) + + # find packages + find_package(Git REQUIRED) +-find_package(PythonInterp 3 REQUIRED) ++find_package(PythonInterp 2 REQUIRED) + + # # used to apply various patches to OpenSSL + find_program(PATCH_PROGRAM patch) @@ -50,29 +50,25 @@ endif() if ((EXISTS ${OPENSSL_LIBSSL_PATH}) AND (EXISTS ${OPENSSL_LIBCRYPTO_PATH})) message(WARNING "Not building OpenSSL again. Remove ${OPENSSL_LIBSSL_PATH} and ${OPENSSL_LIBCRYPTO_PATH} for rebuild") diff --git a/include/tim/vx/graph.h b/include/tim/vx/graph.h index 41454df..5d594be 100644 --- a/include/tim/vx/graph.h +++ b/include/tim/vx/graph.h @@ -27,13 +27,17 @@ #ifdef BUILD_WITH_BAZEL #include "vsi_feat_ops_def.h" #endif - +#ifdef ENABLE_TENSOR_CACHE +#include +#include +#endif #include #include - namespace tim { namespace vx { - +#ifdef ENABLE_TENSOR_CACHE +const std::string calculateMd5Secret32(const std::string& src); +#endif class Tensor; struct TensorSpec; struct DmaBufferDesc; diff --git a/include/tim/vx/ops.h b/include/tim/vx/ops.h index 7d73a16..270d4fb 100644 --- a/include/tim/vx/ops.h +++ b/include/tim/vx/ops.h @@ -99,6 +99,7 @@ #include "tim/vx/ops/conv3d.h" #include "tim/vx/ops/custom_base.h" #include "tim/vx/ops/topk.h" +#include "tim/vx/ops/tiny_yolov4_postprocess.h" #include "tim/vx/ops/bidirectional_sequence_lstm.h" #include "tim/vx/ops/hashtable_lookup.h" #include "tim/vx/ops/embedding_lookup.h" diff --git a/include/tim/vx/ops/tiny_yolov4_postprocess.h b/include/tim/vx/ops/tiny_yolov4_postprocess.h new file mode 100644 index 
0000000..0df6da3 --- /dev/null +++ b/include/tim/vx/ops/tiny_yolov4_postprocess.h @@ -0,0 +1,44 @@ +/**************************************************************************** +* +* Copyright (c) 2020-2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+*
+*****************************************************************************/
+#ifndef TIM_VX_OPS_TINY_YOLOV4_H_
+#define TIM_VX_OPS_TINY_YOLOV4_H_
+#include "tim/vx/builtin_op.h"
+#ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS  // -D flag built from DEF_OP() entries in src/tim/CMakeLists.txt
+namespace tim {
+namespace vx {
+namespace ops {
+
+class TinyYolov4Postprocess : public BuiltinOp {
+ public:
+  TinyYolov4Postprocess(Graph* graph);
+
+  std::shared_ptr Clone(std::shared_ptr& graph) const override;
+};
+
+} // namespace ops
+} // namespace vx
+} // namespace tim
+
+#endif  // VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
+#endif /* TIM_VX_OPS_TINY_YOLOV4_H_ */
diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt
index 1d23b78..face782 100644
--- a/src/tim/CMakeLists.txt
+++ b/src/tim/CMakeLists.txt
@@ -126,15 +126,21 @@ include_directories(${INC_DIRS})
 # convert op list as compile flags so that we can implement compile compatable easier
 if(${TIM_VX_USE_EXTERNAL_OVXLIB})
     file(STRINGS "${OVXLIB_INC}/interface/ops.def" ops_file_content)
+    file(STRINGS "${OVXLIB_INC}/custom/custom_ops.def" custom_ops_file_content)
 else()
     file(STRINGS "./vx/internal/include/interface/ops.def" ops_file_content)
+    file(STRINGS "./vx/internal/include/custom/custom_ops.def" custom_ops_file_content)
 endif()
 string(LENGTH "/*; Add new ops to the end.;*_/" comment_len)
 string(SUBSTRING "${ops_file_content}" ${comment_len} -1 op_list_only)
 string(REGEX REPLACE "DEF_OP\\(" "-DVSI_FEAT_OP_" op_list_tmp ${op_list_only})
 string(REGEX REPLACE "\\)" " " op_as_flags ${op_list_tmp})
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${op_as_flags}")
+string(LENGTH "/*; Add custom ops to the end.;*_/" custom_comment_len)
+string(SUBSTRING "${custom_ops_file_content}" ${custom_comment_len} -1 custom_op_list_only)
+string(REGEX REPLACE "DEF_OP\\(" "-DVSI_FEAT_OP_" custom_op_list_tmp ${custom_op_list_only})
+string(REGEX REPLACE "\\)" " " custom_op_as_flags ${custom_op_list_tmp})
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${op_as_flags} ${custom_op_as_flags}")
 add_library(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
 target_include_directories(${TARGET_NAME} PRIVATE ${INC_DIRS})
@@ -170,7 +176,13 @@ endif()
 # Install
 install(TARGETS ${TARGET_NAME} ${TARGET_NAME}
     DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
-
+
+if(TIM_VX_ENABLE_TENSOR_CACHE)
+install(
+    FILES
+    ${openssl-cmake_BINARY_DIR}/usr/local/include/openssl/evp.h
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/openssl)
+endif()
 install(
     FILES
     ${CMAKE_SOURCE_DIR}/include/tim/vx/builtin_op.h
diff --git a/src/tim/transform/layout_inference.cc b/src/tim/transform/layout_inference.cc
index b4fd0a4..1980260 100644
--- a/src/tim/transform/layout_inference.cc
+++ b/src/tim/transform/layout_inference.cc
@@ -63,6 +63,7 @@
 #include "ops/conv3d_layout_inference.h"
 #include "ops/default_layout_inference.h"
 #include "ops/transpose_layout_inference.h"
+#include "ops/yolov4_layout_inference.h"
 #include "ops/unidirectional_lstm_layout_inference.h"
 #include "ops/broadcast_layout_inference.h"
 #include "ops/unidirectional_rnn_layout_inference.h"
@@ -274,6 +275,9 @@ std::vector> HandleLayoutInfer(
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN, UnidirectionalRnn);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN, BidirectionalRnn);
+#ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
+#endif
     REGIST_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
     REGIST_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
     // use default layout inference
diff --git a/src/tim/transform/ops/yolov4_layout_inference.h b/src/tim/transform/ops/yolov4_layout_inference.h
new file mode 100644
index 0000000..2e44bbe
--- /dev/null
+++ b/src/tim/transform/ops/yolov4_layout_inference.h
@@ -0,0 +1,77 @@
+/****************************************************************************
+ *
+ * Copyright (c) 2020-2023 Vivante Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ *****************************************************************************/
+#ifndef TIM_LAYOUT_INFER_YOLOV4_LAYOUT_INFERENCE_H_
+#define TIM_LAYOUT_INFER_YOLOV4_LAYOUT_INFERENCE_H_
+
+#include "tim/vx/ops/tiny_yolov4_postprocess.h"
+
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "builtin_op_impl.h"
+
+namespace tim {
+namespace transform {
+
+class Yolov4LayoutInfer : public OpLayoutInfer {
+ public:
+  Yolov4LayoutInfer(
+      const std::shared_ptr op,
+      std::shared_ptr& context)
+      : OpLayoutInfer(op, context) {}
+
+  void OnInputs(
+      std::vector>& next_tensors) override {
+    auto cloned_op = op_->Clone(context_->infer_graph_);
+
+    for (const auto& i_src : op_->impl()->InputsTensor()) {
+      if (i_src->IsConstTensor()) {
+        std::vector dataRef(i_src->GetSpec().GetByteSize());
+        i_src->CopyDataFromTensor(dataRef.data());
+        auto i_infer = context_->infer_graph_->CreateTensor(
+            i_src->GetSpec(), (const void*)dataRef.data());
+        context_->SetPermuteVector(i_src, MakeShared(i_src->GetShape().size()));  // rank from the tensor, as for outputs
+        context_->UpdateTensorMap(i_src, i_infer);
+      }
+      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+    }
+
+    std::vector> required_pv_lst;
+    for (auto out_tensor: op_->impl()->OutputsTensor()) {
+      required_pv_lst.push_back(MakeShared(out_tensor->GetShape().size()));
+    }
+    auto out_infer = CreateOutputsTensor(required_pv_lst);
+
+    (*cloned_op).BindOutputs(out_infer);
+    uint32_t i = 0;
+    for (auto out_tensor : op_->impl()->OutputsTensor()) {
+      context_->SetPermuteVector(out_tensor, required_pv_lst[i++]);
+      next_tensors.push_back(out_tensor);
+    }
+  }
+};
+
+} // namespace transform
+} // namespace tim
+
+#endif
diff --git a/src/tim/vx/graph.cc b/src/tim/vx/graph.cc
index 0d34d1e..bab4d0b 100644
--- a/src/tim/vx/graph.cc
+++ b/src/tim/vx/graph.cc
@@ -26,6 +26,7 @@
 #ifdef ENABLE_TENSOR_CACHE
 #include
+#include
 #endif
 #include "context_private.h"
@@ -39,6 +40,55 @@
 namespace tim {
 namespace vx {
+#ifdef ENABLE_TENSOR_CACHE
+#define MD5_SECRET_LEN_16 (16)
+#define MD5_BYTE_STRING_LEN (4)
+// Hashes `src` with MD5 through OpenSSL's EVP interface and returns the
+// 16-byte digest as 32 upper-case hex characters. Returns an empty string
+// when any EVP call fails.
+const std::string calculateMd5Secret32(const std::string& src) {
+  std::string md5String;
+  unsigned int md_len = 0;  // EVP_DigestFinal_ex takes an unsigned int*
+  unsigned char md_value[MD5_SECRET_LEN_16] = {0};
+  char tmp[MD5_BYTE_STRING_LEN] = {0};
+
+  const EVP_MD* md = EVP_md5();
+  if (md == NULL) {
+    printf("Unknown EVP_md5 message.");
+    return md5String;
+  }
+  EVP_MD_CTX* mdctx = EVP_MD_CTX_new();
+  if (mdctx == NULL) {
+    printf("EVP_MD_CTX initialization failed.");
+    return md5String;
+  }
+  // Every failure path below frees the context once and returns right away,
+  // so a freed context is never passed to a later EVP call or freed twice.
+  if (!EVP_DigestInit_ex(mdctx, md, NULL)) {
+    printf("EVP_MD_CTX initialization failed.");
+    EVP_MD_CTX_free(mdctx);
+    return md5String;
+  }
+  if (!EVP_DigestUpdate(mdctx, src.c_str(), src.size())) {
+    printf("EVP_MD_CTX update failed.");
+    EVP_MD_CTX_free(mdctx);
+    return md5String;
+  }
+  if (!EVP_DigestFinal_ex(mdctx, md_value, &md_len)) {
+    printf("EVP_MD_CTX finalization failed.");
+    EVP_MD_CTX_free(mdctx);
+    return md5String;
+  }
+  EVP_MD_CTX_free(mdctx);
+
+  // Two hex digits per digest byte; tmp is re-zeroed before each write.
+  for (int i = 0; i < MD5_SECRET_LEN_16; ++i) {
+    memset(tmp, 0x00, sizeof(tmp));
+    snprintf(tmp, sizeof(tmp), "%02X", md_value[i]);
+    md5String += tmp;
+  }
+  return md5String;
+}
+#endif
 const std::vector> Graph::GetConstantInputs() const {
   std::vector> const_inputs;
@@ -64,44 +103,7 @@ std::map>& GraphImpl::GetTensorCac
   return cached_tensor_;
 }
-#define MD5_SECRET_LEN_16 (16)
-#define MD5_BYTE_STRING_LEN (4)
-const std::string GraphImpl::caclulateMd5Secret32(const std::string& src) {
-  std::string md5String;
-  EVP_MD_CTX *mdctx;
-  const EVP_MD *md;
-  uint32_t md_len;
-  unsigned char md_value[MD5_SECRET_LEN_16] = {0};
-  char tmp[MD5_BYTE_STRING_LEN] = {0};
-
-  md = EVP_md5();
-  if (md == NULL) {
-    VSILOGE("Unknown EVP_md5 message.");
-  }
-  mdctx = EVP_MD_CTX_new();
-  if (!EVP_DigestInit_ex(mdctx, md, NULL)) {
-    VSILOGE("EVP_MD_CTX initialization failed.");
-    EVP_MD_CTX_free(mdctx);
-  }
-  if (!EVP_DigestUpdate(mdctx, src.c_str(), src.size())) {
-    VSILOGE("EVP_MD_CTX update failed.");
-    EVP_MD_CTX_free(mdctx);
-  }
-  if (!EVP_DigestFinal_ex(mdctx, md_value, &md_len)) {
-    VSILOGE("EVP_MD_CTX finalization failed.");
-    EVP_MD_CTX_free(mdctx);
-  }
- 
EVP_MD_CTX_free(mdctx); - - for (int i = 0; i < MD5_SECRET_LEN_16; ++i) { - memset(tmp, 0x00, sizeof(tmp)); - snprintf(tmp, sizeof(tmp), "%02X", md_value[i]); - md5String += tmp; - } - return md5String; -} - -const std::string GraphImpl::CaclulateCacheKey(const TensorSpec& spec, const void* data) { +const std::string GraphImpl::CalculateCacheKey(const TensorSpec& spec, const void* data) { std::string md5_key; uint32_t data_size = 1; for (auto it = spec.shape_.begin(); it != spec.shape_.end(); ++it) { @@ -125,9 +127,9 @@ const std::string GraphImpl::CaclulateCacheKey(const TensorSpec& spec, const voi break; } if (data_size < 512) { - md5_key = caclulateMd5Secret32(std::string((const char*)data, data_size)); + md5_key = calculateMd5Secret32(std::string((const char*)data, data_size)); } else { - md5_key = caclulateMd5Secret32( + md5_key = calculateMd5Secret32( std::string((const char*)data, 512)); //Take first 512 bytes } return md5_key; @@ -135,7 +137,7 @@ const std::string GraphImpl::CaclulateCacheKey(const TensorSpec& spec, const voi std::shared_ptr GraphImpl::GetTensorFromCache(const TensorSpec& spec, const void* data) { std::shared_ptr tensor; - std::string md5_key = CaclulateCacheKey(spec, data); + std::string md5_key = CalculateCacheKey(spec, data); if (GetTensorCacheMap().find(md5_key) != GetTensorCacheMap().end() && GetTensorCacheMap()[md5_key]->GetQuantization().Scales() == spec.quantization_.Scales() && GetTensorCacheMap()[md5_key]->GetQuantization().ZeroPoints() == spec.quantization_.ZeroPoints()) { diff --git a/src/tim/vx/graph_private.h b/src/tim/vx/graph_private.h index 318b01d..5270260 100644 --- a/src/tim/vx/graph_private.h +++ b/src/tim/vx/graph_private.h @@ -46,8 +46,7 @@ class GraphImpl : public Graph { ~GraphImpl(); #ifdef ENABLE_TENSOR_CACHE std::shared_ptr GetTensorFromCache(const TensorSpec& spec, const void* data); - const std::string CaclulateCacheKey(const TensorSpec& spec, const void* data); - const std::string caclulateMd5Secret32(const 
std::string& src); + const std::string CalculateCacheKey(const TensorSpec& spec, const void* data); std::map>& GetTensorCacheMap(); #endif /// Return the low-level graph object diff --git a/src/tim/vx/ops/tiny_yolov4_postprocess.cc b/src/tim/vx/ops/tiny_yolov4_postprocess.cc new file mode 100644 index 0000000..85f64e1 --- /dev/null +++ b/src/tim/vx/ops/tiny_yolov4_postprocess.cc @@ -0,0 +1,41 @@ +/**************************************************************************** +* +* Copyright (c) 2020-2023 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+*
+*****************************************************************************/
+#include "tim/vx/ops/tiny_yolov4_postprocess.h"
+
+#include "builtin_op_impl.h"
+#include "vsi_nn_pub.h"
+#ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
+namespace tim {
+namespace vx {
+namespace ops {
+TinyYolov4Postprocess::TinyYolov4Postprocess(Graph* graph)
+    : BuiltinOp(graph, VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, 4, 2){}  // NOTE(review): 4, 2 look like input/output counts - confirm against BuiltinOp
+
+std::shared_ptr TinyYolov4Postprocess::Clone(std::shared_ptr& graph) const {
+  return graph->CreateOperation();
+}
+} // namespace ops
+} // namespace vx
+} // namespace tim
+#endif  // VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS