diff --git a/.github/workflows/cmake_x86_vsim.yml b/.github/workflows/cmake_x86_vsim.yml
index 03c3b54..c6ea727 100644
--- a/.github/workflows/cmake_x86_vsim.yml
+++ b/.github/workflows/cmake_x86_vsim.yml
@@ -5,7 +5,7 @@ on:
     branches: [ main ]
   workflow_dispatch:
     branches: [ main ]
-  
+
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
   BUILD_TYPE: Release
diff --git a/include/tim/vx/graph.h b/include/tim/vx/graph.h
index 5d594be..14ee305 100644
--- a/include/tim/vx/graph.h
+++ b/include/tim/vx/graph.h
@@ -80,13 +80,16 @@ class Graph {
   virtual const std::vector<std::shared_ptr<Tensor>> InputsTensor() const = 0;
   virtual const std::vector<std::shared_ptr<Tensor>> OutputsTensor() const = 0;
 
-  virtual void UpdateTensorConsumersMap(
-      const std::shared_ptr<Tensor>& tensor,
-      const Operation* op) = 0;
+  virtual void UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
+                                        const Operation* op) = 0;
+  // Re-targets the consumer record of `op` from org_tensor to dst_tensor,
+  // for operations that replace an already-bound input tensor.
+  virtual void RenewTensorConsumersMap(
+      const std::shared_ptr<Tensor>& org_tensor,
+      const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) = 0;
 
-  virtual void UpdateTensorProducerMap(
-      const std::shared_ptr<Tensor>& tensor,
-      const Operation* op) = 0;
+  virtual void UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
+                                       const Operation* op) = 0;
 
   virtual const std::vector<std::shared_ptr<Operation>> GetConsumersOp(
       std::shared_ptr<Tensor> tensor) const = 0;
diff --git a/include/tim/vx/operation.h b/include/tim/vx/operation.h
index df54734..0effef6 100644
--- a/include/tim/vx/operation.h
+++ b/include/tim/vx/operation.h
@@ -49,16 +49,16 @@ class Operation {
   std::unique_ptr<OpImpl>& impl();
   const std::unique_ptr<OpImpl>& impl() const;
   virtual const std::vector<std::shared_ptr<Tensor>> ConstantInputsTensor() const;
-  
+
  protected:
   bool IsAllInputsConst() const;
   std::unique_ptr<OpImpl> impl_;
 
  private:
 // Post processing at the final step on BindInput func
-// - tensor   : input tensor 
+// - tensor   : input tensor
 // - input_idx: the index of input tensor
-  virtual void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, int32_t input_idx); 
+  virtual void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, int32_t input_idx);
 };
 
 }  // namespace vx
diff --git a/include/tim/vx/ops/conv2d.h b/include/tim/vx/ops/conv2d.h
index 1c77394..d9829e6 100644
--- a/include/tim/vx/ops/conv2d.h
+++ b/include/tim/vx/ops/conv2d.h
@@ -37,7 +37,7 @@ namespace ops {
  *
  * Performs a 2-D convolution operation, include classic Conv2D /
  * Depthwise Conv2D / Group Conv2D / Dilation Conv2D.
- * 
+ *
  * Input:
  * - input [WHCN or CWHN].
  * - kernel [ WHIcOc ] (Ic: Input Channels. Oc: Output Channels).
@@ -95,6 +95,15 @@ class Conv2d : public BuiltinOp {
   const std::array<uint32_t, 4> pad_;
   const int32_t multiplier_;
   const DataLayout kernel_layout_;
+
+// The float16 bias handling uses the _Float16 type and is enabled only when
+// building with clang 15 or newer.
+#if defined(__clang__) && (__clang_major__ >= 15)
+#define TIM_VX_OPS_CONV2D_WITH_F16BIAS 1
+ private:
+  void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                           int32_t input_idx) override;
+#endif
 };
 
 }  // namespace ops
diff --git a/src/tim/vx/graph.cc b/src/tim/vx/graph.cc
index bab4d0b..dfafb59 100644
--- a/src/tim/vx/graph.cc
+++ b/src/tim/vx/graph.cc
@@ -195,6 +195,37 @@ void GraphImpl::UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
   }
 }
 
+// Moves the consumer record of `op` from org_tensor to dst_tensor, for an
+// operation that replaces one of its already-bound inputs. Does nothing when
+// `op` is not found in op_vector_.
+void GraphImpl::RenewTensorConsumersMap(
+    const std::shared_ptr<Tensor>& org_tensor,
+    const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) {
+  auto exist_op = std::find_if(
+      op_vector_.begin(), op_vector_.end(),
+      [op](const std::shared_ptr<Operation>& oper) {
+        return oper.get() == op;
+      });
+  if (exist_op == op_vector_.end()) {
+    return;  // given op cannot be found
+  }
+  const auto consumer = *exist_op;
+
+  // Detach only this op from org_tensor: other operations may consume the
+  // same tensor and must keep their records. Drop the entry once it is empty.
+  auto org_entry = tensor_consumers_.find(org_tensor);
+  if (org_entry != tensor_consumers_.end()) {
+    auto& org_consumers = org_entry->second;
+    org_consumers.erase(
+        std::remove(org_consumers.begin(), org_consumers.end(), consumer),
+        org_consumers.end());
+    if (org_consumers.empty()) {
+      tensor_consumers_.erase(org_entry);
+    }
+  }
+  tensor_consumers_[dst_tensor].push_back(consumer);
+}
+
 void GraphImpl::UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                         const Operation* op) {
   for (const auto& added_op : op_vector_) {
diff --git a/src/tim/vx/graph_private.h b/src/tim/vx/graph_private.h
index 5270260..230339b 100644
--- a/src/tim/vx/graph_private.h
+++ b/src/tim/vx/graph_private.h
@@ -62,6 +62,9 @@ class GraphImpl : public Graph {
 
   void UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
                                 const Operation* op) override;
+  void RenewTensorConsumersMap(const std::shared_ptr<Tensor>& org_tensor,
+                               const std::shared_ptr<Tensor>& dst_tensor,
+                               const Operation* op) override;
   void UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                const Operation* op) override;
   const std::vector<std::shared_ptr<Operation>> GetConsumersOp(
diff --git a/src/tim/vx/ops/conv2d.cc b/src/tim/vx/ops/conv2d.cc
index 0fe2f0f..4f93b1c 100644
--- a/src/tim/vx/ops/conv2d.cc
+++ b/src/tim/vx/ops/conv2d.cc
@@ -96,6 +96,43 @@ const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor() const
   }
 }
 
+// Handle float16 bias if the clang compiler version is 15.0.0 or newer
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+// Post-processing for BindInput: once three inputs are bound and the tensor
+// just bound is a constant float16 tensor (the bias), replace it with a
+// float32 constant tensor holding the same values.
+void Conv2d::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                                 int32_t input_idx) {
+  if (tensor->GetDataType() != vx::DataType::FLOAT16 ||
+      !tensor->IsConstTensor() || impl_->inputs_tensor_.size() != 3) {
+    return;
+  }
+
+  uint32_t bias_size = 1;
+  for (auto dim : tensor->GetShape()) {
+    bias_size *= dim;
+  }
+  std::vector<_Float16> in(bias_size);
+  if (!tensor->CopyDataFromTensor(in.data())) {
+    return;  // could not read the bias; leave the original tensor bound
+  }
+
+  // Widen each element to float32.
+  std::vector<float> out(bias_size);
+  for (uint32_t i = 0; i < bias_size; i++) {
+    out[i] = static_cast<float>(in[i]);
+  }
+  TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(),
+                           tim::vx::TensorAttribute::CONSTANT);
+  auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, out.data());
+
+  // Swap the float32 tensor in everywhere the float16 bias was recorded.
+  impl_->inputs_tensor_[2] = out_tensor;
+  impl_->node()->input.tensors[input_idx] = out_tensor->GetId();
+  impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this);
+}
+#endif
+
 }  // namespace ops
 }  // namespace vx
 }  // namespace tim
\ No newline at end of file
diff --git a/src/tim/vx/ops/conv2d_test.cc b/src/tim/vx/ops/conv2d_test.cc
index ca00178..77db127 100644
--- a/src/tim/vx/ops/conv2d_test.cc
+++ b/src/tim/vx/ops/conv2d_test.cc
@@ -29,6 +29,81 @@
 #include "tim/vx/graph.h"
 #include "tim/vx/types.h"
 
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
+  auto ctx = tim::vx::Context::Create();
+  auto graph = ctx->CreateGraph();
+
+  tim::vx::ShapeType input_shape({4, 2, 1, 1});   //whcn
+  tim::vx::ShapeType weight_shape({2, 2, 1, 3});  //whio
+  tim::vx::ShapeType bias_shape({weight_shape[3]});
+  tim::vx::ShapeType output_shape(
+      {4, 2, weight_shape[3], input_shape[3]});  //whcn
+
+  tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT16, input_shape,
+                                 tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec weight_spec(tim::vx::DataType::FLOAT16, weight_shape,
+                                  tim::vx::TensorAttribute::CONSTANT);
+  tim::vx::TensorSpec bias_spec(tim::vx::DataType::FLOAT16, bias_shape,
+                                tim::vx::TensorAttribute::CONSTANT);
+  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT16, output_shape,
+                                  tim::vx::TensorAttribute::OUTPUT);
+
+  // Input data nchw
+  std::vector<_Float16> input_data = {
+      1, 1, 1, 1,  // row = 1
+      2, 2, 3, 2   // row = 2
+  };
+
+  // weight data oihw
+  std::vector<_Float16> weight_data = {
+      1,  2,  3,  4,  //first 2x2 filter
+      -1, 1,  -1, 1,  // second 2x2 filter
+      -1, -1, 1,  1,  // third 2x2 filter
+  };
+
+  // bias data
+  std::vector<_Float16> bias_data = {1, 2, 3};
+
+  // nchw
+  std::vector<_Float16> golden = {// first channel
+                                  18, 22, 21, 8, 7, 9, 8, 3, 2, 3, 1, -1,
+                                  // second channel
+                                  2, 3, 1, 0, 5, 6, 6, 4, -1, -2, -2, 1};
+
+  auto input_tensor = graph->CreateTensor(input_spec);
+  auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
+  auto bias_tensor = graph->CreateTensor(bias_spec, bias_data.data());
+  auto output_tensor = graph->CreateTensor(output_spec);
+
+  auto padding = tim::vx::PadType::SAME;
+  std::array<uint32_t, 2> stride({1, 1});
+  std::array<uint32_t, 2> dilation({0, 0});
+
+  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
+      padding, stride, dilation);
+  (*conv2d)
+      .BindInput(input_tensor)
+      .BindInput(weight_tensor)
+      .BindInput(bias_tensor)
+      .BindOutput(output_tensor);
+
+  EXPECT_TRUE(graph->Compile());
+
+  input_tensor->CopyDataToTensor(input_data.data());
+
+  EXPECT_TRUE(graph->Run());
+
+  uint32_t output_size = 1;
+  for (auto i : output_tensor->GetShape()) {
+    output_size *= i;
+  }
+  std::vector<_Float16> output(output_size);
+  EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
+  EXPECT_TRUE(ArraysMatch(golden, output, static_cast<_Float16>(0.1)));
+}
+#endif
+
 TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
   auto ctx = tim::vx::Context::Create();
   auto graph = ctx->CreateGraph();