From 13da73bbe388d6aa3ce01272cf1685dc797e5738 Mon Sep 17 00:00:00 2001
From: "Qin.Chen"
Date: Thu, 1 Dec 2022 05:43:27 +0000
Subject: [PATCH] Fix maxpoolgrad, hide unused pool value output

Type: Bug Fix
---
 include/tim/vx/ops/maxpoolgrad.h |  7 ++-
 src/tim/vx/ops/maxpoolgrad.cc    | 94 +++++++++++++++++++++-----------
 2 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/include/tim/vx/ops/maxpoolgrad.h b/include/tim/vx/ops/maxpoolgrad.h
index 9b02d13..e4d58e8 100644
--- a/include/tim/vx/ops/maxpoolgrad.h
+++ b/include/tim/vx/ops/maxpoolgrad.h
@@ -37,7 +37,8 @@ namespace ops {
  * ## MaxpoolGrad
  *
  * Acquire the gradient of 2-D Max pooling operation's input tensor. \
- * Like the tensorflow_XLA op SelectAndScatter, see https://tensorflow.google.cn/xla/operation_semantics?hl=en#selectandscatter.
+ * Like the tensorflow_XLA op SelectAndScatter, see \
+ * https://tensorflow.google.cn/xla/operation_semantics?hl=en#selectandscatter.
  *
  * - padding : AUTO, VALID or SAME.
  * - ksize : filter size.
@@ -48,6 +49,10 @@ namespace ops {
  *
  * - 0 : input tensor of 2-D Max pooling.
  * - 1 : gradient of 2-D Max pooling output tensor.
+ *
+ * * Outputs:
+ *
+ * - 0 : updated tensor of 2-D Max pooling input.
  */
 class MaxpoolGrad: public Operation {

diff --git a/src/tim/vx/ops/maxpoolgrad.cc b/src/tim/vx/ops/maxpoolgrad.cc
index a1f824c..17c083a 100644
--- a/src/tim/vx/ops/maxpoolgrad.cc
+++ b/src/tim/vx/ops/maxpoolgrad.cc
@@ -36,10 +36,11 @@ namespace ops {
 class MaxpoolGradImpl : public OpImpl {
  public:
   enum {
-    TENSOR_BEFORE_POOL = 0,
-    UPDATES_TENSOR,
-    INPUT_CNT,
-    OUT_CNT = 1,
+    POOL_INPUT_TENSOR = 0,
+    GRADIENT_TENSOR = 1,
+    INPUT_CNT = 2,
+    UPDATED_TENSOR = 0,
+    OUTPUT_CNT = 1,
   };
   MaxpoolGradImpl(Graph* graph, PadType padding,
                   const std::array<uint32_t, 2>& ksize,
@@ -52,52 +53,73 @@ class MaxpoolGradImpl : public OpImpl {
         ksize_(ksize),
         stride_(stride),
         round_type_(round_type) {
-    maxpoolwithargmax2_ = graph->CreateOperation<tim::vx::ops::MaxpoolWithArgmax2>(
-        padding_, ksize_, stride_, round_type_, layout_);
+    maxpoolwithargmax2_ =
+        graph->CreateOperation<tim::vx::ops::MaxpoolWithArgmax2>(
+            padding_, ksize_, stride_, round_type_, layout_);
   }
   ~MaxpoolGradImpl() {}
-
-
   MaxpoolGradImpl& BindInput(const std::shared_ptr<Tensor>& tensor) override {
     in_tensors_[input_tensor_index] = tensor;
     if (this->input_tensor_index == INPUT_CNT - 1) {
-      tim::vx::ShapeType in_shape = in_tensors_[TENSOR_BEFORE_POOL]->GetShape();
-      tim::vx::ShapeType updates_shape = in_tensors_[UPDATES_TENSOR]->GetShape();
-      tim::vx::ShapeType idx_flattened_shape({CalFlattenedShape(updates_shape)});
+      tim::vx::ShapeType in_shape = in_tensors_[POOL_INPUT_TENSOR]->GetShape();
+      tim::vx::ShapeType grad_shape = in_tensors_[GRADIENT_TENSOR]->GetShape();
+      tim::vx::ShapeType idx_flattened_shape({CalFlattenedShape(grad_shape)});
       tim::vx::ShapeType out_flattened_shape({CalFlattenedShape(in_shape)});
-      tim::vx::TensorSpec pool_out_spec_indices(tim::vx::DataType::INT32,
-        updates_shape, tim::vx::TensorAttribute::TRANSIENT);
       tim::vx::TensorSpec pool_out_spec_values(tim::vx::DataType::FLOAT32,
-        updates_shape, tim::vx::TensorAttribute::OUTPUT);
+          grad_shape, tim::vx::TensorAttribute::TRANSIENT);
+      tim::vx::TensorSpec pool_out_spec_indices(tim::vx::DataType::INT32,
+          grad_shape, tim::vx::TensorAttribute::TRANSIENT);
       tim::vx::TensorSpec idx_flattened_spec(tim::vx::DataType::INT32,
-        idx_flattened_shape, tim::vx::TensorAttribute::TRANSIENT);
+          idx_flattened_shape, tim::vx::TensorAttribute::TRANSIENT);
       tim::vx::TensorSpec upd_flattened_spec(tim::vx::DataType::FLOAT32,
-        idx_flattened_shape, tim::vx::TensorAttribute::TRANSIENT);
+          idx_flattened_shape, tim::vx::TensorAttribute::TRANSIENT);
       tim::vx::TensorSpec out_flattened_spec(tim::vx::DataType::FLOAT32,
-        out_flattened_shape, tim::vx::TensorAttribute::TRANSIENT);
-
-      auto pool_out_indices_tensor = graph_->CreateTensor(pool_out_spec_indices);
+          out_flattened_shape, tim::vx::TensorAttribute::TRANSIENT);
+      auto pool_out_values_tensor = graph_->CreateTensor(pool_out_spec_values);
+      auto pool_out_indices_tensor = graph_->CreateTensor(pool_out_spec_indices);
       auto idx_flattened_tensor = graph_->CreateTensor(idx_flattened_spec);
       auto upd_flattened_tensor = graph_->CreateTensor(upd_flattened_spec);
       auto out_flattened_tensor = graph_->CreateTensor(out_flattened_spec);
-      (*maxpoolwithargmax2_).BindInput(in_tensors_[TENSOR_BEFORE_POOL])
-          .BindOutputs({pool_out_values_tensor, pool_out_indices_tensor});
-
-      flatten_idx = graph_->CreateOperation<tim::vx::ops::Reshape>(idx_flattened_shape);
-      (*flatten_idx).BindInput(pool_out_indices_tensor).BindOutput(idx_flattened_tensor);
+      (*maxpoolwithargmax2_).BindInput(in_tensors_[POOL_INPUT_TENSOR])
+          .BindOutputs({pool_out_values_tensor, pool_out_indices_tensor});
 
-      flatten_upd = graph_->CreateOperation<tim::vx::ops::Reshape>(idx_flattened_shape);
-      (*flatten_upd).BindInput(in_tensors_[UPDATES_TENSOR]).BindOutput(upd_flattened_tensor);
+      // eliminate pool out of maxpoolwithargmax begin
+      tim::vx::TensorSpec sliced_spec(tim::vx::DataType::FLOAT32,
+          {1, 1, 1, 1}, tim::vx::TensorAttribute::TRANSIENT);
+      auto sliced_tensor = graph_->CreateTensor(sliced_spec);
+      auto one_zero_tensor = graph_->CreateTensor(sliced_spec);
+      auto grad_tensor = graph_->CreateTensor(pool_out_spec_values);
+
+      std::vector<int32_t> start = {0, 0, 0, 0};
+      std::vector<int32_t> length = {1, 1, 1, 1};
+      slice_one_ = graph_->CreateOperation<tim::vx::ops::Slice>(0, start, length);
+      (*slice_one_).BindInput(pool_out_values_tensor).BindOutput(sliced_tensor);
+
+      self_sub_ = graph_->CreateOperation<tim::vx::ops::Sub>();
+      (*self_sub_).BindInputs({sliced_tensor, sliced_tensor}).BindOutput(one_zero_tensor);
+
+      add_zeros_ = graph_->CreateOperation<tim::vx::ops::Add>();
+      (*add_zeros_).BindInputs({one_zero_tensor, in_tensors_[GRADIENT_TENSOR]})
+          .BindOutput(grad_tensor);
+      // eliminate pool out of maxpoolwithargmax end
+
+      flatten_idx_ = graph_->CreateOperation<tim::vx::ops::Reshape>(idx_flattened_shape);
+      (*flatten_idx_).BindInput(pool_out_indices_tensor).BindOutput(idx_flattened_tensor);
+
+      flatten_upd_ = graph_->CreateOperation<tim::vx::ops::Reshape>(idx_flattened_shape);
+      (*flatten_upd_).BindInput(grad_tensor).BindOutput(upd_flattened_tensor);
 
       scatternd_ = graph_->CreateOperation<tim::vx::ops::ScatterND>(out_flattened_shape);
-      (*scatternd_).BindInputs({idx_flattened_tensor, upd_flattened_tensor}).BindOutput(out_flattened_tensor);
+      (*scatternd_).BindInputs({idx_flattened_tensor, upd_flattened_tensor})
+          .BindOutput(out_flattened_tensor);
 
       reshape_like_input_ = graph_->CreateOperation<tim::vx::ops::Reshape>(in_shape);
       (*reshape_like_input_).BindInput(out_flattened_tensor);
+
     }
     this->input_tensor_index++;
     return *this;
@@ -105,7 +127,9 @@ class MaxpoolGradImpl : public OpImpl {
   MaxpoolGradImpl& BindOutput(const std::shared_ptr<Tensor>& tensor) override {
     out_tensors_[output_tensor_index] = tensor;
-    (*reshape_like_input_).BindOutput(tensor);
+    if (this->output_tensor_index == OUTPUT_CNT - 1) {
+      (*reshape_like_input_).BindOutput(out_tensors_[UPDATED_TENSOR]);
+    }
     this->output_tensor_index++;
     return *this;
   }
@@ -126,12 +150,16 @@ class MaxpoolGradImpl : public OpImpl {
   const RoundType round_type_;
 
   std::shared_ptr<tim::vx::Operation> maxpoolwithargmax2_;
-  std::shared_ptr<tim::vx::Operation> flatten_idx;
-  std::shared_ptr<tim::vx::Operation> flatten_upd;
+  std::shared_ptr<tim::vx::Operation> slice_one_;
+  std::shared_ptr<tim::vx::Operation> self_sub_;
+  std::shared_ptr<tim::vx::Operation> add_zeros_;
+  std::shared_ptr<tim::vx::Operation> flatten_idx_;
+  std::shared_ptr<tim::vx::Operation> flatten_upd_;
   std::shared_ptr<tim::vx::Operation> scatternd_;
   std::shared_ptr<tim::vx::Operation> reshape_like_input_;
+  std::shared_ptr<tim::vx::Operation> reshape_pool_output_;
   std::array<std::shared_ptr<tim::vx::Tensor>, INPUT_CNT> in_tensors_;
-  std::array<std::shared_ptr<tim::vx::Tensor>, OUT_CNT> out_tensors_;
+  std::array<std::shared_ptr<tim::vx::Tensor>, OUTPUT_CNT> out_tensors_;
 
   uint32_t CalFlattenedShape(const tim::vx::ShapeType& shape) {
     uint32_t out = 1;
     for(auto& x: shape) {
@@ -150,7 +178,7 @@ MaxpoolGrad::MaxpoolGrad(Graph* graph, PadType padding,
       ksize_(ksize),
       stride_(stride),
       round_type_(round_type) {
-  impl_ = std::make_unique<MaxpoolGradImpl>(graph, padding, ksize, stride, 0, 0, round_type, layout);
+  impl_ = std::make_unique<MaxpoolGradImpl>(graph, padding, ksize, stride, 2, 1, round_type, layout);
 }
 
 std::shared_ptr<Operation> MaxpoolGrad::Clone(
@@ -164,4 +192,4 @@ std::shared_ptr<Operation> MaxpoolGrad::Clone(
 }  // namespace ops
 }  // namespace vx
 }  // namespace tim
-#endif //(VSI_FEAT_OP_MAXPOOLWITHARGMAX)
\ No newline at end of file
+#endif //(VSI_FEAT_OP_MAXPOOLWITHARGMAX)
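
Usage sketch (illustrative note, not part of the patch). After this fix, MaxpoolGrad exposes exactly one graph output: the updated tensor with the shape of the pooling input. The values output of MaxpoolWithArgmax2 no longer surfaces as a dangling OUTPUT tensor; the patch slices a single element from it, subtracts that element from itself to produce zeros, and broadcast-adds the zeros onto the incoming gradient, so the values tensor is consumed inside the graph. The snippet below shows how client code might drive the op through the public TIM-VX API. It assumes a build with VSI_FEAT_OP_MAXPOOLWITHARGMAX enabled and that the round_type and layout parameters have defaults; the 4x4 input with a 2x2 window and stride are illustrative choices, not taken from this patch.

  #include <array>

  #include "tim/vx/context.h"
  #include "tim/vx/graph.h"
  #include "tim/vx/ops/maxpoolgrad.h"

  int main() {
    auto ctx = tim::vx::Context::Create();
    auto graph = ctx->CreateGraph();

    // WHCN layout: 4x4 single-channel input; the gradient has the shape of
    // the pooled 2x2 output.
    tim::vx::ShapeType in_shape({4, 4, 1, 1});
    tim::vx::ShapeType grad_shape({2, 2, 1, 1});
    tim::vx::TensorSpec in_spec(tim::vx::DataType::FLOAT32, in_shape,
                                tim::vx::TensorAttribute::INPUT);
    tim::vx::TensorSpec grad_spec(tim::vx::DataType::FLOAT32, grad_shape,
                                  tim::vx::TensorAttribute::INPUT);
    tim::vx::TensorSpec out_spec(tim::vx::DataType::FLOAT32, in_shape,
                                 tim::vx::TensorAttribute::OUTPUT);
    auto input = graph->CreateTensor(in_spec);
    auto grad = graph->CreateTensor(grad_spec);
    auto output = graph->CreateTensor(out_spec);  // output 0: updated input

    // 2x2 window with stride 2; round_type and layout assumed to default.
    auto op = graph->CreateOperation<tim::vx::ops::MaxpoolGrad>(
        tim::vx::PadType::VALID, std::array<uint32_t, 2>({2, 2}),
        std::array<uint32_t, 2>({2, 2}));
    (*op).BindInputs({input, grad}).BindOutput(output);

    // Each input element receives the matching gradient value if it was the
    // max of its pooling window, and zero otherwise (SelectAndScatter
    // semantics).
    return graph->Compile() ? 0 : 1;
  }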