Added roi_align layoutinfer & cases (#615)

* Added roi_align layoutinfer & cases Type: New feature Signed-off-by: Chen <jack.chen@verisilicon.com> * Update instancenorm op spec .json Type: bug fix Signed-off-by: Chen <jack.chen@verisilicon.com> * Added roi_pool layoutinfer & fixed case bug Type: new feature Signed-off-by: Chen <jack.chen@verisilicon.com> --------- Signed-off-by: Chen <jack.chen@verisilicon.com> Co-authored-by: Chen <jack.chen@verisilicon.com>
2023-07-08 23:39:56 +08:00 · 2023-07-08 23:39:56 +08:00 · ea8046ec9c
parent 32c5a61601
commit ea8046ec9c
12 changed files with 311 additions and 19 deletions
--- a/include/tim/vx/ops/instanceNormalization.json
+++ b/include/tim/vx/ops/instanceNormalization.json
@ -6,6 +6,14 @@
                 "dtype": "float",
                 "Optional": "true",
                 "default": "1e-5f"
+                },
+                {"name": "input_layout",
+                    "dtype": "tim::vx::DataLayout",
+                    "Optional": "true",
+                    "default": "tim::vx::DataLayout::WHCN",
+                    "range":["tim::vx::DataLayout::ANY",
+                             "tim::vx::DataLayout::WHCN",
+                             "tim::vx::DataLayout::CWHN"]
                }
            ]
    }
--- a/include/tim/vx/ops/roi_align.h
+++ b/include/tim/vx/ops/roi_align.h
@ -51,7 +51,7 @@ class RoiAlign : public BuiltinOp {
 public:
  RoiAlign(Graph* graph, int32_t output_height, int32_t output_width,
            float height_ratio, float width_ratio, int32_t height_sample_num,
-            int32_t width_sample_num);
+            int32_t width_sample_num, DataLayout input_layout = DataLayout::WHCN);

  std::shared_ptr<Operation> Clone(
      std::shared_ptr<Graph>& graph) const override;
--- a/include/tim/vx/ops/roi_pool.h
+++ b/include/tim/vx/ops/roi_pool.h
@ -37,17 +37,17 @@ namespace ops {
 *
 * Select and scale the feature map of each region of interest to a unified output
 * size by max-pooling.
- * 
+ *
 *    pool_type : only support max-pooling  (MAX)
- *    scale : The ratio of image to feature map (Range: 0 < scale <= 1) 
+ *    scale : The ratio of image to feature map (Range: 0 < scale <= 1)
 *    size : The size of roi pooling (height/width)
 *
 */

 class RoiPool : public BuiltinOp {
 public:
-  RoiPool(Graph* graph, PoolType type, float scale,
-            const std::array<uint32_t, 2>& size);
+  RoiPool(Graph* graph, PoolType type, float scale, const std::array<uint32_t, 2>& size,
+          DataLayout input_layout = DataLayout::WHCN);

  std::shared_ptr<Operation> Clone(
      std::shared_ptr<Graph>& graph) const override;
--- a/include/tim/vx/ops/roialign.json
+++ b/include/tim/vx/ops/roialign.json
@ -19,6 +19,14 @@
                },
                {"name":"width_sample_num",
                    "dtype": "int32_t"
+                },
+                {"name": "input_layout",
+                    "dtype": "tim::vx::DataLayout",
+                    "Optional": "true",
+                    "default": "tim::vx::DataLayout::WHCN",
+                    "range":["tim::vx::DataLayout::ANY",
+                             "tim::vx::DataLayout::WHCN",
+                             "tim::vx::DataLayout::CWHN"]
                }
            ]
    }
--- a/include/tim/vx/ops/roipool.json
+++ b/include/tim/vx/ops/roipool.json
@ -14,6 +14,14 @@
                },
                {"name":"size",
                    "dtype": "std::array<uint32_t, 2>"
+                },
+                {"name": "input_layout",
+                    "dtype": "tim::vx::DataLayout",
+                    "Optional": "true",
+                    "default": "tim::vx::DataLayout::WHCN",
+                    "range":["tim::vx::DataLayout::ANY",
+                             "tim::vx::DataLayout::WHCN",
+                             "tim::vx::DataLayout::CWHN"]
                }
            ]
    }
--- a/src/tim/transform/layout_inference.cc
+++ b/src/tim/transform/layout_inference.cc
@ -69,6 +69,8 @@
 #include "ops/broadcast_layout_inference.h"
 #include "ops/unidirectional_rnn_layout_inference.h"
 #include "ops/bidirectional_rnn_layout_inference.h"
+#include "ops/roi_align_layout_inference.h"
+#include "ops/roi_pool_layout_inference.h"

 #include <algorithm>
 #include <deque>
@ -260,6 +262,8 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
--- a/src/tim/transform/layout_inference_test.cc
+++ b/src/tim/transform/layout_inference_test.cc
@ -351,4 +351,72 @@ TEST(Resize, bilinear_outputsize) {
    std::vector<float> output(golden.size());
    EXPECT_TRUE(infer_output->CopyDataFromTensor(output.data()));
    EXPECT_TRUE(ArraysMatch(golden, output, 1e-5f));
+}
+
+TEST(RoiAlign, nhwc) {
+  // CWHN RoiAlign, executed through the graph produced by layout inference.
+  auto ctx = tim::vx::Context::Create();
+  auto src_graph = ctx->CreateGraph();
+
+  tim::vx::ShapeType input_shape({1, 4, 4, 1});  //cwhn
+  tim::vx::ShapeType regions_shape({4, 4});
+  tim::vx::ShapeType batch_index_shape({4});
+  tim::vx::ShapeType output_shape({1, 2, 2, 4});
+
+  tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT32, input_shape,
+                                 tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec regions_spec(tim::vx::DataType::FLOAT32, regions_shape,
+                                   tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec batch_index_spec(tim::vx::DataType::INT32,
+                                       batch_index_shape,
+                                       tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
+                                  tim::vx::TensorAttribute::OUTPUT);
+
+  std::vector<float> input_data = {-10.0f, -1.0f, 4.0f,  -5.0f, -8.0f, -2.0f,
+                                   9.0f,   1.0f,  7.0f,  -2.0f, 3.0f,  -7.0f,
+                                   -2.0f,  10.0f, -3.0f, 5.0f};
+  std::vector<float> regions_data = {2.0f, 2.0f, 4.0f, 4.0f, 0.0f, 0.0f,
+                                     8.0f, 8.0f, 2.0f, 0.0f, 4.0f, 8.0f,
+                                     0.0f, 2.0f, 8.0f, 4.0f};
+  std::vector<int32_t> batch_index_data = {0, 0, 0, 0};
+  std::vector<float> golden = {
+      0.375f, 5.125f, -0.375f, 2.875f, -0.5f,    -0.3125f, 3.1875f, 1.125f,
+      0.25f,  4.25f,  4.875f,  0.625f, -0.1875f, 1.125f,   0.9375f, -2.625f};
+
+  auto input_tensor = src_graph->CreateTensor(input_spec);
+  auto regions_tensor = src_graph->CreateTensor(regions_spec, regions_data.data());
+  auto batch_index_tensor =
+      src_graph->CreateTensor(batch_index_spec, batch_index_data.data());
+  auto output_tensor = src_graph->CreateTensor(output_spec);
+
+  auto roi_align = src_graph->CreateOperation<tim::vx::ops::RoiAlign>(
+      2, 2, 2.0f, 2.0f, 4, 4, tim::vx::DataLayout::CWHN);
+  (*roi_align)
+      .BindInput(input_tensor)
+      .BindInput(regions_tensor)
+      .BindInput(batch_index_tensor)
+      .BindOutput(output_tensor);
+
+  // Do layout inference; all I/O below goes through the inferred graph's tensors
+  auto transform = tim::transform::LayoutInference(src_graph, ctx);
+  auto infer_graph = transform.first;
+  auto graph_io_map = transform.second;
+  EXPECT_TRUE(infer_graph->Compile());
+
+  auto infer_input = graph_io_map[src_graph->InputsTensor()[0]];
+  auto infer_regions = graph_io_map[src_graph->InputsTensor()[1]];
+  auto infer_batch_index = graph_io_map[src_graph->InputsTensor()[2]];
+  auto infer_output = graph_io_map[src_graph->OutputsTensor()[0]];
+  EXPECT_TRUE(infer_input->CopyDataToTensor(
+      input_data.data(), input_data.size() * sizeof(float)));
+  EXPECT_TRUE(infer_regions->CopyDataToTensor(
+      regions_data.data(), regions_data.size() * sizeof(float)));
+  EXPECT_TRUE(infer_batch_index->CopyDataToTensor(
+      batch_index_data.data(), batch_index_data.size() * sizeof(int32_t)));
+  EXPECT_TRUE(infer_graph->Run());
+
+  std::vector<float> output(golden.size());
+  EXPECT_TRUE(infer_output->CopyDataFromTensor(output.data()));
+  EXPECT_TRUE(ArraysMatch(golden, output, 1e-5f));
 }
--- a/src/tim/transform/ops/roi_align_layout_inference.h
+++ b/src/tim/transform/ops/roi_align_layout_inference.h
@ -0,0 +1,99 @@
+/****************************************************************************
+ *
+ *    Copyright (c) 2020-2023 Vivante Corporation
+ *
+ *    Permission is hereby granted, free of charge, to any person obtaining a
+ *    copy of this software and associated documentation files (the "Software"),
+ *    to deal in the Software without restriction, including without limitation
+ *    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *    and/or sell copies of the Software, and to permit persons to whom the
+ *    Software is furnished to do so, subject to the following conditions:
+ *
+ *    The above copyright notice and this permission notice shall be included in
+ *    all copies or substantial portions of the Software.
+ *
+ *    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *    DEALINGS IN THE SOFTWARE.
+ *
+ *****************************************************************************/
+#ifndef TIM_LAYOUT_INFER_ROI_ALIGN_LAYOUT_INFERENCE_H_
+#define TIM_LAYOUT_INFER_ROI_ALIGN_LAYOUT_INFERENCE_H_
+
+#include "tim/vx/ops/roi_align.h"
+
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "builtin_op_impl.h"
+
+namespace tim {
+namespace transform {
+
+// Layout inference for RoiAlign: brings the feature map (input 0) to WHCN.
+class RoiAlignLayoutInfer : public OpLayoutInfer {
+ public:
+  RoiAlignLayoutInfer(
+      const std::shared_ptr<vx::Operation> op,
+      std::shared_ptr<layout_inference_impl::LayoutInferContext>& context)
+      : OpLayoutInfer(op, context) {}
+
+  void OnInputs(
+      std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
+    vx::DataLayout layout = op_->impl()->layout_;
+    auto input_tensors = op_->impl()->InputsTensor();
+    std::shared_ptr<IPermuteVector> required_pv;
+    switch (layout) {  // permute vector the feature map must end up with
+      case vx::DataLayout::CWHN:
+        required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+        break;
+      case vx::DataLayout::WHCN:
+        required_pv = MakeShared(4);
+        break;
+      default:
+        VSILOGE("The layout of input is not support.");
+        required_pv = MakeShared(4);
+        break;
+    }
+    // Map constant inputs first so a constant input 0 keeps its permute below.
+    for (const auto& t_src : input_tensors) {
+      if (t_src->IsConstTensor()) {
+        std::vector<uint8_t> dataRef(t_src->GetSpec().GetByteSize());
+        t_src->CopyDataFromTensor(dataRef.data());
+        auto t_infer = context_->infer_graph_->CreateTensor(
+            t_src->GetSpec(), static_cast<const void*>(dataRef.data()));
+        context_->SetPermuteVector(t_src, MakeShared(t_src->GetShape().size()));
+        context_->UpdateTensorMap(t_src, t_infer);
+      }
+    }
+    // Insert a permute on input 0 only when its current layout differs.
+    auto input_pv = context_->GetPermuteVector(input_tensors[0]);
+    auto final_pv = input_pv->Reverse()->Add(required_pv);
+    auto infer_input = context_->GetMapedTensor(input_tensors[0]);
+    if (!final_pv->IsAligned()) {
+      infer_input = InsertPermute(infer_input, final_pv);
+      context_->SetPermuteVector(input_tensors[0], required_pv);
+    } else {
+      context_->SetPermuteVector(input_tensors[0], input_pv);
+    }
+    context_->UpdateTensorMap(input_tensors[0], infer_input);
+    // Re-create the op in the inferred graph and bind the mapped tensors.
+    auto roi_align = op_->Clone(context_->infer_graph_);
+    auto outs_infer = CreateOutputsTensor(required_pv);
+    for (const auto& i_src : input_tensors) {
+      (*roi_align).BindInput(context_->GetMapedTensor(i_src));
+    }
+    (*roi_align).BindOutput(outs_infer[0]);
+    context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
+    // Add out tensor of src_graph into next_tensor
+    next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
+  }
+};
+
+}  // namespace transform
+}  // namespace tim
+
+#endif
--- a/src/tim/transform/ops/roi_pool_layout_inference.h
+++ b/src/tim/transform/ops/roi_pool_layout_inference.h
@ -0,0 +1,99 @@
+/****************************************************************************
+ *
+ *    Copyright (c) 2020-2023 Vivante Corporation
+ *
+ *    Permission is hereby granted, free of charge, to any person obtaining a
+ *    copy of this software and associated documentation files (the "Software"),
+ *    to deal in the Software without restriction, including without limitation
+ *    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *    and/or sell copies of the Software, and to permit persons to whom the
+ *    Software is furnished to do so, subject to the following conditions:
+ *
+ *    The above copyright notice and this permission notice shall be included in
+ *    all copies or substantial portions of the Software.
+ *
+ *    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *    DEALINGS IN THE SOFTWARE.
+ *
+ *****************************************************************************/
+#ifndef TIM_LAYOUT_INFER_ROI_POOL_LAYOUT_INFERENCE_H_
+#define TIM_LAYOUT_INFER_ROI_POOL_LAYOUT_INFERENCE_H_
+
+#include "tim/vx/ops/roi_pool.h"
+
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "builtin_op_impl.h"
+
+namespace tim {
+namespace transform {
+
+// Layout inference for RoiPool: brings the feature map (input 0) to WHCN.
+class RoiPoolLayoutInfer : public OpLayoutInfer {
+ public:
+  RoiPoolLayoutInfer(
+      const std::shared_ptr<vx::Operation> op,
+      std::shared_ptr<layout_inference_impl::LayoutInferContext>& context)
+      : OpLayoutInfer(op, context) {}
+
+  void OnInputs(
+      std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
+    vx::DataLayout layout = op_->impl()->layout_;
+    auto input_tensors = op_->impl()->InputsTensor();
+    std::shared_ptr<IPermuteVector> required_pv;
+    switch (layout) {  // permute vector the feature map must end up with
+      case vx::DataLayout::CWHN:
+        required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+        break;
+      case vx::DataLayout::WHCN:
+        required_pv = MakeShared(4);
+        break;
+      default:
+        VSILOGE("The layout of input is not support.");
+        required_pv = MakeShared(4);
+        break;
+    }
+    // Map constant inputs first so a constant input 0 keeps its permute below.
+    for (const auto& t_src : input_tensors) {
+      if (t_src->IsConstTensor()) {
+        std::vector<uint8_t> dataRef(t_src->GetSpec().GetByteSize());
+        t_src->CopyDataFromTensor(dataRef.data());
+        auto t_infer = context_->infer_graph_->CreateTensor(
+            t_src->GetSpec(), static_cast<const void*>(dataRef.data()));
+        context_->SetPermuteVector(t_src, MakeShared(t_src->GetShape().size()));
+        context_->UpdateTensorMap(t_src, t_infer);
+      }
+    }
+    // Insert a permute on input 0 only when its current layout differs.
+    auto input_pv = context_->GetPermuteVector(input_tensors[0]);
+    auto final_pv = input_pv->Reverse()->Add(required_pv);
+    auto infer_input = context_->GetMapedTensor(input_tensors[0]);
+    if (!final_pv->IsAligned()) {
+      infer_input = InsertPermute(infer_input, final_pv);
+      context_->SetPermuteVector(input_tensors[0], required_pv);
+    } else {
+      context_->SetPermuteVector(input_tensors[0], input_pv);
+    }
+    context_->UpdateTensorMap(input_tensors[0], infer_input);
+    // Re-create the op in the inferred graph and bind the mapped tensors.
+    auto roi_pool = op_->Clone(context_->infer_graph_);
+    auto outs_infer = CreateOutputsTensor(required_pv);
+    for (const auto& i_src : input_tensors) {
+      (*roi_pool).BindInput(context_->GetMapedTensor(i_src));
+    }
+    (*roi_pool).BindOutput(outs_infer[0]);
+    context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
+    // Add out tensor of src_graph into next_tensor
+    next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
+  }
+};
+
+}  // namespace transform
+}  // namespace tim
+
+#endif
--- a/src/tim/vx/ops/roi_align.cc
+++ b/src/tim/vx/ops/roi_align.cc
@ -32,8 +32,8 @@ namespace ops {

 RoiAlign::RoiAlign(Graph* graph, int32_t output_height, int32_t output_width,
          float height_ratio, float width_ratio, int32_t height_sample_num,
-          int32_t width_sample_num)
-    : BuiltinOp(graph, VSI_NN_OP_ROI_ALIGN),
+          int32_t width_sample_num, DataLayout input_layout)
+    : BuiltinOp(graph, VSI_NN_OP_ROI_ALIGN, 0, 0, input_layout),
      output_height_(output_height),
      output_width_(output_width),
      height_ratio_(height_ratio),
@ -53,7 +53,8 @@ std::shared_ptr<Operation> RoiAlign::Clone(
    std::shared_ptr<Graph>& graph) const {
  return graph->CreateOperation<RoiAlign>(
      this->output_height_, this->output_width_, this->height_ratio_,
-      this->width_ratio_, this->height_sample_num_, this->width_sample_num_);
+      this->width_ratio_, this->height_sample_num_, this->width_sample_num_,
+      this->impl_->layout_);
 }

 }  // namespace ops
--- a/src/tim/vx/ops/roi_pool.cc
+++ b/src/tim/vx/ops/roi_pool.cc
@ -32,8 +32,8 @@ namespace vx {
 namespace ops {

 RoiPool::RoiPool(Graph* graph, PoolType type, float scale,
-                   const std::array<uint32_t, 2>& size)
-    : BuiltinOp(graph, VSI_NN_OP_ROI_POOL),
+                 const std::array<uint32_t, 2>& size, DataLayout input_layout)
+    : BuiltinOp(graph, VSI_NN_OP_ROI_POOL, 0, 0, input_layout),
      type_(type),
      scale_(scale),
      size_(size) {
@ -46,7 +46,7 @@ RoiPool::RoiPool(Graph* graph, PoolType type, float scale,
 std::shared_ptr<Operation> RoiPool::Clone(
    std::shared_ptr<Graph>& graph) const {
  return graph->CreateOperation<RoiPool>(
-      this->type_, this->scale_, this->size_);
+      this->type_, this->scale_, this->size_, this->impl_->layout_);
 }

 }  // namespace ops
--- a/src/tim/vx/ops/roi_pool_test.cc
+++ b/src/tim/vx/ops/roi_pool_test.cc
@ -57,7 +57,7 @@ TEST(RoiPool, shape_4_2_1_1_float32) {
  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
                                  tim::vx::TensorAttribute::OUTPUT);

-  std::vector<float> input_data = {-10.0f, -1.0f, 4.0f,  -5.0f, 
+  std::vector<float> input_data = {-10.0f, -1.0f, 4.0f,  -5.0f,
                                  -8.0f, -2.0f, 9.0f,   1.0f,
                                   7.0f, -2.0f, 3.0f,  -7.0f,
                                   -2.0f,  10.0f, -3.0f, 5.0f};
@ -67,7 +67,6 @@ TEST(RoiPool, shape_4_2_1_1_float32) {
                                     0.0f, 2.0f, 0.0f, 4.0f, 8.0f,
                                     0.0f, 0.0f, 2.0f, 8.0f, 4.0f};

-
  std::vector<float> golden = {
      -2, 9, -2, 3,
      9, 9, 10, 5,
@ -77,18 +76,16 @@ TEST(RoiPool, shape_4_2_1_1_float32) {
  auto input_tensor = graph->CreateTensor(input_spec);
  auto regions_tensor = graph->CreateTensor(regions_spec);
  auto output_tensor = graph->CreateTensor(output_spec);
-  
+
   std::array<uint32_t, 2> size;
-   size[0] = out_height;
-   size[1] = out_width;
+   size[0] = out_width;
+   size[1] = out_height;
  auto roi_pool = graph->CreateOperation<tim::vx::ops::RoiPool>(tim::vx::PoolType::MAX, scale, size);
  (*roi_pool)
      .BindInput(input_tensor)
      .BindInput(regions_tensor)
      .BindOutput(output_tensor);

-  
-
  EXPECT_TRUE(input_tensor->CopyDataToTensor(input_data.data(), input_data.size()*sizeof(float)));
  EXPECT_TRUE(regions_tensor->CopyDataToTensor(regions_data.data(), regions_data.size()*sizeof(float)));
  EXPECT_TRUE(graph->Compile());
@ -97,4 +94,4 @@ TEST(RoiPool, shape_4_2_1_1_float32) {
  std::vector<float> output(num_rois * out_height * out_width * depth);
  EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
  EXPECT_EQ(golden, output);
-}
+}