From 0e211c8efdf412a7d09629aa0452d9356e6d113b Mon Sep 17 00:00:00 2001
From: Chen Xin <jack.chen@verisilicon.com>
Date: Thu, 1 Dec 2022 16:24:55 +0800
Subject: [PATCH] Fixed (grouped)conv2d layout inference bug

Also added a weight_as_input_conv2d test case

Type: Bug Fix

Signed-off-by: Chen Xin <jack.chen@verisilicon.com>
---
 src/tim/transform/layout_inference_test.cc    |  58 +++++++
 .../transform/ops/conv2d_layout_inference.h   | 156 +++++++++---------
 .../ops/grouped_conv2d_layout_inference.h     | 137 ++++++++-------
 src/tim/vx/ops/conv2d_test.cc                 |   2 +-
 4 files changed, 215 insertions(+), 138 deletions(-)
diff --git a/src/tim/transform/layout_inference_test.cc b/src/tim/transform/layout_inference_test.cc
index 9852233..5500ba4 100644
--- a/src/tim/transform/layout_inference_test.cc
+++ b/src/tim/transform/layout_inference_test.cc
@@ -58,4 +58,62 @@ TEST(LayoutInference, simple_conv2d) {
                           sizeof(float) * out_data.size()));
   tim::vx::ShapeType expect_shape({1, 2, 2, 1});
   EXPECT_EQ(infer_out_shape, expect_shape);
+}
+
+TEST(LayoutInference, weight_as_input_conv2d) {  // conv2d whose kernel and bias are graph inputs
+  auto ctx = tim::vx::Context::Create();
+  auto src_graph = ctx->CreateGraph();
+  tim::vx::ShapeType input_shape({3, 3, 1, 1}); // WHCN
+  tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT32, input_shape,
+                                 tim::vx::TensorAttribute::INPUT);
+  auto input = src_graph->CreateTensor(input_spec);
+  // The kernel is created with the INPUT attribute and without data, so it is fed at run time.
+  tim::vx::ShapeType kernel_shape({1, 2, 2, 1}); // IWHO, matching DataLayout::IcWHOc below
+  tim::vx::TensorSpec kernel_spec(tim::vx::DataType::FLOAT32, kernel_shape,
+                                  tim::vx::TensorAttribute::INPUT);
+  auto kernel = src_graph->CreateTensor(kernel_spec);
+  // The bias is likewise a graph INPUT tensor rather than a tensor created with data.
+  tim::vx::ShapeType bias_shape({1});
+  tim::vx::TensorSpec bias_spec(tim::vx::DataType::FLOAT32, bias_shape,
+                                tim::vx::TensorAttribute::INPUT);
+  auto bias = src_graph->CreateTensor(bias_spec);
+
+  tim::vx::ShapeType output_shape({2, 2, 1, 1}); // WHCN
+  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
+                                  tim::vx::TensorAttribute::OUTPUT);
+  auto output = src_graph->CreateTensor(output_spec);
+  // Conv2d args: weights, pad type, ksize, stride, dilation, pad, multiplier, input layout, kernel layout.
+  auto conv2d = src_graph->CreateOperation<tim::vx::ops::Conv2d>(
+      0, tim::vx::PadType::AUTO, std::array<uint32_t, 2>({0, 0}),
+      std::array<uint32_t, 2>({1, 1}), std::array<uint32_t, 2>({1, 1}),
+      std::array<uint32_t, 4>({0, 0, 0, 0}), 0, tim::vx::DataLayout::WHCN,
+      tim::vx::DataLayout::IcWHOc);
+  (*conv2d).BindInputs({input, kernel, bias}).BindOutput(output);
+  // Run layout inference; the result pairs the inferred graph with a source-tensor -> inferred-tensor map.
+  auto transform = tim::transform::LayoutInference(src_graph, ctx);
+  auto infer_graph = transform.first;
+  auto graph_io_map = transform.second;
+  infer_graph->Compile();
+  std::vector<float> input_data = {1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 1.0f, 1.0f, 1.0f, 1.0f};
+  std::vector<float> kernel_data = {0.25f, 0.25f, 0.25f, 0.25f};
+  std::vector<float> bias_data = {0.0f};
+  auto infer_input = graph_io_map[src_graph->InputsTensor()[0]];
+  auto infer_weight = graph_io_map[src_graph->InputsTensor()[1]];
+  auto infer_bias = graph_io_map[src_graph->InputsTensor()[2]];
+  auto infer_output = graph_io_map[src_graph->OutputsTensor()[0]];
+  // Data for all three inputs is copied into the inferred graph's tensors, looked up via the map.
+  infer_input->CopyDataToTensor(input_data.data(), input_data.size() * sizeof(float));
+  infer_weight->CopyDataToTensor(kernel_data.data(), kernel_data.size() * sizeof(float));
+  infer_bias->CopyDataToTensor(bias_data.data(), bias_data.size() * sizeof(float));
+  infer_graph->Run();
+  std::vector<float> out_data;
+  auto infer_out_shape = infer_output->GetShape();
+  out_data.resize(infer_out_shape[0] * infer_out_shape[1] * infer_out_shape[2] *
+                  infer_out_shape[3]);
+  infer_output->CopyDataFromTensor(out_data.data());
+  std::vector<float> expect_output = {0.875f, 0.875f, 0.875f, 0.875f};  // (3 * 1.0 + 0.5) * 0.25 per window
+  EXPECT_TRUE(0 == memcmp((void*)out_data.data(), (void*)expect_output.data(),
+                          sizeof(float) * out_data.size()));  // byte-wise comparison of the floats
+  tim::vx::ShapeType expect_shape({2, 2, 1, 1});
+  EXPECT_EQ(infer_out_shape, expect_shape);
 }
\ No newline at end of file
diff --git a/src/tim/transform/ops/conv2d_layout_inference.h b/src/tim/transform/ops/conv2d_layout_inference.h
index e0866a7..f1e93cd 100644
--- a/src/tim/transform/ops/conv2d_layout_inference.h
+++ b/src/tim/transform/ops/conv2d_layout_inference.h
@@ -40,93 +40,91 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
       : OpLayoutInfer(op, context) {}
   void OnInputs(
       std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
+    auto src_conv2d = std::static_pointer_cast<vx::ops::Conv2d>(op_);
     vx::DataLayout layout = op_->impl()->layout_;
-    auto required_pv = MakeShared(4);
-    if (layout == vx::DataLayout::CWHN) {
-      required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+    auto kernel_layout = src_conv2d->KernelDataLayout();
+    std::shared_ptr<IPermuteVector> required_pv, weight_required_pv;
+    switch (layout)
+    { // kernel layout must be IWHO in tflite & nnapi
+      case vx::DataLayout::CWHN:
+        required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+        break;
+      case vx::DataLayout::WHCN:
+        required_pv = MakeShared(4);
+        break;
+      default:
+        VSILOGE("The layout of input is not support.");
+        required_pv = MakeShared(4);
+        break;
     }
+    switch (kernel_layout) {
+      case vx::DataLayout::OcIcWH:  // Support TVM Kernel Layout
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kOcIcWH2WHIcOc);
+        break;
+      case vx::DataLayout::IcOcWH:
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kIcOcWH2WHIcOc);
+        break;
+      case vx::DataLayout::IcWHOc:  // Support nnapi & tflite Kernel Layout
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kIcWHOc2WHIcOc);
+        break;
+      default: // Default set to IWHO for compatibility with previous APIs
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kIcWHOc2WHIcOc);
+        break;
+    }
+
     auto input_tensors = op_->impl()->InputsTensor();
+    std::shared_ptr<vx::Tensor> infer_input, infer_weight, infer_bias;
+    // For input
+    auto input_pv = context_->GetPermuteVector(input_tensors[0]);
+    auto final_pv = input_pv->Reverse()->Add(required_pv);
+    if (!final_pv->IsAligned()) {
+      infer_input =
+          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      context_->SetPermuteVector(input_tensors[0], required_pv);
+    } else {
+      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      context_->SetPermuteVector(input_tensors[0], input_pv);
+    }
+    context_->UpdateTensorMap(input_tensors[0], infer_input);
 
-    for (const auto& in : input_tensors) {
-      std::shared_ptr<vx::Tensor> infer_tensor;
-      std::shared_ptr<IPermuteVector> trans_pv;
-      if (in->IsConstTensor() &&
-          !(in->GetSpec().attr_ & vx::TensorAttribute::INPUT)) {
-            // For bias
-            if (in->GetShape().size() == 1) {
-              infer_tensor = context_->infer_graph_->CreateTensor(
-                  in->GetSpec(), in->GetDataRef());
-              trans_pv = MakeShared(1);
-            } else {
-              // For input/weight
-              if (!required_pv->IsAligned()) {
-                auto src_conv2d = std::static_pointer_cast<vx::ops::Conv2d>(op_);
-                // Support TVM Kernel Layout
-                if (src_conv2d->KernelDataLayout() == vx::DataLayout::OcIcWH) {
-                  trans_pv = std::make_shared<PermuteVector<4>>(kOcIcWH2WHIcOc);
-                  infer_tensor = PermuteConstTensor(
-                      in, trans_pv);
-                } else if (src_conv2d->KernelDataLayout() == vx::DataLayout::IcOcWH) {
-                  trans_pv = std::make_shared<PermuteVector<4>>(kIcOcWH2WHIcOc);
-                  infer_tensor = PermuteConstTensor(
-                      in, trans_pv);
-                } else {
-                  infer_tensor = PermuteConstTensor(in, required_pv);
-                  trans_pv = required_pv;
-                }
-              } else {
-                infer_tensor = context_->infer_graph_->CreateTensor(
-                    in->GetSpec(), in->GetDataRef());
-                trans_pv = MakeShared(required_pv->Rank());
-              }
-            }
+    // For weight
+    if (input_tensors[1]->IsConstTensor()) {
+      if (!weight_required_pv->IsAligned()) {
+        infer_weight = PermuteConstTensor(input_tensors[1], weight_required_pv);
       } else {
-        // For bias
-        if (in->GetShape().size() == 1) {
-          infer_tensor = context_->GetMapedTensor(in);
-          trans_pv = MakeShared(1);
-        } else {
-          // For input/weight
-          auto pv = context_->GetPermuteVector(in);
-          auto final_pv = pv->Reverse()->Add(required_pv);
-          if (!final_pv->IsAligned()) {
-            infer_tensor =
-                InsertPermute(context_->GetMapedTensor(in), final_pv);
-            trans_pv = required_pv;
-          } else {
-            infer_tensor = context_->GetMapedTensor(in);
-            trans_pv = pv;
-          }
-        }
+        infer_weight = context_->infer_graph_->CreateTensor(
+            input_tensors[1]->GetSpec(), input_tensors[1]->GetDataRef());
       }
-      context_->UpdateTensorMap(in, infer_tensor);
-      context_->SetPermuteVector(in, trans_pv);
+      context_->SetPermuteVector(input_tensors[1], weight_required_pv);
+      context_->UpdateTensorMap(input_tensors[1], infer_weight);
+    } else {
+      auto weight_pv = context_->GetPermuteVector(input_tensors[1]);
+      auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
+      if (!final_pv->IsAligned()) {
+        infer_weight =
+            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+        context_->SetPermuteVector(input_tensors[1], weight_required_pv);
+      } else {
+        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        context_->SetPermuteVector(input_tensors[1], weight_pv);
+      }
+      context_->UpdateTensorMap(input_tensors[1], infer_weight);
     }
 
-    auto pad_type = TranslatePadType(op_->impl()->node()->nn_param.conv2d.pad_type);
-    std::array<uint32_t, 2> ksize = {
-      op_->impl()->node()->nn_param.conv2d.ksize[0],
-          op_->impl()->node()->nn_param.conv2d.ksize[1]
-    };
-    std::array<uint32_t, 2> stride = {
-      op_->impl()->node()->nn_param.conv2d.stride[0],
-      op_->impl()->node()->nn_param.conv2d.stride[1]
-    };
-    std::array<uint32_t, 2> dilation = {
-      op_->impl()->node()->nn_param.conv2d.dilation[0],
-      op_->impl()->node()->nn_param.conv2d.dilation[1]
-    };
-    std::array<uint32_t, 4> pad = {
-      op_->impl()->node()->nn_param.conv2d.pad[0],
-      op_->impl()->node()->nn_param.conv2d.pad[1],
-      op_->impl()->node()->nn_param.conv2d.pad[2],
-      op_->impl()->node()->nn_param.conv2d.pad[3]
-    };
-    int32_t multiplier = op_->impl()->node()->nn_param.conv2d.multiplier;
-    int32_t out_channels = op_->impl()->node()->nn_param.conv2d.weights;
-    auto conv2d = context_->infer_graph_->CreateOperation<vx::ops::Conv2d>(
-        out_channels, pad_type, ksize, stride, dilation, pad, multiplier,
-        vx::DataLayout::WHCN, vx::DataLayout::WHIcOc);
+    // For bias
+    if (input_tensors.size() == 3) {
+      if (input_tensors[2]->IsConstTensor()) {
+        infer_bias = context_->infer_graph_->CreateTensor(
+            input_tensors[2]->GetSpec(), input_tensors[2]->GetDataRef());
+      } else {
+        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+      }
+      auto bias_pv = MakeShared(1);
+      context_->UpdateTensorMap(input_tensors[2], infer_bias);
+      context_->SetPermuteVector(input_tensors[2], bias_pv);
+    }
+
+    auto conv2d = op_->Clone(context_->infer_graph_);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
       (*conv2d).BindInput(context_->GetMapedTensor(i_src));
diff --git a/src/tim/transform/ops/grouped_conv2d_layout_inference.h b/src/tim/transform/ops/grouped_conv2d_layout_inference.h
index 26f1dcf..3f58a72 100644
--- a/src/tim/transform/ops/grouped_conv2d_layout_inference.h
+++ b/src/tim/transform/ops/grouped_conv2d_layout_inference.h
@@ -40,75 +40,96 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
       : OpLayoutInfer(op, context) {}
   void OnInputs(
       std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
+    auto src_grouped_conv2d = std::static_pointer_cast<vx::ops::GroupedConv2d>(op_);
     vx::DataLayout layout = op_->impl()->layout_;
-    auto required_pv = MakeShared(4);
-    if (layout == vx::DataLayout::CWHN) {
-      required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+    auto kernel_layout = src_grouped_conv2d->KernelDataLayout();
+    std::shared_ptr<IPermuteVector> required_pv, weight_required_pv;
+    switch (layout)
+    { // kernel layout must be IWHO in tflite & nnapi
+      case vx::DataLayout::CWHN:
+        required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+        break;
+      case vx::DataLayout::WHCN:
+        required_pv = MakeShared(4);
+        break;
+      default:
+        VSILOGE("The layout of input is not support.");
+        required_pv = MakeShared(4);
+        break;
     }
+    switch (kernel_layout) {
+      case vx::DataLayout::OcIcWH:  // Support TVM Kernel Layout
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kOcIcWH2WHIcOc);
+        break;
+      case vx::DataLayout::IcOcWH:
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kIcOcWH2WHIcOc);
+        break;
+      case vx::DataLayout::IcWHOc:  // Support nnapi & tflite Kernel Layout
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kIcWHOc2WHIcOc);
+        break;
+      default: // Default set to IWHO for compatibility with previous APIs
+        weight_required_pv = std::make_shared<PermuteVector<4>>(kIcWHOc2WHIcOc);
+        break;
+    }
+
     auto input_tensors = op_->impl()->InputsTensor();
+    std::shared_ptr<vx::Tensor> infer_input, infer_weight, infer_bias;
+    // For input
+    auto input_pv = context_->GetPermuteVector(input_tensors[0]);
+    auto final_pv = input_pv->Reverse()->Add(required_pv);
+    if (!final_pv->IsAligned()) {
+      infer_input =
+          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      context_->SetPermuteVector(input_tensors[0], required_pv);
+    } else {
+      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      context_->SetPermuteVector(input_tensors[0], input_pv);
+    }
+    context_->UpdateTensorMap(input_tensors[0], infer_input);
 
-    for (const auto& in : input_tensors) {
-      std::shared_ptr<vx::Tensor> infer_tensor;
-      std::shared_ptr<IPermuteVector> trans_pv;
-      if (in->IsConstTensor() &&
-          !(in->GetSpec().attr_ & vx::TensorAttribute::INPUT)) {
-            // For bias
-            if (in->GetShape().size() == 1) {
-              infer_tensor = context_->infer_graph_->CreateTensor(
-                  in->GetSpec(), in->GetDataRef());
-              trans_pv = MakeShared(1);
-            } else {
-              // For input/weight
-              if (!required_pv->IsAligned()) {
-                auto src_grouped_conv2d = std::static_pointer_cast<vx::ops::GroupedConv2d>(op_);
-                // Support TVM Kernel Layout
-                if (src_grouped_conv2d->KernelDataLayout() == vx::DataLayout::OcIcWH) {
-                  trans_pv = std::make_shared<PermuteVector<4>>(kOcIcWH2WHIcOc);
-                  infer_tensor = PermuteConstTensor(
-                      in, trans_pv);
-                } else if (src_grouped_conv2d->KernelDataLayout() == vx::DataLayout::IcOcWH) {
-                  trans_pv = std::make_shared<PermuteVector<4>>(kIcOcWH2WHIcOc);
-                  infer_tensor = PermuteConstTensor(
-                      in, trans_pv);
-                } else {
-                  infer_tensor = PermuteConstTensor(in, required_pv);
-                  trans_pv = required_pv;
-                }
-              } else {
-                infer_tensor = context_->infer_graph_->CreateTensor(
-                    in->GetSpec(), in->GetDataRef());
-                trans_pv = MakeShared(required_pv->Rank());
-              }
-            }
+    // For weight
+    if (input_tensors[1]->IsConstTensor()) {
+      if (!weight_required_pv->IsAligned()) {
+        infer_weight = PermuteConstTensor(input_tensors[1], weight_required_pv);
       } else {
-        // For bias
-        if (in->GetShape().size() == 1) {
-          infer_tensor = context_->GetMapedTensor(in);
-          trans_pv = MakeShared(1);
-        } else {
-          // For input/weight
-          auto pv = context_->GetPermuteVector(in);
-          auto final_pv = pv->Reverse()->Add(required_pv);
-          if (!final_pv->IsAligned()) {
-            infer_tensor =
-                InsertPermute(context_->GetMapedTensor(in), final_pv);
-            trans_pv = required_pv;
-          } else {
-            infer_tensor = context_->GetMapedTensor(in);
-            trans_pv = pv;
-          }
-        }
+        infer_weight = context_->infer_graph_->CreateTensor(
+            input_tensors[1]->GetSpec(), input_tensors[1]->GetDataRef());
       }
-      context_->UpdateTensorMap(in, infer_tensor);
-      context_->SetPermuteVector(in, trans_pv);
+      context_->SetPermuteVector(input_tensors[1], weight_required_pv);
+      context_->UpdateTensorMap(input_tensors[1], infer_weight);
+    } else {
+      auto weight_pv = context_->GetPermuteVector(input_tensors[1]);
+      auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
+      if (!final_pv->IsAligned()) {
+        infer_weight =
+            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+        context_->SetPermuteVector(input_tensors[1], weight_required_pv);
+      } else {
+        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        context_->SetPermuteVector(input_tensors[1], weight_pv);
+      }
+      context_->UpdateTensorMap(input_tensors[1], infer_weight);
     }
 
-    auto conv2d = op_->Clone(context_->infer_graph_);
+    // For bias
+    if (input_tensors.size() == 3) {
+      if (input_tensors[2]->IsConstTensor()) {
+        infer_bias = context_->infer_graph_->CreateTensor(
+            input_tensors[2]->GetSpec(), input_tensors[2]->GetDataRef());
+      } else {
+        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+      }
+      auto bias_pv = MakeShared(1);
+      context_->UpdateTensorMap(input_tensors[2], infer_bias);
+      context_->SetPermuteVector(input_tensors[2], bias_pv);
+    }
+
+    auto grouped_conv2d = op_->Clone(context_->infer_graph_);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*conv2d).BindInput(context_->GetMapedTensor(i_src));
+      (*grouped_conv2d).BindInput(context_->GetMapedTensor(i_src));
     }
-    (*conv2d).BindOutput(otensor_infer[0]);
+    (*grouped_conv2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/vx/ops/conv2d_test.cc b/src/tim/vx/ops/conv2d_test.cc
index 40e576d..4667f91 100644
--- a/src/tim/vx/ops/conv2d_test.cc
+++ b/src/tim/vx/ops/conv2d_test.cc
@@ -1845,4 +1845,4 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
     f.push_back( q / (float)((int64_t)1 << fl_output));
   }
   EXPECT_EQ(golden, f);
-}
\ No newline at end of file
+}