Fixed the IO tensor order difference between src_graph and infer_graph
* Fixed the IO tensor order difference between src_graph and infer_graph. The graph input/output tensor sequence may change after graph transformation (layout inference), so it is difficult to get the IO mapping between the original graph and the final graph. Clients such as the Android Support Library create tensors using the original input/output order, which may not be the same as the input order of src_graph, so the data cannot be set up correctly. Solution: decide the order of inputs/outputs when creating the tensor, not when binding it to an operation, because the binding order can change in each transform. Type: Code improvement Signed-off-by: Chen Xin <jack.chen@verisilicon.com> * Fixed maxpoolgrad and maxpoolwithargmax2 cases: some tensors were created with the wrong attr. Type: Bug fix Signed-off-by: Chen Xin <jack.chen@verisilicon.com> --------- Signed-off-by: Chen Xin <jack.chen@verisilicon.com> Co-authored-by: Chen Xin <jack.chen@verisilicon.com>
This commit is contained in:
parent
958b26e499
commit
6424ef104e
|
|
@ -46,8 +46,7 @@ BuiltinOpImpl& BuiltinOpImpl::BindInput(
|
|||
uint32_t tensor_id = tensor->GetId();
|
||||
node_->input.tensors[input_tensor_index++] = tensor_id;
|
||||
if (tensor->GetSpec().attr_ & TensorAttribute::INPUT) {
|
||||
graph_->AddInput(tensor_id);
|
||||
graph_->AddInput(tensor);
|
||||
graph_->ConsumeInput();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
@ -57,9 +56,8 @@ BuiltinOpImpl& BuiltinOpImpl::BindOutput(
|
|||
outputs_tensor_.push_back(tensor);
|
||||
uint32_t tensor_id = tensor->GetId();
|
||||
node_->output.tensors[output_tensor_index++] = tensor_id;
|
||||
if (tensor->GetSpec().attr_ == TensorAttribute::OUTPUT) {
|
||||
graph_->AddOutput(tensor_id);
|
||||
graph_->AddOutput(tensor);
|
||||
if (tensor->GetSpec().attr_ & TensorAttribute::OUTPUT) {
|
||||
graph_->ConsumeOutput();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,8 @@ GraphImpl::GraphImpl(ContextImpl* context, const CompileOption& options)
|
|||
: context_(context),
|
||||
graph_(vsi_nn_CreateGraph(context_->context(), 0, 0)),
|
||||
tensor_placeholder_(nullptr),
|
||||
not_consumed_input_cnt_(0),
|
||||
not_consumed_output_cnt_(0),
|
||||
options_(options){}
|
||||
|
||||
GraphImpl::~GraphImpl() { vsi_nn_ReleaseGraph(&graph_); }
|
||||
|
|
@ -133,17 +135,50 @@ void GraphImpl::PrintGraph() const { vsi_nn_PrintGraph(this->graph_); }
|
|||
|
||||
std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
|
||||
const void* data) {
|
||||
return std::make_shared<TensorImpl>(this, spec, data);
|
||||
auto tensor = std::make_shared<TensorImpl>(this, spec, data);
|
||||
if (spec.attr_ & TensorAttribute::INPUT) {
|
||||
this->AddInput(tensor);
|
||||
this->AddInput(tensor->GetId());
|
||||
this->ProduceInput();
|
||||
}
|
||||
if (spec.attr_ & TensorAttribute::OUTPUT) {
|
||||
this->AddOutput(tensor);
|
||||
this->AddOutput(tensor->GetId());
|
||||
this->ProduceOutput();
|
||||
}
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
|
||||
const DmaBufferDesc& dmafd) {
|
||||
return std::make_shared<TensorImpl>(this, spec, dmafd);
|
||||
auto tensor = std::make_shared<TensorImpl>(this, spec, dmafd);
|
||||
if (spec.attr_ & TensorAttribute::INPUT) {
|
||||
this->AddInput(tensor);
|
||||
this->AddInput(tensor->GetId());
|
||||
this->ProduceInput();
|
||||
}
|
||||
if (spec.attr_ & TensorAttribute::OUTPUT) {
|
||||
this->AddOutput(tensor);
|
||||
this->AddOutput(tensor->GetId());
|
||||
this->ProduceOutput();
|
||||
}
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> GraphImpl::CreateIOTensor(const TensorSpec& spec,
|
||||
void* data) {
|
||||
return std::make_shared<TensorImpl>(this, spec, data);
|
||||
auto tensor = std::make_shared<TensorImpl>(this, spec, data);
|
||||
if (spec.attr_ & TensorAttribute::INPUT) {
|
||||
this->AddInput(tensor);
|
||||
this->AddInput(tensor->GetId());
|
||||
this->ProduceInput();
|
||||
}
|
||||
if (spec.attr_ & TensorAttribute::OUTPUT) {
|
||||
this->AddOutput(tensor);
|
||||
this->AddOutput(tensor->GetId());
|
||||
this->ProduceOutput();
|
||||
}
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> GraphImpl::CreateTensorPlaceHolder() {
|
||||
|
|
@ -185,7 +220,15 @@ bool GraphImpl::Setup() {
|
|||
|
||||
bool GraphImpl::Compile() {
|
||||
bool status = true;
|
||||
|
||||
if (not_consumed_input_cnt_ > 0 ) {
|
||||
// Tensor can bind to different operations
|
||||
VSILOGE("Graph has free input, INPUT tensor may be created with OUTPUT attr.");
|
||||
return false;
|
||||
}
|
||||
if (not_consumed_output_cnt_ != 0) {
|
||||
VSILOGE("Graph has free output, OUTPUT tensor may be created with INPUT attr.");
|
||||
return false;
|
||||
}
|
||||
status = Setup();
|
||||
std::call_once(verify_graph_once_, [&status, this]() {
|
||||
status = (VSI_SUCCESS == vsi_nn_VerifyGraph(this->graph_));
|
||||
|
|
|
|||
|
|
@ -77,6 +77,10 @@ class GraphImpl : public Graph {
|
|||
bool Compile() override;
|
||||
bool CompileToBinary(void* buf, size_t* size) override;
|
||||
bool Run() override;
|
||||
void ProduceInput() { not_consumed_input_cnt_++; }
|
||||
void ProduceOutput() { not_consumed_output_cnt_++; }
|
||||
void ConsumeInput() { not_consumed_input_cnt_--; }
|
||||
void ConsumeOutput() { not_consumed_output_cnt_--; }
|
||||
|
||||
protected:
|
||||
ContextImpl* context_;
|
||||
|
|
@ -88,7 +92,9 @@ class GraphImpl : public Graph {
|
|||
std::vector<vsi_nn_tensor_id_t> inputs_;
|
||||
std::vector<vsi_nn_tensor_id_t> outputs_;
|
||||
std::vector<std::shared_ptr<Tensor>> inputs_tensor_;
|
||||
int32_t not_consumed_input_cnt_;
|
||||
std::vector<std::shared_ptr<Tensor>> outputs_tensor_;
|
||||
int32_t not_consumed_output_cnt_;
|
||||
std::map<std::shared_ptr<Tensor>, std::vector<std::shared_ptr<Operation>>> tensor_consumers_;
|
||||
std::map<std::shared_ptr<Tensor>, std::shared_ptr<Operation>> tensor_producer_;
|
||||
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ TEST(Fuse_MaxpoolGrad, with_overlay) {
|
|||
|
||||
auto input_tensor = graph->CreateTensor(input_spec);
|
||||
auto updates_tensor = graph->CreateTensor(updates_spec);
|
||||
auto output_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor = graph->CreateTensor(output_spec);
|
||||
|
||||
std::vector<float> in_data = {
|
||||
7, 2, 5, 3, 8,
|
||||
|
|
@ -124,7 +124,7 @@ TEST(Fuse_MaxpoolGrad, with_overlay) {
|
|||
|
||||
EXPECT_TRUE(graph->Compile());
|
||||
EXPECT_TRUE(graph->Run());
|
||||
|
||||
|
||||
std::vector<float> output_values(golden.size());
|
||||
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output_values.data()));
|
||||
EXPECT_EQ(golden, output_values);
|
||||
|
|
@ -145,7 +145,7 @@ TEST(Fuse_MaxpoolGrad, with_overlay_multi_channel_multi_batch) {
|
|||
|
||||
auto input_tensor = graph->CreateTensor(input_spec);
|
||||
auto updates_tensor = graph->CreateTensor(updates_spec);
|
||||
auto output_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor = graph->CreateTensor(output_spec);
|
||||
|
||||
std::vector<float> in_data = {
|
||||
7, 2, 5, 3, 8,
|
||||
|
|
@ -204,7 +204,7 @@ TEST(Fuse_MaxpoolGrad, with_overlay_multi_channel_multi_batch) {
|
|||
|
||||
EXPECT_TRUE(graph->Compile());
|
||||
EXPECT_TRUE(graph->Run());
|
||||
|
||||
|
||||
std::vector<float> output_values(golden.size());
|
||||
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output_values.data()));
|
||||
EXPECT_EQ(golden, output_values);
|
||||
|
|
|
|||
|
|
@ -138,11 +138,13 @@ TEST(MaxpoolGrad, without_overlay) {
|
|||
out_shape, tim::vx::TensorAttribute::TRANSIENT);
|
||||
tim::vx::TensorSpec output_spec_values(tim::vx::DataType::FLOAT32,
|
||||
out_shape, tim::vx::TensorAttribute::OUTPUT);
|
||||
tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32,
|
||||
in_shape, tim::vx::TensorAttribute::OUTPUT);
|
||||
|
||||
auto input_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor_indices = graph->CreateTensor(output_spec_indices);
|
||||
auto output_tensor_values = graph->CreateTensor(output_spec_values);
|
||||
auto output_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor = graph->CreateTensor(output_spec);
|
||||
|
||||
std::vector<float> in_data = {
|
||||
7, 2, 5, 3, 10, 2,
|
||||
|
|
@ -210,11 +212,13 @@ TEST(MaxpoolGrad, with_overlay) {
|
|||
out_shape, tim::vx::TensorAttribute::TRANSIENT);
|
||||
tim::vx::TensorSpec output_spec_values(tim::vx::DataType::FLOAT32,
|
||||
out_shape, tim::vx::TensorAttribute::OUTPUT);
|
||||
tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32,
|
||||
in_shape, tim::vx::TensorAttribute::OUTPUT);
|
||||
|
||||
auto input_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor_indices = graph->CreateTensor(output_spec_indices);
|
||||
auto output_tensor_values = graph->CreateTensor(output_spec_values);
|
||||
auto output_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor = graph->CreateTensor(output_spec);
|
||||
|
||||
std::vector<float> in_data = {
|
||||
7, 2, 5, 3, 8,
|
||||
|
|
@ -282,11 +286,13 @@ TEST(MaxpoolGrad, with_overlay_multi_channel_multi_batch) {
|
|||
out_shape, tim::vx::TensorAttribute::TRANSIENT);
|
||||
tim::vx::TensorSpec output_spec_values(tim::vx::DataType::FLOAT32,
|
||||
out_shape, tim::vx::TensorAttribute::OUTPUT);
|
||||
tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32,
|
||||
in_shape, tim::vx::TensorAttribute::OUTPUT);
|
||||
|
||||
auto input_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor_indices = graph->CreateTensor(output_spec_indices);
|
||||
auto output_tensor_values = graph->CreateTensor(output_spec_values);
|
||||
auto output_tensor = graph->CreateTensor(input_spec);
|
||||
auto output_tensor = graph->CreateTensor(output_spec);
|
||||
|
||||
std::vector<float> in_data = {
|
||||
7, 2, 5, 3, 8,
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ class TensorImpl : public Tensor {
|
|||
bool IsConstTensor() {
|
||||
return spec_.attr_ == tim::vx::TensorAttribute::CONSTANT;
|
||||
}
|
||||
|
||||
const void* GetDataRef() const { return data_; }
|
||||
|
||||
GraphImpl* graph_;
|
||||
|
|
@ -94,6 +95,7 @@ class TensorPlaceholder : public Tensor {
|
|||
bool IsConstTensor() {
|
||||
return spec_.attr_ == tim::vx::TensorAttribute::CONSTANT;
|
||||
}
|
||||
|
||||
const void* GetDataRef() const { return nullptr; }
|
||||
|
||||
vsi_nn_tensor_id_t id_;
|
||||
|
|
|
|||
Loading…
Reference in New Issue