Refine layout inference (#671)

* Remove unnecessary compiler flags
* Refactor CMakeLists.txt
* Tweak CMakeLists.txt for libtim_internal
* Tweak CMakeLists.txt for libtim-vx
* Make TIM_VX_ENABLE_TEST default to OFF
* Eliminate usage of include_directories
* Fix CI unit test
* Fix warnings relating to inheritance
* Keep graph output order in layout inference (Type: Code Improvement)
* Fix typos in layout inference (Type: Code Improvement)

---------

Authored-by: Xiaoran Weng <Xiaoran.Weng@verisilicon.com>

This commit is contained in:
parent 622c472edf
commit feaf06365b
@@ -1,16 +1,18 @@
 #ifndef TIM_VX_LAYOUT_INFER_CONTEXT_H_
 #define TIM_VX_LAYOUT_INFER_CONTEXT_H_
 
 #include "permute_vector.h"
 #include "tim/transform/layout_inference.h"
 
+#include <unordered_map>
+
 namespace tim {
 namespace transform {
 namespace layout_inference_impl {
 class LayoutInferContext {
  public:
   LayoutInferContext(const std::shared_ptr<vx::Graph>& src_graph,
-                     std::shared_ptr<vx::Graph>& infer_graph)
-      : src_graph_(src_graph), infer_graph_(infer_graph) {}
+                     std::shared_ptr<vx::Graph>& infer_graph);
   void SetPermuteVector(std::shared_ptr<vx::Tensor> tensor,
                         std::shared_ptr<IPermuteVector> pv);
   const std::shared_ptr<IPermuteVector> GetPermuteVector(
@@ -20,14 +22,18 @@ class LayoutInferContext {
   bool IsReadyForInfer(const std::shared_ptr<vx::Operation>& op) const;
   void UpdateTensorMap(const std::shared_ptr<vx::Tensor>& t_src,
                        const std::shared_ptr<vx::Tensor>& t_layout);
-  std::shared_ptr<vx::Tensor> GetMapedTensor(
+  std::shared_ptr<vx::Tensor> GetMappedTensor(
       const std::shared_ptr<vx::Tensor>& t_src) const;
+  std::shared_ptr<vx::Tensor> GetMappedGraphInputTensor(
+      const std::shared_ptr<vx::Tensor>& t_src) const;
+  std::shared_ptr<vx::Tensor> GetMappedGraphOutputTensor(
+      const std::shared_ptr<vx::Tensor>& t_src) const;
 
   void UpdateGraphInputMap(const std::shared_ptr<vx::Tensor>& i_src,
                            const std::shared_ptr<vx::Tensor>& i_layout);
 
   void UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>& o_src,
-                            const std::shared_ptr<vx::Tensor>& o_layout);
+                           const std::shared_ptr<vx::Tensor>& o_layout);
 
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<vx::Tensor>>
   GetGraphInputMap() const {
@@ -44,7 +50,7 @@ class LayoutInferContext {
  private:
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<IPermuteVector>>
       tensor_pv_;
-  std::vector<std::shared_ptr<vx::Operation>> visited_op_;
+  std::unordered_map<std::shared_ptr<vx::Operation>, bool> op_visited_;
   // tensor_in_src -> tensor_in_layout
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<vx::Tensor>>
       tensor_map_;
@@ -73,7 +73,7 @@
 #include "ops/roi_pool_layout_inference.h"
 
 #include <algorithm>
-#include <deque>
+#include <queue>
 
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
@@ -87,7 +87,16 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     std::shared_ptr<layout_inference_impl::LayoutInferContext>& ctx,
     const std::shared_ptr<vx::Operation>& op);
 
-// Implemention for LayoutInferContext
+// Implementation for LayoutInferContext
+LayoutInferContext::LayoutInferContext(
+    const std::shared_ptr<vx::Graph>& src_graph,
+    std::shared_ptr<vx::Graph>& infer_graph)
+    : src_graph_(src_graph), infer_graph_(infer_graph) {
+  for (const auto& op : src_graph->OpVector()) {
+    op_visited_[op] = false;
+  }
+}
+
 void LayoutInferContext::SetPermuteVector(std::shared_ptr<vx::Tensor> tensor,
                                           std::shared_ptr<IPermuteVector> pv) {
   if (tensor_pv_.end() != tensor_pv_.find(tensor)) {
@@ -110,27 +119,19 @@ const std::shared_ptr<IPermuteVector> LayoutInferContext::GetPermuteVector(
 }
 
 void LayoutInferContext::MarkVisited(const std::shared_ptr<vx::Operation>& op) {
-  if (visited_op_.end() !=
-      std::find(visited_op_.begin(), visited_op_.end(), op)) {
-    VSILOGW("The operation has been mark as visited.");
-  } else {
-    visited_op_.push_back(op);
-  }
+  op_visited_[op] = true;
 }
 
-bool LayoutInferContext::IsVisited(const std::shared_ptr<vx::Operation>& op) const {
-  if (visited_op_.end() !=
-      std::find(visited_op_.begin(), visited_op_.end(), op)) {
-    return true;
-  } else {
-    return false;
-  }
+bool LayoutInferContext::IsVisited(
+    const std::shared_ptr<vx::Operation>& op) const {
+  return op_visited_.at(op);
 }
 
 bool LayoutInferContext::IsReadyForInfer(
     const std::shared_ptr<vx::Operation>& op) const {
   for (const auto& tensor : op->impl()->InputsTensor()) {
-    if (!tensor->IsConstTensor() && tensor->GetId() != (uint32_t)-1 &&
+    if (!tensor->IsConstTensor() &&
+        tensor->GetId() != static_cast<uint32_t>(-1) &&
         (tensor_pv_.end() == tensor_pv_.find(tensor))) {
       return false;
     }
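Note on the hunk above: visited-op bookkeeping moves from a `std::vector` searched with `std::find` to an `std::unordered_map<op, bool>` that the constructor pre-fills with `false` for every op of the source graph, so `MarkVisited`/`IsVisited` become constant-time. A minimal sketch of the two lookup styles (stand-in `Op` type, not the TIM-VX classes):

```cpp
#include <algorithm>
#include <memory>
#include <unordered_map>
#include <vector>

struct Op {};  // stand-in for vx::Operation

// Old style: membership test is a linear scan on every query.
bool IsVisitedVec(const std::vector<std::shared_ptr<Op>>& visited,
                  const std::shared_ptr<Op>& op) {
  return std::find(visited.begin(), visited.end(), op) != visited.end();
}

// New style: every op is pre-registered with `false`, so the query is a single
// hash lookup, and .at() never throws for ops that belong to the source graph.
bool IsVisitedMap(const std::unordered_map<std::shared_ptr<Op>, bool>& visited,
                  const std::shared_ptr<Op>& op) {
  return visited.at(op);
}
```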
@@ -144,68 +145,91 @@ void LayoutInferContext::UpdateTensorMap(
   tensor_map_[t_src] = t_layout;
 }
 
-std::shared_ptr<vx::Tensor> LayoutInferContext::GetMapedTensor(
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedTensor(
     const std::shared_ptr<vx::Tensor>& t_src) const {
   auto it = tensor_map_.find(t_src);
   if (it != tensor_map_.end()) {
     return it->second;
-  } else {
-    VSILOGE("Tensor has not beed inserted in tensor map.");
-    assert(false);
   }
+
+  VSILOGE("Tensor has not beed inserted in tensor map.");
+  return nullptr;
 }
 
-void LayoutInferContext::UpdateGraphInputMap(const std::shared_ptr<vx::Tensor>& i_src,
-                                             const std::shared_ptr<vx::Tensor>& i_layout) {
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedGraphInputTensor(
+    const std::shared_ptr<vx::Tensor>& t_src) const {
+  auto it = graph_input_map_.find(t_src);
+  if (it != tensor_map_.end()) {
+    return it->second;
+  }
+
+  VSILOGE("Tensor has not beed inserted in graph input tensor map.");
+  return nullptr;
+}
+
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedGraphOutputTensor(
+    const std::shared_ptr<vx::Tensor>& t_src) const {
+  auto it = graph_output_map_.find(t_src);
+  if (it != tensor_map_.end()) {
+    return it->second;
+  }
+
+  VSILOGE("Tensor has not beed inserted in graph output tensor map.");
+  return nullptr;
+}
+
+void LayoutInferContext::UpdateGraphInputMap(
+    const std::shared_ptr<vx::Tensor>& i_src,
+    const std::shared_ptr<vx::Tensor>& i_layout) {
   graph_input_map_[i_src] = i_layout;
 }
 
-void LayoutInferContext::UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>& o_src,
-                                              const std::shared_ptr<vx::Tensor>& o_layout) {
+void LayoutInferContext::UpdateGraphOutputMap(
+    const std::shared_ptr<vx::Tensor>& o_src,
+    const std::shared_ptr<vx::Tensor>& o_layout) {
   graph_output_map_[o_src] = o_layout;
 }
 
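The three `GetMapped*` accessors above now log and return `nullptr` instead of asserting when a tensor was never registered, so callers are expected to treat a null result as "not mapped". A generic sketch of that lookup-or-null pattern (a hypothetical helper, not part of the library):

```cpp
#include <iostream>
#include <map>
#include <memory>

// Hypothetical helper mirroring the accessors above: return the mapped value
// if present, otherwise log and hand back an empty shared_ptr for the caller
// to check.
template <typename K, typename V>
std::shared_ptr<V> FindMappedOrNull(
    const std::map<std::shared_ptr<K>, std::shared_ptr<V>>& mapping,
    const std::shared_ptr<K>& key) {
  auto it = mapping.find(key);
  if (it != mapping.end()) {
    return it->second;
  }
  std::cerr << "key has not been inserted in the map\n";  // real code uses VSILOGE
  return nullptr;
}
```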
-#define REGIST_LAYOUT_INFERENCE(op_idx, name)                       \
+#define REGISTER_LAYOUT_INFERENCE(op_idx, name)                     \
   case op_idx: {                                                    \
     auto op_infer = std::make_shared<name##LayoutInfer>(op, ctx);   \
     op_infer->OnInputs(next_tensors);                               \
     op_infer->OnOutputs(next_tensors);                              \
     break;                                                          \
-  } \
+  }
 
-#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx)                      \
-  case op_idx: {                                                    \
-    auto reduce_type = op->impl()->node()->nn_param.reduce.type;    \
-    switch (reduce_type) {                                          \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);      \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);      \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);        \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);        \
-      default:                                                      \
-        VSILOGW("Op %d: Default layout inference pass for reduce.", reduce_type);\
-        assert(false);                                              \
-    }                                                               \
-    break;                                                          \
-  } \
+#define REGISTER_REDUCE_LAYOUT_INFERENCE(op_idx)                    \
+  case op_idx: {                                                    \
+    auto reduce_type = op->impl()->node()->nn_param.reduce.type;    \
+    switch (reduce_type) {                                          \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);      \
+      default:                                                      \
+        VSILOGW("Op %d: Default layout inference pass for reduce.", \
+                reduce_type);                                       \
+        assert(false);                                              \
+    }                                                               \
+    break;                                                          \
+  }
 
-#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx)                     \
-  case op_idx: {                                                    \
-    auto logical_type = op->impl()->node()->nn_param.relational_ops.op; \
-    switch (logical_type)                                           \
-    {                                                               \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);      \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);        \
-      default:                                                      \
-        VSILOGW("Op %d: Default layout inference pass for logical.", logical_type);\
-        assert(false);                                              \
-    }                                                               \
-    break;                                                          \
-  } \
+#define REGISTER_LOGICAL_LAYOUT_INFERENCE(op_idx)                   \
+  case op_idx: {                                                    \
+    auto logical_type = op->impl()->node()->nn_param.relational_ops.op; \
+    switch (logical_type) {                                         \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);      \
+      default:                                                      \
+        VSILOGW("Op %d: Default layout inference pass for logical.", \
+                logical_type);                                      \
+        assert(false);                                              \
+    }                                                               \
+    break;                                                          \
+  }
 
 std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     std::shared_ptr<layout_inference_impl::LayoutInferContext>& ctx,
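For readability, here is what a single registration line expands to once the renamed `REGISTER_LAYOUT_INFERENCE` macro is applied. This is a hand-expanded sketch of one `case` inside the `switch (op_id)` in the hunk below, using the `Relu` handler as the example:

```cpp
// REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu) expands (roughly) to:
case VSI_NN_OP_RELU: {
  auto op_infer = std::make_shared<ReluLayoutInfer>(op, ctx);
  op_infer->OnInputs(next_tensors);   // handle this op's inputs
  op_infer->OnOutputs(next_tensors);  // handle outputs and collect next tensors
  break;
}
```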
@@ -214,78 +238,80 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
   auto op_id = op->impl()->kind_;
   std::vector<std::shared_ptr<vx::Tensor>> next_tensors;
   switch (op_id) {
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV2D, Conv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GROUPED_CONV2D, GroupedConv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU1, Relu1);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU6, Relu6);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ELU, Elu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SIGMOID, Sigmoid);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MISH, Mish);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_HARD_SIGMOID, HardSigmoid);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SOFTRELU, SoftRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SWISH, HardSwish);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_TANH, Tanh);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LEAKY_RELU, LeakyRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONCAT, Concat);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADD, Add);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SUBTRACT, Sub);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MULTIPLY, Multiply);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DIVIDE, Div);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_POW, Pow);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MINIMUM, Minimum);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MAXIMUM, Maximum);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DATACONVERT, DataConvert);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_NEG, Neg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ABS, Abs);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SIN, Sin);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXP, Exp);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LOG, Log);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQRT, Sqrt);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RSQRT, Rsqrt);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQUARE, Square);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_NOT, LogicalNot);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_POOL, Pool2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SOFTMAX, Softmax);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQUEEZE, Squeeze);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_STACK, Stack);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2DEPTH, SpaceToDepth);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DEPTH2SPACE, DepthToSpace);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2BATCH, Space2Batch);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BATCH2SPACE, Batch2Space);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PAD, Pad);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PAD2, PadV2);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_FCL2, FullyConnected);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RESIZE, Resize);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPLIT, Split);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_STRIDED_SLICE, StridedSlice);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER_ND, GatherNd);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_REVERSE, Reverse);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SLICE, Slice);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SELECT, Select);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ARGMAX, Arg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ARGMIN, Arg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DECONVOLUTION, DeConv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BATCH_NORM, BatchNorm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PERMUTE, Transpose);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN, UnidirectionalRnn);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN, BidirectionalRnn);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONV2D, Conv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GROUPED_CONV2D, GroupedConv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU1, Relu1);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU6, Relu6);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ELU, Elu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SIGMOID, Sigmoid);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MISH, Mish);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_HARD_SIGMOID, HardSigmoid);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SOFTRELU, SoftRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SWISH, HardSwish);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_TANH, Tanh);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LEAKY_RELU, LeakyRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONCAT, Concat);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ADD, Add);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SUBTRACT, Sub);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MULTIPLY, Multiply);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DIVIDE, Div);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_POW, Pow);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MINIMUM, Minimum);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MAXIMUM, Maximum);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DATACONVERT, DataConvert);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_NEG, Neg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ABS, Abs);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SIN, Sin);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_EXP, Exp);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LOG, Log);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQRT, Sqrt);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RSQRT, Rsqrt);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQUARE, Square);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_NOT, LogicalNot);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_POOL, Pool2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SOFTMAX, Softmax);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQUEEZE, Squeeze);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_STACK, Stack);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2DEPTH, SpaceToDepth);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DEPTH2SPACE, DepthToSpace);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2BATCH, Space2Batch);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BATCH2SPACE, Batch2Space);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PAD, Pad);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PAD2, PadV2);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_FCL2, FullyConnected);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RESIZE, Resize);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPLIT, Split);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_STRIDED_SLICE, StridedSlice);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GATHER_ND, GatherNd);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_REVERSE, Reverse);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SLICE, Slice);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SELECT, Select);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ARGMAX, Arg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ARGMIN, Arg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DECONVOLUTION, DeConv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BATCH_NORM, BatchNorm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PERMUTE, Transpose);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN,
+                              UnidirectionalRnn);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN,
+                              BidirectionalRnn);
 #ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
 #endif
-    REGIST_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
-    REGIST_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
+    REGISTER_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
+    REGISTER_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
     // use default layout inference
     default: {
       VSILOGW("Op %d: default layout inference pass.", op_id);
@@ -312,13 +338,13 @@ LayoutInference(
       std::make_shared<layout_inference_impl::LayoutInferContext>(src_graph,
                                                                    infer_graph);
 
-  std::deque<std::shared_ptr<vx::Tensor>> tensor_queue;
+  std::queue<std::shared_ptr<vx::Tensor>> tensor_queue;
   auto graph_inputs = src_graph->InputsTensor();
   for (const auto& t_src : graph_inputs) {
     auto input = infer_graph->CreateTensor(t_src->GetSpec());
     layout_infer_ctx->UpdateTensorMap(t_src, input);
     layout_infer_ctx->UpdateGraphInputMap(t_src, input);
-    tensor_queue.push_back(t_src);
+    tensor_queue.push(t_src);
     layout_infer_ctx->SetPermuteVector(
         t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end()
                    ? tensor_pv_map[t_src]
@@ -329,27 +355,39 @@ LayoutInference(
   for (auto const_in : const_inputs) {
     std::vector<uint8_t> dataRef(const_in->GetSpec().GetByteSize());
     const_in->CopyDataFromTensor(dataRef.data());
-    auto input =
-        infer_graph->CreateTensor(const_in->GetSpec(), (const void*)dataRef.data());
+    auto input = infer_graph->CreateTensor(const_in->GetSpec(),
+                                           (const void*)dataRef.data());
     layout_infer_ctx->UpdateTensorMap(const_in, input);
-    tensor_queue.push_back(const_in);
+    tensor_queue.push(const_in);
     layout_infer_ctx->SetPermuteVector(
         const_in, tensor_pv_map.find(const_in) != tensor_pv_map.end()
-                     ? tensor_pv_map[const_in]
-                     : MakeShared(const_in->GetShape().size()));
+                      ? tensor_pv_map[const_in]
+                      : MakeShared(const_in->GetShape().size()));
   }
 
+  auto graph_outputs = src_graph->OutputsTensor();
+  for (const auto& t_src : graph_outputs) {
+    auto output = infer_graph->CreateTensor(t_src->GetSpec());
+    layout_infer_ctx->UpdateTensorMap(t_src, output);
+    layout_infer_ctx->UpdateGraphOutputMap(t_src, output);
+    tensor_queue.push(t_src);
+    layout_infer_ctx->SetPermuteVector(
+        t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end()
+                   ? tensor_pv_map[t_src]
+                   : MakeShared(t_src->GetShape().size()));
+  }
+
   while (!tensor_queue.empty()) {
     auto tensor = tensor_queue.front();
-    tensor_queue.pop_front();
+    tensor_queue.pop();
     const auto& consumers = src_graph->GetConsumersOp(tensor);
     for (const auto& op : consumers) {
-      if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ !=-1 &&
+      if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ != -1 &&
           layout_infer_ctx->IsReadyForInfer(op)) {
         auto next_tensors =
             layout_inference_impl::HandleLayoutInfer(layout_infer_ctx, op);
         for (const auto& t : next_tensors) {
-          tensor_queue.push_back(t);
+          tensor_queue.push(t);
         }
       }
     }
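The function above now seeds graph inputs, constant inputs, and (new in this change) graph outputs into a FIFO `std::queue`, and processes a consumer op only once all of its inputs have permute vectors. A self-contained sketch of that worklist pattern with simplified stand-in types (not the TIM-VX API):

```cpp
#include <functional>
#include <memory>
#include <queue>
#include <vector>

struct Tensor {};
struct Op {};

// Worklist traversal sketch: tensors are handled FIFO; an op is inferred only
// when it is ready and unvisited, and the tensors it produces go back on the
// queue so downstream ops become reachable.
void Traverse(
    std::vector<std::shared_ptr<Tensor>> seeds,  // inputs, consts, outputs
    const std::function<std::vector<std::shared_ptr<Op>>(
        const std::shared_ptr<Tensor>&)>& consumers_of,
    const std::function<bool(const std::shared_ptr<Op>&)>& ready_and_unvisited,
    const std::function<std::vector<std::shared_ptr<Tensor>>(
        const std::shared_ptr<Op>&)>& infer_op) {
  std::queue<std::shared_ptr<Tensor>> tensor_queue;
  for (auto& t : seeds) tensor_queue.push(std::move(t));
  while (!tensor_queue.empty()) {
    auto tensor = tensor_queue.front();
    tensor_queue.pop();
    for (const auto& op : consumers_of(tensor)) {
      if (!ready_and_unvisited(op)) continue;
      for (const auto& next : infer_op(op)) tensor_queue.push(next);
    }
  }
}
```

Pre-creating the output tensors while seeding them means the inferred graph's outputs are created in the same order as the source graph's, which appears to be the "Keep graph output order in layout inference" item from the commit message.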
|
|||
|
|
@ -51,7 +51,7 @@ class ActivationLayoutInfer : public OpLayoutInfer {
|
|||
auto activation = op_->Clone(context_->infer_graph_);
|
||||
auto out_infer = CreateOutputsTensor(input_pv);
|
||||
(*activation)
|
||||
.BindInput(context_->GetMapedTensor(i_src))
|
||||
.BindInput(context_->GetMappedTensor(i_src))
|
||||
.BindOutput(out_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
|
||||
|
|
@ -107,7 +107,7 @@ class PReluLayoutInfer : public OpLayoutInfer {
|
|||
context_->infer_graph_->CreateOperation<vx::ops::Reshape>(
|
||||
boardcast_shape);
|
||||
(*reshape)
|
||||
.BindInput(context_->GetMapedTensor(src_slope))
|
||||
.BindInput(context_->GetMappedTensor(src_slope))
|
||||
.BindOutput(reshape_out);
|
||||
context_->UpdateTensorMap(src_slope, reshape_out);
|
||||
}
|
||||
|
|
@ -130,8 +130,8 @@ class PReluLayoutInfer : public OpLayoutInfer {
|
|||
auto out_infer = CreateOutputsTensor(input_pv);
|
||||
|
||||
(*prelu)
|
||||
.BindInput(context_->GetMapedTensor(src_input))
|
||||
.BindInput(context_->GetMapedTensor(src_slope));
|
||||
.BindInput(context_->GetMappedTensor(src_input))
|
||||
.BindInput(context_->GetMappedTensor(src_slope));
|
||||
(*prelu).BindOutput(out_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class AddNLayoutInfer : public OpLayoutInfer {
|
|||
auto addn = op_->Clone(context_->infer_graph_);
|
||||
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*addn).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*addn).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
auto infer_out = CreateOutputsTensor(required_pv);
|
||||
(*addn).BindOutput(infer_out[0]);
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class ArgLayoutInfer : public OpLayoutInfer {
|
|||
|
||||
auto arg = op_->Clone(context_->infer_graph_);
|
||||
auto infer_out = CreateOutputsTensor(input_pv);
|
||||
(*arg).BindInput(context_->GetMapedTensor(src_input));
|
||||
(*arg).BindInput(context_->GetMappedTensor(src_input));
|
||||
(*arg).BindOutput(infer_out[0]);
|
||||
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class Batch2SpaceLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
auto perm_out =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->UpdateTensorMap(input_tensors[0], perm_out);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
}
|
||||
|
|
@ -70,7 +70,7 @@ class Batch2SpaceLayoutInfer : public OpLayoutInfer {
|
|||
context_->infer_graph_->CreateOperation<vx::ops::Batch2Space>(
|
||||
block_size, crop, vx::DataLayout::WHCN);
|
||||
auto out_tensor_infer = CreateOutputsTensor(required_pv);
|
||||
(*batch2space).BindInput(context_->GetMapedTensor(input_tensors[0]));
|
||||
(*batch2space).BindInput(context_->GetMappedTensor(input_tensors[0]));
|
||||
(*batch2space).BindOutput(out_tensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
// Add out tensor of src_graph into next_tensor
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ class BatchNormLayoutInfer : public OpLayoutInfer {
|
|||
perm_out = context_->infer_graph_->CreateTensor(src_in->GetSpec(), (const void*)dataRef.data());
|
||||
input_pv = MakeShared(src_in->GetShape().size());
|
||||
} else {
|
||||
perm_out = context_->GetMapedTensor(src_in);
|
||||
perm_out = context_->GetMappedTensor(src_in);
|
||||
input_pv = context_->GetPermuteVector(src_in);
|
||||
context_->SetPermuteVector(src_in, input_pv);
|
||||
if (idx == 0) {
|
||||
|
|
@ -73,11 +73,11 @@ class BatchNormLayoutInfer : public OpLayoutInfer {
|
|||
|
||||
auto batchnorm = op_->Clone(context_->infer_graph_);
|
||||
auto out_tensor_infer = CreateOutputsTensor(required_pv);
|
||||
(*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[0]));
|
||||
(*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[1]));
|
||||
(*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[2]));
|
||||
(*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[3]));
|
||||
(*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[4]));
|
||||
(*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[0]));
|
||||
(*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[1]));
|
||||
(*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[2]));
|
||||
(*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[3]));
|
||||
(*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[4]));
|
||||
|
||||
(*batchnorm).BindOutput(out_tensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class BidirectionalRnnLayoutInfer : public OpLayoutInfer {
|
|||
|
||||
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*cloned_op).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*cloned_op).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class BroadcastLayoutInfer : public OpLayoutInfer {
|
|||
auto cloned_op = op_->Clone(context_->infer_graph_);
|
||||
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*cloned_op).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*cloned_op).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class ConcatLayoutInfer : public OpLayoutInfer {
|
|||
auto concat = context_->infer_graph_->CreateOperation<vx::ops::Concat>(
|
||||
axis, op_->impl()->InputsTensor().size());
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*concat).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*concat).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
auto out_infer = CreateOutputsTensor(required_pv);
|
||||
(*concat).BindOutput(out_infer[0]);
|
||||
|
|
|
|||
|
|
@ -79,10 +79,10 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = input_pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_input =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
} else {
|
||||
infer_input = context_->GetMapedTensor(input_tensors[0]);
|
||||
infer_input = context_->GetMappedTensor(input_tensors[0]);
|
||||
context_->SetPermuteVector(input_tensors[0], input_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[0], infer_input);
|
||||
|
|
@ -104,10 +104,10 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_weight =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[1], weight_required_pv);
|
||||
} else {
|
||||
infer_weight = context_->GetMapedTensor(input_tensors[1]);
|
||||
infer_weight = context_->GetMappedTensor(input_tensors[1]);
|
||||
context_->SetPermuteVector(input_tensors[1], weight_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[1], infer_weight);
|
||||
|
|
@ -121,7 +121,7 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
|
|||
infer_bias = context_->infer_graph_->CreateTensor(
|
||||
input_tensors[2]->GetSpec(), (const void*)dataRef.data());
|
||||
} else {
|
||||
infer_bias = context_->GetMapedTensor(input_tensors[2]);
|
||||
infer_bias = context_->GetMappedTensor(input_tensors[2]);
|
||||
}
|
||||
auto bias_pv = MakeShared(1);
|
||||
context_->UpdateTensorMap(input_tensors[2], infer_bias);
|
||||
|
|
@ -131,7 +131,7 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto conv2d = op_->Clone(context_->infer_graph_);
|
||||
auto otensor_infer = CreateOutputsTensor(required_pv);
|
||||
for (const auto& i_src : input_tensors) {
|
||||
(*conv2d).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*conv2d).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
(*conv2d).BindOutput(otensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
|
|||
} else {
|
||||
// For bias
|
||||
if (in->GetShape().size() == 1) {
|
||||
infer_tensor = context_->GetMapedTensor(in);
|
||||
infer_tensor = context_->GetMappedTensor(in);
|
||||
trans_pv = MakeShared(1);
|
||||
} else {
|
||||
// For input/weight
|
||||
|
|
@ -89,10 +89,10 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_tensor =
|
||||
InsertPermute(context_->GetMapedTensor(in), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(in), final_pv);
|
||||
trans_pv = required_pv;
|
||||
} else {
|
||||
infer_tensor = context_->GetMapedTensor(in);
|
||||
infer_tensor = context_->GetMappedTensor(in);
|
||||
trans_pv = pv;
|
||||
}
|
||||
}
|
||||
|
|
@ -131,7 +131,7 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
|
|||
vx::DataLayout::WHDCN, vx::DataLayout::WHDIcOc);
|
||||
auto otensor_infer = CreateOutputsTensor(required_pv);
|
||||
for (const auto& i_src : input_tensors) {
|
||||
(*conv3d).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*conv3d).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
(*conv3d).BindOutput(otensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
|
|
|
|||
|
|
@ -79,10 +79,10 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = input_pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_input =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
} else {
|
||||
infer_input = context_->GetMapedTensor(input_tensors[0]);
|
||||
infer_input = context_->GetMappedTensor(input_tensors[0]);
|
||||
context_->SetPermuteVector(input_tensors[0], input_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[0], infer_input);
|
||||
|
|
@ -104,10 +104,10 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_weight =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[1], weight_required_pv);
|
||||
} else {
|
||||
infer_weight = context_->GetMapedTensor(input_tensors[1]);
|
||||
infer_weight = context_->GetMappedTensor(input_tensors[1]);
|
||||
context_->SetPermuteVector(input_tensors[1], weight_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[1], infer_weight);
|
||||
|
|
@ -121,7 +121,7 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
|
|||
infer_bias = context_->infer_graph_->CreateTensor(
|
||||
input_tensors[2]->GetSpec(), (const void*)dataRef.data());
|
||||
} else {
|
||||
infer_bias = context_->GetMapedTensor(input_tensors[2]);
|
||||
infer_bias = context_->GetMappedTensor(input_tensors[2]);
|
||||
}
|
||||
auto bias_pv = MakeShared(1);
|
||||
context_->UpdateTensorMap(input_tensors[2], infer_bias);
|
||||
|
|
@ -131,7 +131,7 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto deconv = op_->Clone(context_->infer_graph_);
|
||||
auto infer_out = CreateOutputsTensor(required_pv);
|
||||
for (const auto& i_src : input_tensors) {
|
||||
(*deconv).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*deconv).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
(*deconv).BindOutput(infer_out[0]);
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class DefaultLayoutInfer : public OpLayoutInfer {
|
|||
auto cloned_op = op_->Clone(context_->infer_graph_);
|
||||
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*cloned_op).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*cloned_op).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class DepthToSpaceLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
auto perm_out =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->UpdateTensorMap(input_tensors[0], perm_out);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
}
|
||||
|
|
@ -63,7 +63,7 @@ class DepthToSpaceLayoutInfer : public OpLayoutInfer {
|
|||
context_->infer_graph_->CreateOperation<vx::ops::DepthToSpace>(
|
||||
block_size, vx::DataLayout::WHCN);
|
||||
auto out_tensor_infer = CreateOutputsTensor(required_pv);
|
||||
(*space2depth).BindInput(context_->GetMapedTensor(input_tensors[0]));
|
||||
(*space2depth).BindInput(context_->GetMappedTensor(input_tensors[0]));
|
||||
(*space2depth).BindOutput(out_tensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
// Add out tensor of src_graph into next_tensor
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ class ElementWiseLayoutInfer : public OpLayoutInfer {
|
|||
auto required_pv = AlignPermuteVectorForElementWise();
|
||||
auto elementwise = context_->infer_graph_->CreateOperation<OpType>();
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*elementwise).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*elementwise).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
auto out_infer = CreateOutputsTensor(required_pv);
|
||||
(*elementwise).BindOutput(out_infer[0]);
|
||||
|
|
@ -120,7 +120,7 @@ class MultiplyLayoutInfer : public OpLayoutInfer {
|
|||
context_->infer_graph_->CreateOperation<tim::vx::ops::Multiply>(
|
||||
op_->impl()->node()->nn_param.multiply.scale);
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*multiply).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*multiply).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
auto out_infer = CreateOutputsTensor(required_pv);
|
||||
(*multiply).BindOutput(out_infer[0]);
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ class FullyConnectedLayoutInfer : public OpLayoutInfer {
|
|||
MakeShared(op_->impl()->OutputsTensor()[0]->GetShape().size());
|
||||
auto out_infer = CreateOutputsTensor(required_pv);
|
||||
for (auto in : op_->impl()->InputsTensor()) {
|
||||
(*fcl).BindInput(context_->GetMapedTensor(in));
|
||||
(*fcl).BindInput(context_->GetMappedTensor(in));
|
||||
}
|
||||
(*fcl).BindOutput(out_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class GatherLayoutInfer : public OpLayoutInfer {
|
|||
op_->impl()->node()->nn_param.gather.batch_dims);
|
||||
int32_t output_rank = -1;
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*gather).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*gather).BindInput(context_->GetMappedTensor(i_src));
|
||||
output_rank += i_src->GetShape().size();
|
||||
}
|
||||
auto infer_out = CreateOutputsTensor(
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ class GatherNdLayoutInfer : public OpLayoutInfer {
|
|||
|
||||
auto gather = context_->infer_graph_->CreateOperation<vx::ops::GatherNd>();
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*gather).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*gather).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
auto infer_out = CreateOutputsTensor(
|
||||
context_->GetPermuteVector(op_->impl()->InputsTensor()[0]));
|
||||
|
|
|
|||
|
|
@ -79,10 +79,10 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = input_pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_input =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
} else {
|
||||
infer_input = context_->GetMapedTensor(input_tensors[0]);
|
||||
infer_input = context_->GetMappedTensor(input_tensors[0]);
|
||||
context_->SetPermuteVector(input_tensors[0], input_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[0], infer_input);
|
||||
|
|
@ -104,10 +104,10 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_weight =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[1], weight_required_pv);
|
||||
} else {
|
||||
infer_weight = context_->GetMapedTensor(input_tensors[1]);
|
||||
infer_weight = context_->GetMappedTensor(input_tensors[1]);
|
||||
context_->SetPermuteVector(input_tensors[1], weight_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[1], infer_weight);
|
||||
|
|
@ -121,7 +121,7 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
|
|||
infer_bias = context_->infer_graph_->CreateTensor(
|
||||
input_tensors[2]->GetSpec(), (const void*)dataRef.data());
|
||||
} else {
|
||||
infer_bias = context_->GetMapedTensor(input_tensors[2]);
|
||||
infer_bias = context_->GetMappedTensor(input_tensors[2]);
|
||||
}
|
||||
auto bias_pv = MakeShared(1);
|
||||
context_->UpdateTensorMap(input_tensors[2], infer_bias);
|
||||
|
|
@ -131,7 +131,7 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
|
|||
auto grouped_conv2d = op_->Clone(context_->infer_graph_);
|
||||
auto otensor_infer = CreateOutputsTensor(required_pv);
|
||||
for (const auto& i_src : input_tensors) {
|
||||
(*grouped_conv2d).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*grouped_conv2d).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
(*grouped_conv2d).BindOutput(otensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
|
|
|
|||
|
|
@ -63,10 +63,10 @@ class InstanceNormLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = input_pv->Reverse()->Add(required_pv);
|
||||
std::shared_ptr<vx::Tensor> infer_input;
|
||||
if (!final_pv->IsAligned()) {
|
||||
infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
} else {
|
||||
infer_input = context_->GetMapedTensor(input_tensors[0]);
|
||||
infer_input = context_->GetMappedTensor(input_tensors[0]);
|
||||
context_->SetPermuteVector(input_tensors[0], input_pv);
|
||||
}
|
||||
context_->UpdateTensorMap(input_tensors[0], infer_input);
|
||||
|
|
@ -85,7 +85,7 @@ class InstanceNormLayoutInfer : public OpLayoutInfer {
|
|||
auto instance_norm = op_->Clone(context_->infer_graph_);
|
||||
auto outs_infer = CreateOutputsTensor(required_pv);
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*instance_norm).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*instance_norm).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
(*instance_norm).BindOutput(outs_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class L2NormalizationLayoutInfer : public OpLayoutInfer {
|
|||
auto l2norm =
|
||||
context_->infer_graph_->CreateOperation<vx::ops::L2Normalization>(axis);
|
||||
auto infer_out = CreateOutputsTensor(input_pv);
|
||||
(*l2norm).BindInput(context_->GetMapedTensor(src_input));
|
||||
(*l2norm).BindInput(context_->GetMappedTensor(src_input));
|
||||
(*l2norm).BindOutput(infer_out[0]);
|
||||
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ class LogicalOpsLayoutInfer : public OpLayoutInfer {
|
|||
auto infer_out = CreateOutputsTensor(required_pv);
|
||||
auto logical_op = context_->infer_graph_->CreateOperation<OpTpye>();
|
||||
for (const auto& i_src : op_->impl()->InputsTensor()) {
|
||||
(*logical_op).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*logical_op).BindInput(context_->GetMappedTensor(i_src));
|
||||
}
|
||||
(*logical_op).BindOutput(infer_out[0]);
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class LRNLayoutInfer : public OpLayoutInfer {
|
|||
->CreateOperation<vx::ops::LocalResponseNormalization>(
|
||||
size, alpha, beta, bias, axis);
|
||||
auto infer_out = CreateOutputsTensor(input_pv);
|
||||
(*lrn).BindInput(context_->GetMapedTensor(src_input));
|
||||
(*lrn).BindInput(context_->GetMappedTensor(src_input));
|
||||
(*lrn).BindOutput(infer_out[0]);
|
||||
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
|
|
|
|||
|
|
@@ -38,16 +38,13 @@ void OpLayoutInfer::OnOutputs(
   auto graph_outputs = context_->src_graph_->OutputsTensor();
   auto op_outputs = op_->impl()->OutputsTensor();
   for (const auto& out : op_outputs) {
-    if (graph_outputs.end() !=
-        std::find(graph_outputs.begin(), graph_outputs.end(), out)) {
-      context_->UpdateGraphOutputMap(out, context_->GetMapedTensor(out));
+    if (graph_outputs.cend() !=
+        std::find(graph_outputs.cbegin(), graph_outputs.cend(), out)) {
       auto pv = context_->GetPermuteVector(out);
       if (!pv->IsAligned()) {
-        auto perm_out = InsertPermute(context_->GetMapedTensor(out),
+        auto perm_out = InsertPermute(context_->GetMappedTensor(out),
                                       pv->Reverse(), true, out);
         // Update graph out tensor
         context_->UpdateTensorMap(out, perm_out);
-        context_->UpdateGraphOutputMap(out, perm_out);
       }
       if (!context_->src_graph_->GetConsumersOp(out).empty()) {
         // The tensor is output of graph, but it also is the input of other operations
@@ -65,19 +62,18 @@ void OpLayoutInfer::OnOutputs(
 std::shared_ptr<vx::Tensor> OpLayoutInfer::InsertPermute(
     std::shared_ptr<vx::Tensor> input, std::shared_ptr<IPermuteVector> perm,
     bool is_graph_output, std::shared_ptr<vx::Tensor> src_out) {
-  auto out_spec = input->GetSpec();
+  std::shared_ptr<vx::Tensor> out_tensor;
   if (is_graph_output) {
-    auto out_shape = src_out->GetShape();
-    out_spec.SetShape(out_shape);
-    out_spec.SetAttribute(vx::TensorAttribute::OUTPUT);
+    out_tensor = context_->GetMappedGraphOutputTensor(src_out);
   } else {
-    out_spec.SetAttribute(vx::TensorAttribute::TRANSIENT);
+    auto out_spec = input->GetSpec().AsTransientSpec();
+    if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) {
+      out_spec.quantization_.SetChannelDim(
+          MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim()));
+    }
+    out_tensor = context_->infer_graph_->CreateTensor(out_spec);
   }
-  if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) {
-    out_spec.quantization_.SetChannelDim(
-        MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim()));
-  }
-  auto out_tensor = context_->infer_graph_->CreateTensor(out_spec);
 
   auto perm_op = context_->infer_graph_->CreateOperation<vx::ops::Transpose>(
       perm->AsStdVec());
   (*perm_op).BindInput(input).BindOutput(out_tensor);
@@ -88,20 +84,28 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
     std::shared_ptr<IPermuteVector> required_pv) {
   std::vector<std::shared_ptr<vx::Tensor>> outputs_tensor;
 
-  if (op_->impl()->OutputsTensor().size() > 1) {
+  auto op_outputs = op_->impl()->OutputsTensor();
+  if (op_outputs.size() > 1) {
     // todo(sven): potential bug here if node have multi-output and require layout inference
     std::cout << "warning at " << __FUNCTION__ << ", #" << __LINE__
               << std::endl;
   }
 
-  for (const auto& o : op_->impl()->OutputsTensor()) {
+  for (const auto& o : op_outputs) {
     auto in_shape = o->GetShape();
     auto out_spec = o->GetSpec();
-    if (!(required_pv->IsAligned())) {
+    if (!required_pv->IsAligned()) {
       out_spec = out_spec.AsTransientSpec();
     }
-    auto t_infer = context_->infer_graph_->CreateTensor(out_spec);
-    context_->UpdateTensorMap(o, t_infer);
+
+    std::shared_ptr<vx::Tensor> t_infer;
+    if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
+      t_infer = context_->GetMappedTensor(o);
+    } else {
+      t_infer = context_->infer_graph_->CreateTensor(out_spec);
+      context_->UpdateTensorMap(o, t_infer);
+    }
+
     outputs_tensor.push_back(t_infer);
   }
   return outputs_tensor;
@@ -111,19 +115,26 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
     const std::vector<std::shared_ptr<IPermuteVector>>& required_pv) {
   std::vector<std::shared_ptr<vx::Tensor>> outputs_tensor;
 
-  assert(required_pv.size() == (op_->impl()->OutputsTensor().size()));
+  auto op_outputs = op_->impl()->OutputsTensor();
+  assert(required_pv.size() == (op_outputs.size()));
 
-  uint32_t i = 0;
-  for (const auto& o : op_->impl()->OutputsTensor()) {
+  for (size_t i = 0; i < op_outputs.size(); i++) {
+    const auto& o = op_outputs[i];
     auto in_shape = o->GetShape();
     auto out_spec = o->GetSpec();
-    if (!(required_pv[i]->IsAligned())) {
+    if (!required_pv[i]->IsAligned()) {
       out_spec = out_spec.AsTransientSpec();
     }
-    auto t_infer = context_->infer_graph_->CreateTensor(out_spec);
-    context_->UpdateTensorMap(o, t_infer);
+
+    std::shared_ptr<vx::Tensor> t_infer;
+    if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
+      t_infer = context_->GetMappedTensor(o);
+    } else {
+      t_infer = context_->infer_graph_->CreateTensor(out_spec);
+      context_->UpdateTensorMap(o, t_infer);
+    }
+
     outputs_tensor.push_back(t_infer);
-    i++;
   }
   return outputs_tensor;
 }
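In the `CreateOutputsTensor` hunks above, an output whose spec already carries the `OUTPUT` attribute is looked up from the context (it was pre-created when the graph outputs were seeded) instead of being created again. A simplified sketch of that reuse-or-create decision with toy types (not the real `TensorSpec`/`Tensor` classes):

```cpp
#include <memory>

enum class Attr { kOutput, kTransient };
struct Spec { Attr attr; };
struct Tensor { Spec spec; };

// Reuse the pre-created graph-output tensor when one exists; otherwise make a
// fresh tensor for an intermediate result. Reusing the pre-created tensor is
// what keeps the inferred graph's output order identical to the source graph.
std::shared_ptr<Tensor> ReuseOrCreate(const Spec& out_spec,
                                      const std::shared_ptr<Tensor>& mapped_output) {
  if (out_spec.attr == Attr::kOutput && mapped_output != nullptr) {
    return mapped_output;
  }
  return std::make_shared<Tensor>(Tensor{out_spec});
}
```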
@@ -198,8 +209,8 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() {
         std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
         i_src->CopyDataFromTensor(dataRef.data());
         context_->UpdateTensorMap(
-            i_src, context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                        (const void*)dataRef.data()));
+            i_src, context_->infer_graph_->CreateTensor(
+                       i_src->GetSpec(), (const void*)dataRef.data()));
         context_->SetPermuteVector(i_src, MakeShared(i_src->GetShape().size()));
       }
     } else {
@@ -215,9 +226,9 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() {
       } else {
         auto final_pv =
             context_->GetPermuteVector(i_src)->Reverse()->Add(required_pv);
-        final_pv->IsAligned() ? perm_out = context_->GetMapedTensor(i_src)
+        final_pv->IsAligned() ? perm_out = context_->GetMappedTensor(i_src)
                               : perm_out = InsertPermute(
-                                    context_->GetMapedTensor(i_src), final_pv);
+                                    context_->GetMappedTensor(i_src), final_pv);
       }
       context_->UpdateTensorMap(i_src, perm_out);
       context_->SetPermuteVector(i_src, required_pv);
@@ -247,8 +258,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() {
       if (required_pv->IsAligned()) {
         std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
         i_src->CopyDataFromTensor(dataRef.data());
-        perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                        (const void*)dataRef.data());
+        perm_out = context_->infer_graph_->CreateTensor(
+            i_src->GetSpec(), (const void*)dataRef.data());
       } else if (i_src->GetShape().size() == required_pv->Rank()) {
         perm_out = PermuteConstTensor(i_src, required_pv);
         // need shape expansion
@@ -263,8 +274,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() {
       auto final_pv =
           context_->GetPermuteVector(i_src)->Reverse()->Add(required_pv);
       final_pv->IsAligned()
-          ? perm_out = context_->GetMapedTensor(i_src)
-          : perm_out = InsertPermute(context_->GetMapedTensor(i_src), final_pv);
+          ? perm_out = context_->GetMappedTensor(i_src)
+          : perm_out = InsertPermute(context_->GetMappedTensor(i_src), final_pv);
     }
     context_->UpdateTensorMap(i_src, perm_out);
     context_->SetPermuteVector(i_src, required_pv);
@@ -280,11 +291,11 @@ void OpLayoutInfer::ReverseInputsPermuteVector() {
     if (i_src->IsConstTensor()) {
      std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
      i_src->CopyDataFromTensor(dataRef.data());
-      perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                      (const void*)dataRef.data());
+      perm_out = context_->infer_graph_->CreateTensor(
+          i_src->GetSpec(), (const void*)dataRef.data());
       input_pv = MakeShared(i_src->GetShape().size());
     } else {
-      perm_out = context_->GetMapedTensor(i_src);
+      perm_out = context_->GetMappedTensor(i_src);
       input_pv = context_->GetPermuteVector(i_src);
       if (!input_pv->IsAligned()) {
         perm_out = InsertPermute(perm_out, input_pv->Reverse());
|||
|
|
@ -63,7 +63,7 @@ class PadLayoutInfer : public OpLayoutInfer {
|
|||
front_size, back_size, pad_value, pad_mode);
|
||||
|
||||
auto out_infer = CreateOutputsTensor(input_pv);
|
||||
(*pad).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*pad).BindInput(context_->GetMappedTensor(i_src));
|
||||
(*pad).BindOutput(out_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ class PadV2LayoutInfer : public OpLayoutInfer {
|
|||
auto pad_v2 = context_->infer_graph_->CreateOperation<vx::ops::PadV2>(
|
||||
front_size, back_size, pad_value);
|
||||
auto out_infer = CreateOutputsTensor(input_pv);
|
||||
(*pad_v2).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*pad_v2).BindInput(context_->GetMappedTensor(i_src));
|
||||
(*pad_v2).BindOutput(out_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class Pool2dLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = pv->Reverse()->Add(required_pv);
|
||||
if (!final_pv->IsAligned()) {
|
||||
auto perm_out =
|
||||
InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
|
||||
InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
|
||||
context_->UpdateTensorMap(input_tensors[0], perm_out);
|
||||
context_->SetPermuteVector(input_tensors[0], required_pv);
|
||||
}
|
||||
|
|
@ -82,7 +82,7 @@ class Pool2dLayoutInfer : public OpLayoutInfer {
|
|||
pool_type, pad_type, ksize, stride, round_type, vx::DataLayout::WHCN);
|
||||
}
|
||||
auto otensor_infer = CreateOutputsTensor(required_pv);
|
||||
(*pool2d).BindInput(context_->GetMapedTensor(input_tensors[0]));
|
||||
(*pool2d).BindInput(context_->GetMappedTensor(input_tensors[0]));
|
||||
(*pool2d).BindOutput(otensor_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
// Add out tensor of src_graph into next_tensor
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class ReduceLayoutInfer : public OpLayoutInfer {
|
|||
}
|
||||
auto reduce = context_->infer_graph_->CreateOperation<OpType>(
|
||||
new_axis, op_->impl()->node()->nn_param.reduce.keep_dim);
|
||||
(*reduce).BindInput(context_->GetMapedTensor(t_src));
|
||||
(*reduce).BindInput(context_->GetMappedTensor(t_src));
|
||||
|
||||
if (op_->impl()->node()->nn_param.reduce.keep_dim) {
|
||||
auto otensor_infer = CreateOutputsTensor(pv);
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class ResizeLayoutInfer : public OpLayoutInfer {
|
|||
auto final_pv = input_pv->Reverse()->Add(required_pv);
|
||||
|
||||
if (!final_pv->IsAligned()) {
|
||||
auto perm_out = InsertPermute(context_->GetMapedTensor(i_src), final_pv);
|
||||
auto perm_out = InsertPermute(context_->GetMappedTensor(i_src), final_pv);
|
||||
context_->UpdateTensorMap(i_src, perm_out);
|
||||
context_->SetPermuteVector(i_src, final_pv);
|
||||
}
|
||||
|
|
@ -70,7 +70,7 @@ class ResizeLayoutInfer : public OpLayoutInfer {
|
|||
target_width);
|
||||
|
||||
auto out_infer = CreateOutputsTensor(required_pv);
|
||||
(*resize).BindInput(context_->GetMapedTensor(i_src));
|
||||
(*resize).BindInput(context_->GetMappedTensor(i_src));
|
||||
(*resize).BindOutput(out_infer[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
|
||||
next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ class ReverseLayoutInfer : public OpLayoutInfer {
|
|||
|
||||
auto reverse = context_->infer_graph_->CreateOperation<vx::ops::Reverse>(
|
||||
axis);
|
||||
(*reverse).BindInput(context_->GetMapedTensor(src_input));
|
||||
(*reverse).BindInput(context_->GetMappedTensor(src_input));
|
||||
auto infer_out = CreateOutputsTensor(input_pv);
|
||||
(*reverse).BindOutput(infer_out[0]);
|
||||
context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
|
||||
|
|
|
|||
|
|
@@ -62,10 +62,10 @@ class RoiAlignLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -84,7 +84,7 @@ class RoiAlignLayoutInfer : public OpLayoutInfer {
     auto roi_align = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*roi_align).BindInput(context_->GetMapedTensor(i_src));
+      (*roi_align).BindInput(context_->GetMappedTensor(i_src));
     }
     (*roi_align).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
@@ -62,10 +62,10 @@ class RoiPoolLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -84,7 +84,7 @@ class RoiPoolLayoutInfer : public OpLayoutInfer {
     auto roi_pool = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*roi_pool).BindInput(context_->GetMapedTensor(i_src));
+      (*roi_pool).BindInput(context_->GetMappedTensor(i_src));
     }
     (*roi_pool).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
@@ -42,7 +42,7 @@ class SelectLayoutInfer : public OpLayoutInfer {
     auto select = context_->infer_graph_->CreateOperation<vx::ops::Select>();
     auto infer_out = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*select).BindInput(context_->GetMapedTensor(i_src));
+      (*select).BindInput(context_->GetMappedTensor(i_src));
     }
     (*select).BindOutput(infer_out[0]);

@@ -49,7 +49,7 @@ class SimpleOpsLayoutInfer : public OpLayoutInfer {
     auto out_infer = CreateOutputsTensor(input_pv);
     auto simple_op = context_->infer_graph_->CreateOperation<OpType>();
     (*simple_op)
-        .BindInput(context_->GetMapedTensor(i_src))
+        .BindInput(context_->GetMappedTensor(i_src))
         .BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
@@ -54,7 +54,7 @@ class SliceLayoutInfer : public OpLayoutInfer {
     auto slice = context_->infer_graph_->CreateOperation<vx::ops::Slice>(
         dims, start, length);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*slice).BindInput(context_->GetMapedTensor(src_input));
+    (*slice).BindInput(context_->GetMappedTensor(src_input));
     (*slice).BindOutput(infer_out[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
@@ -49,7 +49,7 @@ class SoftmaxLayoutInfer : public OpLayoutInfer {
     auto softmax =
         context_->infer_graph_->CreateOperation<vx::ops::Softmax>(beta, axis);
     auto otensor_infer = CreateOutputsTensor(required_pv);
-    (*softmax).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*softmax).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*softmax).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
@@ -51,7 +51,7 @@ class Space2BatchLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -70,7 +70,7 @@ class Space2BatchLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::Space2Batch>(
             block_size, pad, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2batch).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2batch).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2batch).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
@@ -51,7 +51,7 @@ class SpaceToDepthLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -64,7 +64,7 @@ class SpaceToDepthLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::SpaceToDepth>(
            block_size, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2depth).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2depth).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2depth).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
@@ -51,7 +51,7 @@ class SplitLayoutInfer : public OpLayoutInfer {
     auto split =
         context_->infer_graph_->CreateOperation<vx::ops::Split>(axis, slices);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*split).BindInput(context_->GetMapedTensor(input_tensor));
+    (*split).BindInput(context_->GetMappedTensor(input_tensor));
     (*split).BindOutputs(infer_out);
     for (const auto& out : op_->impl()->OutputsTensor()) {
       context_->SetPermuteVector(out, input_pv);
@@ -50,7 +50,7 @@ class SqueezeLayoutInfer : public OpLayoutInfer {
     auto squeeze =
         context_->infer_graph_->CreateOperation<vx::ops::Squeeze>(axis);
     (*squeeze).BindInput(
-        context_->GetMapedTensor(op_->impl()->InputsTensor()[0]));
+        context_->GetMappedTensor(op_->impl()->InputsTensor()[0]));

     auto required_pv =
         MakeShared(op_->impl()->OutputsTensor()[0]->GetShape().size());
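Because Squeeze changes the tensor rank, the hunk above derives required_pv from the output shape's rank instead of reusing the input's permute vector. A tiny sketch of the identity permute vector that a rank-based factory like MakeShared(rank) presumably yields (assumption based on usage here, not the library source):

#include <cstdint>
#include <numeric>
#include <vector>

// Hypothetical stand-in for a rank-based factory such as MakeShared(rank):
// an identity permute vector {0, 1, ..., rank - 1} for the new output rank.
std::vector<uint32_t> MakeIdentityPermute(uint32_t rank) {
  std::vector<uint32_t> pv(rank);
  std::iota(pv.begin(), pv.end(), 0u);
  return pv;
}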
@@ -53,7 +53,7 @@ class StackLayoutInfer : public OpLayoutInfer {
     auto aligninput_pv = AlignPermuteVectorForMutilInputs();

     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*stack).BindInput(context_->GetMapedTensor(i_src));
+      (*stack).BindInput(context_->GetMappedTensor(i_src));
     }

     if (axis < 0) {
@@ -105,7 +105,7 @@ class StridedSliceLayoutInfer : public OpLayoutInfer {
     }

     auto infer_out = CreateOutputsTensor(out_pv);
-    (*strided_slice).BindInput(context_->GetMapedTensor(src_input));
+    (*strided_slice).BindInput(context_->GetMappedTensor(src_input));
     (*strided_slice).BindOutput(infer_out[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], out_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
@@ -42,7 +42,7 @@ class TransposeLayoutInfer : public OpLayoutInfer {
   void OnInputs(
       std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
     auto src_input = op_->impl()->InputsTensor()[0];
-    auto infer_input = context_->GetMapedTensor(src_input);
+    auto infer_input = context_->GetMappedTensor(src_input);
     auto input_pv = context_->GetPermuteVector(src_input);

     std::vector<uint32_t> perm(op_->impl()->node()->nn_param.permute.dim_num);
@@ -72,7 +72,7 @@ class UnidirectionalLstmLayoutInfer : public OpLayoutInfer {


     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }

     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
@@ -72,7 +72,7 @@ class UnidirectionalRnnLayoutInfer : public OpLayoutInfer {


     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }


@@ -53,7 +53,7 @@ class Yolov4LayoutInfer : public OpLayoutInfer {
         context_->SetPermuteVector(i_src, MakeShared(4));
         context_->UpdateTensorMap(i_src, i_infer);
       }
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }

     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;