From feaf06365b29f70d11003b5b5b1bf3bd4b67edf4 Mon Sep 17 00:00:00 2001
From: Yunshan <goose_bomb@outlook.com>
Date: Wed, 20 Dec 2023 21:26:16 +0800
Subject: [PATCH] Refine layout inference (#671)

* Remove unnecessary compiler flags

* Refactor CMakeLists.txt

* Tweak CMakeLists.txt for libtim_internal

* Tweak CMakeLists.txt for libtim-vx

* Make TIM_VX_ENABLE_TEST default to OFF

* Eliminate usage of include_directories

* Fix CI unit test

* Fix warnings relating to inheritance

* Keep graph output order in layout inference

Type: Code Improvement

* Fix typos in layout inference

Type: Code Improvement

---------

authored-by: Xiaoran Weng <Xiaoran.Weng@verisilicon.com>
---
 src/tim/transform/layout_infer_context.h      |  16 +-
 src/tim/transform/layout_inference.cc         | 310 ++++++++++--------
 .../ops/activation_layout_inference.h         |   8 +-
 src/tim/transform/ops/addn_layout_inference.h |   2 +-
 src/tim/transform/ops/arg_layout_inference.h  |   2 +-
 .../ops/batch2space_layout_inference.h        |   4 +-
 .../ops/batchnorm_layout_inference.h          |  12 +-
 .../ops/bidirectional_rnn_layout_inference.h  |   2 +-
 .../ops/broadcast_layout_inference.h          |   2 +-
 .../transform/ops/concat_layout_inferene.h    |   2 +-
 .../transform/ops/conv2d_layout_inference.h   |  12 +-
 .../transform/ops/conv3d_layout_inference.h   |   8 +-
 .../transform/ops/deconv2d_layout_inference.h |  12 +-
 .../transform/ops/default_layout_inference.h  |   2 +-
 .../ops/depth2space_layout_inference.h        |   4 +-
 .../ops/elementwise_layout_inference.h        |   4 +-
 .../ops/fullyconnected_layout_inference.h     |   2 +-
 .../transform/ops/gather_layout_inference.h   |   2 +-
 .../ops/gather_nd_layout_inference.h          |   2 +-
 .../ops/grouped_conv2d_layout_inference.h     |  12 +-
 .../ops/instance_norm_layout_inference.h      |   6 +-
 .../ops/l2normalization_layout_inference.h    |   2 +-
 .../transform/ops/logical_layout_inference.h  |   2 +-
 src/tim/transform/ops/lrn_layout_inference.h  |   2 +-
 src/tim/transform/ops/op_layout_inference.cc  |  89 ++---
 src/tim/transform/ops/pad_layout_inference.h  |   2 +-
 .../transform/ops/pad_v2_layout_inference.h   |   2 +-
 .../transform/ops/pool2d_layout_inference.h   |   4 +-
 .../transform/ops/reduce_layout_inference.h   |   2 +-
 .../transform/ops/resize_layout_inference.h   |   4 +-
 .../transform/ops/reverse_layout_inference.h  |   2 +-
 .../ops/roi_align_layout_inference.h          |   6 +-
 .../transform/ops/roi_pool_layout_inference.h |   6 +-
 .../transform/ops/select_layout_inference.h   |   2 +-
 .../ops/simple_ops_layout_inference.h         |   2 +-
 .../transform/ops/slice_layout_inference.h    |   2 +-
 .../transform/ops/softmax_layout_inference.h  |   2 +-
 .../ops/space2batch_layout_inference.h        |   4 +-
 .../ops/space2depth_layout_inference.h        |   4 +-
 .../transform/ops/split_layout_inference.h    |   2 +-
 .../transform/ops/squeeze_layout_inference.h  |   2 +-
 .../transform/ops/stack_layout_inference.h    |   2 +-
 .../ops/stridedslice_layout_inference.h       |   2 +-
 .../ops/transpose_layout_inference.h          |   2 +-
 .../unidirectional_lstm_layout_inference.h    |   2 +-
 .../ops/unidirectional_rnn_layout_inference.h |   2 +-
 .../transform/ops/yolov4_layout_inference.h   |   2 +-
 47 files changed, 318 insertions(+), 263 deletions(-)

diff --git a/src/tim/transform/layout_infer_context.h b/src/tim/transform/layout_infer_context.h
index d63960c..41dd2eb 100644
--- a/src/tim/transform/layout_infer_context.h
+++ b/src/tim/transform/layout_infer_context.h
@@ -1,16 +1,18 @@
 #ifndef TIM_VX_LAYOUT_INFER_CONTEXT_H_
 #define TIM_VX_LAYOUT_INFER_CONTEXT_H_
+
 #include "permute_vector.h"
 #include "tim/transform/layout_inference.h"
 
+#include <unordered_map>
+
 namespace tim {
 namespace transform {
 namespace layout_inference_impl {
 class LayoutInferContext {
  public:
   LayoutInferContext(const std::shared_ptr<vx::Graph>& src_graph,
-                     std::shared_ptr<vx::Graph>& infer_graph)
-      : src_graph_(src_graph), infer_graph_(infer_graph) {}
+                     std::shared_ptr<vx::Graph>& infer_graph);
   void SetPermuteVector(std::shared_ptr<vx::Tensor> tensor,
                         std::shared_ptr<IPermuteVector> pv);
   const std::shared_ptr<IPermuteVector> GetPermuteVector(
@@ -20,14 +22,18 @@ class LayoutInferContext {
   bool IsReadyForInfer(const std::shared_ptr<vx::Operation>& op) const;
   void UpdateTensorMap(const std::shared_ptr<vx::Tensor>& t_src,
                        const std::shared_ptr<vx::Tensor>& t_layout);
-  std::shared_ptr<vx::Tensor> GetMapedTensor(
+  std::shared_ptr<vx::Tensor> GetMappedTensor(
+      const std::shared_ptr<vx::Tensor>& t_src) const;
+  std::shared_ptr<vx::Tensor> GetMappedGraphInputTensor(
+      const std::shared_ptr<vx::Tensor>& t_src) const;
+  std::shared_ptr<vx::Tensor> GetMappedGraphOutputTensor(
       const std::shared_ptr<vx::Tensor>& t_src) const;
 
   void UpdateGraphInputMap(const std::shared_ptr<vx::Tensor>& i_src,
                            const std::shared_ptr<vx::Tensor>& i_layout);
 
   void UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>& o_src,
-                           const std::shared_ptr<vx::Tensor>& o_layout);
+                            const std::shared_ptr<vx::Tensor>& o_layout);
 
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<vx::Tensor>>
   GetGraphInputMap() const {
@@ -44,7 +50,7 @@ class LayoutInferContext {
  private:
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<IPermuteVector>>
       tensor_pv_;
-  std::vector<std::shared_ptr<vx::Operation>> visited_op_;
+  std::unordered_map<std::shared_ptr<vx::Operation>, bool> op_visited_;
   // tensor_in_src -> tensor_in_layout
   std::map<std::shared_ptr<vx::Tensor>, std::shared_ptr<vx::Tensor>>
       tensor_map_;
diff --git a/src/tim/transform/layout_inference.cc b/src/tim/transform/layout_inference.cc
index fd2f163..5bb11d5 100644
--- a/src/tim/transform/layout_inference.cc
+++ b/src/tim/transform/layout_inference.cc
@@ -73,7 +73,7 @@
 #include "ops/roi_pool_layout_inference.h"
 
 #include <algorithm>
-#include <deque>
+#include <queue>
 
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
@@ -87,7 +87,16 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     std::shared_ptr<layout_inference_impl::LayoutInferContext>& ctx,
     const std::shared_ptr<vx::Operation>& op);
 
-// Implemention for LayoutInferContext
+// Implementation for LayoutInferContext
+LayoutInferContext::LayoutInferContext(
+    const std::shared_ptr<vx::Graph>& src_graph,
+    std::shared_ptr<vx::Graph>& infer_graph)
+    : src_graph_(src_graph), infer_graph_(infer_graph) {
+  for (const auto& op : src_graph->OpVector()) {  // ops listed by src_graph
+    op_visited_[op] = false;  // seeded so IsVisited() can look it up with at()
+  }
+}
+
 void LayoutInferContext::SetPermuteVector(std::shared_ptr<vx::Tensor> tensor,
                                           std::shared_ptr<IPermuteVector> pv) {
   if (tensor_pv_.end() != tensor_pv_.find(tensor)) {
@@ -110,27 +119,19 @@ const std::shared_ptr<IPermuteVector> LayoutInferContext::GetPermuteVector(
 }
 
 void LayoutInferContext::MarkVisited(const std::shared_ptr<vx::Operation>& op) {
-  if (visited_op_.end() !=
-      std::find(visited_op_.begin(), visited_op_.end(), op)) {
-    VSILOGW("The operation has been mark as visited.");
-  } else {
-    visited_op_.push_back(op);
-  }
+  op_visited_[op] = true;  // operator[] also inserts ops missing from the map
 }
 
-bool LayoutInferContext::IsVisited(const std::shared_ptr<vx::Operation>& op) const {
-  if (visited_op_.end() !=
-      std::find(visited_op_.begin(), visited_op_.end(), op)) {
-    return true;
-  } else {
-    return false;
-  }
+bool LayoutInferContext::IsVisited(
+    const std::shared_ptr<vx::Operation>& op) const {
+  return op_visited_.at(op);  // at() throws std::out_of_range for unknown ops
 }
 
 bool LayoutInferContext::IsReadyForInfer(
     const std::shared_ptr<vx::Operation>& op) const {
   for (const auto& tensor : op->impl()->InputsTensor()) {
-    if (!tensor->IsConstTensor() && tensor->GetId() != (uint32_t)-1 &&
+    if (!tensor->IsConstTensor() &&
+        tensor->GetId() != static_cast<uint32_t>(-1) &&
         (tensor_pv_.end() == tensor_pv_.find(tensor))) {
       return false;
     }
@@ -144,68 +145,91 @@ void LayoutInferContext::UpdateTensorMap(
   tensor_map_[t_src] = t_layout;
 }
 
-std::shared_ptr<vx::Tensor> LayoutInferContext::GetMapedTensor(
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedTensor(
     const std::shared_ptr<vx::Tensor>& t_src) const {
   auto it = tensor_map_.find(t_src);
   if (it != tensor_map_.end()) {
     return it->second;
-  } else {
-    VSILOGE("Tensor has not beed inserted in tensor map.");
-    assert(false);
   }
 
+  VSILOGE("Tensor has not been inserted in tensor map.");
   return nullptr;
 }
 
-void LayoutInferContext::UpdateGraphInputMap(const std::shared_ptr<vx::Tensor>& i_src,
-                           const std::shared_ptr<vx::Tensor>& i_layout) {
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedGraphInputTensor(
+    const std::shared_ptr<vx::Tensor>& t_src) const {
+  auto it = graph_input_map_.find(t_src);
+  if (it != graph_input_map_.end()) {  // end() of the map that was searched
+    return it->second;
+  }
+
+  VSILOGE("Tensor has not been inserted in graph input tensor map.");
+  return nullptr;  // no entry recorded for t_src
+}
+
+std::shared_ptr<vx::Tensor> LayoutInferContext::GetMappedGraphOutputTensor(
+    const std::shared_ptr<vx::Tensor>& t_src) const {
+  auto it = graph_output_map_.find(t_src);
+  if (it != graph_output_map_.end()) {  // end() of the map that was searched
+    return it->second;
+  }
+
+  VSILOGE("Tensor has not been inserted in graph output tensor map.");
+  return nullptr;  // no entry recorded for t_src
+}
+
+void LayoutInferContext::UpdateGraphInputMap(
+    const std::shared_ptr<vx::Tensor>& i_src,
+    const std::shared_ptr<vx::Tensor>& i_layout) {
   graph_input_map_[i_src] = i_layout;
 }
 
-void LayoutInferContext::UpdateGraphOutputMap(const std::shared_ptr<vx::Tensor>& o_src,
-                           const std::shared_ptr<vx::Tensor>& o_layout) {
+void LayoutInferContext::UpdateGraphOutputMap(
+    const std::shared_ptr<vx::Tensor>& o_src,
+    const std::shared_ptr<vx::Tensor>& o_layout) {
   graph_output_map_[o_src] = o_layout;
 }
 
-#define REGIST_LAYOUT_INFERENCE(op_idx, name)                     \
+#define REGISTER_LAYOUT_INFERENCE(op_idx, name)                   \
   case op_idx: {                                                  \
     auto op_infer = std::make_shared<name##LayoutInfer>(op, ctx); \
     op_infer->OnInputs(next_tensors);                             \
     op_infer->OnOutputs(next_tensors);                            \
     break;                                                        \
-  }                                                               \
+  }
 
-#define REGIST_REDUCE_LAYOUT_INFERENCE(op_idx)                                 \
-  case op_idx: {                                                               \
-    auto reduce_type = op->impl()->node()->nn_param.reduce.type;               \
-    switch (reduce_type) {                                                     \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);                 \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);                 \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);                   \
-    default:                                                                   \
-      VSILOGW("Op %d: Default layout inference pass for reduce.", reduce_type);\
-      assert(false);                                                           \
-    }                                                                          \
-    break;                                                                     \
-  }                                                                            \
+#define REGISTER_REDUCE_LAYOUT_INFERENCE(op_idx)                    \
+  case op_idx: {                                                    \
+    auto reduce_type = op->impl()->node()->nn_param.reduce.type;    \
+    switch (reduce_type) {                                          \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MEAN, ReduceMean);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MAX, ReduceMax);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_MIN, ReduceMin);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_PROD, ReduceProd);    \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_ANY, ReduceAny);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_SUM, ReduceSum);      \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_REDUCE_ALL, ReduceAll);      \
+      default:                                                      \
+        VSILOGW("Op %d: Default layout inference pass for reduce.", \
+                reduce_type);                                       \
+        assert(false);                                              \
+    }                                                               \
+    break;                                                          \
+  }
 
-#define REGIST_LOGICAL_LAYOUT_INFERENCE(op_idx)                                  \
-  case op_idx: {                                                                 \
-    auto logical_type = op->impl()->node()->nn_param.relational_ops.op;          \
-    switch (logical_type)                                                        \
-    {                                                                            \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);                   \
-      REGIST_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);                     \
-    default:                                                                     \
-      VSILOGW("Op %d: Default layout inference pass for logical.", logical_type);\
-      assert(false);                                                             \
-    }                                                                            \
-    break;                                                                       \
-  }                                                                              \
+#define REGISTER_LOGICAL_LAYOUT_INFERENCE(op_idx)                       \
+  case op_idx: {                                                        \
+    auto logical_type = op->impl()->node()->nn_param.relational_ops.op; \
+    switch (logical_type) {                                             \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_LOGICAL_AND, LogicalAnd);        \
+      REGISTER_LAYOUT_INFERENCE(VSI_NN_LOGICAL_OR, LogicalOr);          \
+      default:                                                          \
+        VSILOGW("Op %d: Default layout inference pass for logical.",    \
+                logical_type);                                          \
+        assert(false);                                                  \
+    }                                                                   \
+    break;                                                              \
+  }
 
 std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     std::shared_ptr<layout_inference_impl::LayoutInferContext>& ctx,
@@ -214,78 +238,80 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
   auto op_id = op->impl()->kind_;
   std::vector<std::shared_ptr<vx::Tensor>> next_tensors;
   switch (op_id) {
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV2D, Conv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GROUPED_CONV2D, GroupedConv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU1, Relu1);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RELU6, Relu6);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ELU, Elu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SIGMOID, Sigmoid);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MISH, Mish);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_HARD_SIGMOID, HardSigmoid);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SOFTRELU, SoftRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SWISH, HardSwish);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_TANH, Tanh);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LEAKY_RELU, LeakyRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONCAT, Concat);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADD, Add);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SUBTRACT, Sub);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MULTIPLY, Multiply);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DIVIDE, Div);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_POW, Pow);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MINIMUM, Minimum);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_MAXIMUM, Maximum);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DATACONVERT, DataConvert);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_NEG, Neg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ABS, Abs);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SIN, Sin);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXP, Exp);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LOG, Log);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQRT, Sqrt);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RSQRT, Rsqrt);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQUARE, Square);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_NOT, LogicalNot);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_POOL, Pool2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SOFTMAX, Softmax);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SQUEEZE, Squeeze);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_STACK, Stack);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2DEPTH, SpaceToDepth);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DEPTH2SPACE, DepthToSpace);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2BATCH, Space2Batch);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BATCH2SPACE, Batch2Space);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PAD, Pad);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PAD2, PadV2);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_FCL2, FullyConnected);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_RESIZE, Resize);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SPLIT, Split);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_STRIDED_SLICE, StridedSlice);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER_ND, GatherNd);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_REVERSE, Reverse);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SLICE, Slice);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_SELECT, Select);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ARGMAX, Arg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ARGMIN, Arg);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_DECONVOLUTION, DeConv2d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BATCH_NORM, BatchNorm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PERMUTE, Transpose);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN, UnidirectionalRnn);
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN, BidirectionalRnn);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONV2D, Conv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GROUPED_CONV2D, GroupedConv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU, Relu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU1, Relu1);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RELU6, Relu6);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ELU, Elu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SIGMOID, Sigmoid);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MISH, Mish);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_HARD_SIGMOID, HardSigmoid);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SOFTRELU, SoftRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SWISH, HardSwish);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_TANH, Tanh);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LEAKY_RELU, LeakyRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONCAT, Concat);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ADD, Add);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SUBTRACT, Sub);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MULTIPLY, Multiply);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DIVIDE, Div);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_POW, Pow);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MINIMUM, Minimum);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_MAXIMUM, Maximum);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DATACONVERT, DataConvert);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_NEG, Neg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ABS, Abs);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SIN, Sin);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_EXP, Exp);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LOG, Log);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQRT, Sqrt);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RSQRT, Rsqrt);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQUARE, Square);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_NOT, LogicalNot);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_POOL, Pool2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SOFTMAX, Softmax);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SQUEEZE, Squeeze);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_STACK, Stack);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2DEPTH, SpaceToDepth);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DEPTH2SPACE, DepthToSpace);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPACE2BATCH, Space2Batch);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BATCH2SPACE, Batch2Space);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PAD, Pad);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PAD2, PadV2);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_FCL2, FullyConnected);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_RESIZE, Resize);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SPLIT, Split);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_STRIDED_SLICE, StridedSlice);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_GATHER_ND, GatherNd);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_REVERSE, Reverse);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SLICE, Slice);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_SELECT, Select);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ARGMAX, Arg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_ARGMIN, Arg);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_DECONVOLUTION, DeConv2d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BATCH_NORM, BatchNorm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_PERMUTE, Transpose);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CONV3D, Conv3d);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_LSTM_OVXLIB, UnidirectionalLstm);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_EXPAND_BROADCAST, Broadcast);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_UNIDIRECTIONAL_SEQUENCE_RNN,
+                              UnidirectionalRnn);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_BIDIRECTIONAL_SEQUENCE_RNN,
+                              BidirectionalRnn);
 #ifdef VSI_FEAT_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS
-    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
+    REGISTER_LAYOUT_INFERENCE(VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS, Yolov4);
 #endif
-    REGIST_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
-    REGIST_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
+    REGISTER_LOGICAL_LAYOUT_INFERENCE(VSI_NN_OP_LOGICAL_OPS);
+    REGISTER_REDUCE_LAYOUT_INFERENCE(VSI_NN_OP_REDUCE);
     // use default layout inference
     default: {
       VSILOGW("Op %d: default layout inference pass.", op_id);
@@ -312,13 +338,13 @@ LayoutInference(
       std::make_shared<layout_inference_impl::LayoutInferContext>(src_graph,
                                                                   infer_graph);
 
-  std::deque<std::shared_ptr<vx::Tensor>> tensor_queue;
+  std::queue<std::shared_ptr<vx::Tensor>> tensor_queue;
   auto graph_inputs = src_graph->InputsTensor();
   for (const auto& t_src : graph_inputs) {
     auto input = infer_graph->CreateTensor(t_src->GetSpec());
     layout_infer_ctx->UpdateTensorMap(t_src, input);
     layout_infer_ctx->UpdateGraphInputMap(t_src, input);
-    tensor_queue.push_back(t_src);
+    tensor_queue.push(t_src);
     layout_infer_ctx->SetPermuteVector(
         t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end()
                    ? tensor_pv_map[t_src]
@@ -329,27 +355,39 @@ LayoutInference(
   for (auto const_in : const_inputs) {
     std::vector<uint8_t> dataRef(const_in->GetSpec().GetByteSize());
     const_in->CopyDataFromTensor(dataRef.data());
-    auto input =
-        infer_graph->CreateTensor(const_in->GetSpec(), (const void*)dataRef.data());
+    auto input = infer_graph->CreateTensor(const_in->GetSpec(),
+                                           (const void*)dataRef.data());
     layout_infer_ctx->UpdateTensorMap(const_in, input);
-    tensor_queue.push_back(const_in);
+    tensor_queue.push(const_in);
     layout_infer_ctx->SetPermuteVector(
         const_in, tensor_pv_map.find(const_in) != tensor_pv_map.end()
-                   ? tensor_pv_map[const_in]
-                   : MakeShared(const_in->GetShape().size()));
+                      ? tensor_pv_map[const_in]
+                      : MakeShared(const_in->GetShape().size()));
+  }
+
+  auto graph_outputs = src_graph->OutputsTensor();
+  for (const auto& t_src : graph_outputs) {
+    auto output = infer_graph->CreateTensor(t_src->GetSpec());
+    layout_infer_ctx->UpdateTensorMap(t_src, output);
+    layout_infer_ctx->UpdateGraphOutputMap(t_src, output);
+    tensor_queue.push(t_src);
+    layout_infer_ctx->SetPermuteVector(
+        t_src, tensor_pv_map.find(t_src) != tensor_pv_map.end()
+                   ? tensor_pv_map[t_src]
+                   : MakeShared(t_src->GetShape().size()));
   }
 
   while (!tensor_queue.empty()) {
     auto tensor = tensor_queue.front();
-    tensor_queue.pop_front();
+    tensor_queue.pop();
     const auto& consumers = src_graph->GetConsumersOp(tensor);
     for (const auto& op : consumers) {
-      if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ !=-1 &&
+      if (!layout_infer_ctx->IsVisited(op) && op->impl()->kind_ != -1 &&
           layout_infer_ctx->IsReadyForInfer(op)) {
         auto next_tensors =
             layout_inference_impl::HandleLayoutInfer(layout_infer_ctx, op);
         for (const auto& t : next_tensors) {
-          tensor_queue.push_back(t);
+          tensor_queue.push(t);
         }
       }
     }
diff --git a/src/tim/transform/ops/activation_layout_inference.h b/src/tim/transform/ops/activation_layout_inference.h
index 11659bd..841822b 100644
--- a/src/tim/transform/ops/activation_layout_inference.h
+++ b/src/tim/transform/ops/activation_layout_inference.h
@@ -51,7 +51,7 @@ class ActivationLayoutInfer : public OpLayoutInfer {
     auto activation = op_->Clone(context_->infer_graph_);
     auto out_infer = CreateOutputsTensor(input_pv);
     (*activation)
-        .BindInput(context_->GetMapedTensor(i_src))
+        .BindInput(context_->GetMappedTensor(i_src))
         .BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
@@ -107,7 +107,7 @@ class PReluLayoutInfer : public OpLayoutInfer {
             context_->infer_graph_->CreateOperation<vx::ops::Reshape>(
                 boardcast_shape);
         (*reshape)
-            .BindInput(context_->GetMapedTensor(src_slope))
+            .BindInput(context_->GetMappedTensor(src_slope))
             .BindOutput(reshape_out);
         context_->UpdateTensorMap(src_slope, reshape_out);
       }
@@ -130,8 +130,8 @@ class PReluLayoutInfer : public OpLayoutInfer {
     auto out_infer = CreateOutputsTensor(input_pv);
 
     (*prelu)
-        .BindInput(context_->GetMapedTensor(src_input))
-        .BindInput(context_->GetMapedTensor(src_slope));
+        .BindInput(context_->GetMappedTensor(src_input))
+        .BindInput(context_->GetMappedTensor(src_slope));
     (*prelu).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/addn_layout_inference.h b/src/tim/transform/ops/addn_layout_inference.h
index 45650ef..bdeab29 100644
--- a/src/tim/transform/ops/addn_layout_inference.h
+++ b/src/tim/transform/ops/addn_layout_inference.h
@@ -44,7 +44,7 @@ class AddNLayoutInfer : public OpLayoutInfer {
     auto addn = op_->Clone(context_->infer_graph_);
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*addn).BindInput(context_->GetMapedTensor(i_src));
+      (*addn).BindInput(context_->GetMappedTensor(i_src));
     }
     auto infer_out = CreateOutputsTensor(required_pv);
     (*addn).BindOutput(infer_out[0]);
diff --git a/src/tim/transform/ops/arg_layout_inference.h b/src/tim/transform/ops/arg_layout_inference.h
index af46967..b05c70b 100644
--- a/src/tim/transform/ops/arg_layout_inference.h
+++ b/src/tim/transform/ops/arg_layout_inference.h
@@ -45,7 +45,7 @@ class ArgLayoutInfer : public OpLayoutInfer {
 
     auto arg = op_->Clone(context_->infer_graph_);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*arg).BindInput(context_->GetMapedTensor(src_input));
+    (*arg).BindInput(context_->GetMappedTensor(src_input));
     (*arg).BindOutput(infer_out[0]);
 
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/batch2space_layout_inference.h b/src/tim/transform/ops/batch2space_layout_inference.h
index 065ae80..6f6b4b6 100644
--- a/src/tim/transform/ops/batch2space_layout_inference.h
+++ b/src/tim/transform/ops/batch2space_layout_inference.h
@@ -51,7 +51,7 @@ class Batch2SpaceLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -70,7 +70,7 @@ class Batch2SpaceLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::Batch2Space>(
             block_size, crop, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*batch2space).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*batch2space).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*batch2space).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/batchnorm_layout_inference.h b/src/tim/transform/ops/batchnorm_layout_inference.h
index e01d416..7e9ac8f 100644
--- a/src/tim/transform/ops/batchnorm_layout_inference.h
+++ b/src/tim/transform/ops/batchnorm_layout_inference.h
@@ -57,7 +57,7 @@ class BatchNormLayoutInfer : public OpLayoutInfer {
             perm_out = context_->infer_graph_->CreateTensor(src_in->GetSpec(), (const void*)dataRef.data());
             input_pv = MakeShared(src_in->GetShape().size());
         } else {
-          perm_out = context_->GetMapedTensor(src_in);
+          perm_out = context_->GetMappedTensor(src_in);
           input_pv = context_->GetPermuteVector(src_in);
           context_->SetPermuteVector(src_in, input_pv);
           if (idx == 0) {
@@ -73,11 +73,11 @@ class BatchNormLayoutInfer : public OpLayoutInfer {
 
     auto batchnorm = op_->Clone(context_->infer_graph_);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[0]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[1]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[2]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[3]));
-    (*batchnorm).BindInput(context_->GetMapedTensor(input_tensors[4]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[0]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[1]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[2]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[3]));
+    (*batchnorm).BindInput(context_->GetMappedTensor(input_tensors[4]));
 
     (*batchnorm).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/bidirectional_rnn_layout_inference.h b/src/tim/transform/ops/bidirectional_rnn_layout_inference.h
index 7097b73..70d8ca0 100644
--- a/src/tim/transform/ops/bidirectional_rnn_layout_inference.h
+++ b/src/tim/transform/ops/bidirectional_rnn_layout_inference.h
@@ -72,7 +72,7 @@ class BidirectionalRnnLayoutInfer : public OpLayoutInfer {
 
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     
diff --git a/src/tim/transform/ops/broadcast_layout_inference.h b/src/tim/transform/ops/broadcast_layout_inference.h
index 41c034b..6db12ad 100644
--- a/src/tim/transform/ops/broadcast_layout_inference.h
+++ b/src/tim/transform/ops/broadcast_layout_inference.h
@@ -46,7 +46,7 @@ class BroadcastLayoutInfer : public OpLayoutInfer {
     auto cloned_op = op_->Clone(context_->infer_graph_);
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
diff --git a/src/tim/transform/ops/concat_layout_inferene.h b/src/tim/transform/ops/concat_layout_inferene.h
index d08aea1..db26d7e 100644
--- a/src/tim/transform/ops/concat_layout_inferene.h
+++ b/src/tim/transform/ops/concat_layout_inferene.h
@@ -47,7 +47,7 @@ class ConcatLayoutInfer : public OpLayoutInfer {
     auto concat = context_->infer_graph_->CreateOperation<vx::ops::Concat>(
         axis, op_->impl()->InputsTensor().size());
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*concat).BindInput(context_->GetMapedTensor(i_src));
+      (*concat).BindInput(context_->GetMappedTensor(i_src));
     }
     auto out_infer = CreateOutputsTensor(required_pv);
     (*concat).BindOutput(out_infer[0]);
diff --git a/src/tim/transform/ops/conv2d_layout_inference.h b/src/tim/transform/ops/conv2d_layout_inference.h
index 96b46ab..46fc8ac 100644
--- a/src/tim/transform/ops/conv2d_layout_inference.h
+++ b/src/tim/transform/ops/conv2d_layout_inference.h
@@ -79,10 +79,10 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       infer_input =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -104,10 +104,10 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
       auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
       if (!final_pv->IsAligned()) {
         infer_weight =
-            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+            InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
         context_->SetPermuteVector(input_tensors[1], weight_required_pv);
       } else {
-        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        infer_weight = context_->GetMappedTensor(input_tensors[1]);
         context_->SetPermuteVector(input_tensors[1], weight_pv);
       }
       context_->UpdateTensorMap(input_tensors[1], infer_weight);
@@ -121,7 +121,7 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
         infer_bias = context_->infer_graph_->CreateTensor(
             input_tensors[2]->GetSpec(), (const void*)dataRef.data());
       } else {
-        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+        infer_bias = context_->GetMappedTensor(input_tensors[2]);
       }
       auto bias_pv = MakeShared(1);
       context_->UpdateTensorMap(input_tensors[2], infer_bias);
@@ -131,7 +131,7 @@ class Conv2dLayoutInfer : public OpLayoutInfer {
     auto conv2d = op_->Clone(context_->infer_graph_);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*conv2d).BindInput(context_->GetMapedTensor(i_src));
+      (*conv2d).BindInput(context_->GetMappedTensor(i_src));
     }
     (*conv2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/conv3d_layout_inference.h b/src/tim/transform/ops/conv3d_layout_inference.h
index 806199d..5b00556 100644
--- a/src/tim/transform/ops/conv3d_layout_inference.h
+++ b/src/tim/transform/ops/conv3d_layout_inference.h
@@ -81,7 +81,7 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
       } else {
         // For bias
         if (in->GetShape().size() == 1) {
-          infer_tensor = context_->GetMapedTensor(in);
+          infer_tensor = context_->GetMappedTensor(in);
           trans_pv = MakeShared(1);
         } else {
           // For input/weight
@@ -89,10 +89,10 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
           auto final_pv = pv->Reverse()->Add(required_pv);
           if (!final_pv->IsAligned()) {
             infer_tensor =
-                InsertPermute(context_->GetMapedTensor(in), final_pv);
+                InsertPermute(context_->GetMappedTensor(in), final_pv);
             trans_pv = required_pv;
           } else {
-            infer_tensor = context_->GetMapedTensor(in);
+            infer_tensor = context_->GetMappedTensor(in);
             trans_pv = pv;
           }
         }
@@ -131,7 +131,7 @@ class Conv3dLayoutInfer : public OpLayoutInfer {
         vx::DataLayout::WHDCN, vx::DataLayout::WHDIcOc);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*conv3d).BindInput(context_->GetMapedTensor(i_src));
+      (*conv3d).BindInput(context_->GetMappedTensor(i_src));
     }
     (*conv3d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/deconv2d_layout_inference.h b/src/tim/transform/ops/deconv2d_layout_inference.h
index 8788c1d..ba2ea9e 100644
--- a/src/tim/transform/ops/deconv2d_layout_inference.h
+++ b/src/tim/transform/ops/deconv2d_layout_inference.h
@@ -79,10 +79,10 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       infer_input =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -104,10 +104,10 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
       auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
       if (!final_pv->IsAligned()) {
         infer_weight =
-            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+            InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
         context_->SetPermuteVector(input_tensors[1], weight_required_pv);
       } else {
-        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        infer_weight = context_->GetMappedTensor(input_tensors[1]);
         context_->SetPermuteVector(input_tensors[1], weight_pv);
       }
       context_->UpdateTensorMap(input_tensors[1], infer_weight);
@@ -121,7 +121,7 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
         infer_bias = context_->infer_graph_->CreateTensor(
             input_tensors[2]->GetSpec(), (const void*)dataRef.data());
       } else {
-        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+        infer_bias = context_->GetMappedTensor(input_tensors[2]);
       }
       auto bias_pv = MakeShared(1);
       context_->UpdateTensorMap(input_tensors[2], infer_bias);
@@ -131,7 +131,7 @@ class DeConv2dLayoutInfer : public OpLayoutInfer {
     auto deconv = op_->Clone(context_->infer_graph_);
     auto infer_out = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*deconv).BindInput(context_->GetMapedTensor(i_src));
+      (*deconv).BindInput(context_->GetMappedTensor(i_src));
     }
     (*deconv).BindOutput(infer_out[0]);
 
diff --git a/src/tim/transform/ops/default_layout_inference.h b/src/tim/transform/ops/default_layout_inference.h
index 2ae29a3..391cb5c 100644
--- a/src/tim/transform/ops/default_layout_inference.h
+++ b/src/tim/transform/ops/default_layout_inference.h
@@ -53,7 +53,7 @@ class DefaultLayoutInfer : public OpLayoutInfer {
     auto cloned_op = op_->Clone(context_->infer_graph_);
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
diff --git a/src/tim/transform/ops/depth2space_layout_inference.h b/src/tim/transform/ops/depth2space_layout_inference.h
index 8d42390..a94bf34 100644
--- a/src/tim/transform/ops/depth2space_layout_inference.h
+++ b/src/tim/transform/ops/depth2space_layout_inference.h
@@ -52,7 +52,7 @@ class DepthToSpaceLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -63,7 +63,7 @@ class DepthToSpaceLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::DepthToSpace>(
             block_size, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2depth).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2depth).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2depth).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/elementwise_layout_inference.h b/src/tim/transform/ops/elementwise_layout_inference.h
index 1248e86..5941f83 100644
--- a/src/tim/transform/ops/elementwise_layout_inference.h
+++ b/src/tim/transform/ops/elementwise_layout_inference.h
@@ -71,7 +71,7 @@ class ElementWiseLayoutInfer : public OpLayoutInfer {
     auto required_pv = AlignPermuteVectorForElementWise();
     auto elementwise = context_->infer_graph_->CreateOperation<OpType>();
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*elementwise).BindInput(context_->GetMapedTensor(i_src));
+      (*elementwise).BindInput(context_->GetMappedTensor(i_src));
     }
     auto out_infer = CreateOutputsTensor(required_pv);
     (*elementwise).BindOutput(out_infer[0]);
@@ -120,7 +120,7 @@ class MultiplyLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<tim::vx::ops::Multiply>(
             op_->impl()->node()->nn_param.multiply.scale);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*multiply).BindInput(context_->GetMapedTensor(i_src));
+      (*multiply).BindInput(context_->GetMappedTensor(i_src));
     }
     auto out_infer = CreateOutputsTensor(required_pv);
     (*multiply).BindOutput(out_infer[0]);
diff --git a/src/tim/transform/ops/fullyconnected_layout_inference.h b/src/tim/transform/ops/fullyconnected_layout_inference.h
index 8b83888..32e87fd 100644
--- a/src/tim/transform/ops/fullyconnected_layout_inference.h
+++ b/src/tim/transform/ops/fullyconnected_layout_inference.h
@@ -65,7 +65,7 @@ class FullyConnectedLayoutInfer : public OpLayoutInfer {
         MakeShared(op_->impl()->OutputsTensor()[0]->GetShape().size());
     auto out_infer = CreateOutputsTensor(required_pv);
     for (auto in : op_->impl()->InputsTensor()) {
-      (*fcl).BindInput(context_->GetMapedTensor(in));
+      (*fcl).BindInput(context_->GetMappedTensor(in));
     }
     (*fcl).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/gather_layout_inference.h b/src/tim/transform/ops/gather_layout_inference.h
index 7ef5440..3f28c4d 100644
--- a/src/tim/transform/ops/gather_layout_inference.h
+++ b/src/tim/transform/ops/gather_layout_inference.h
@@ -45,7 +45,7 @@ class GatherLayoutInfer : public OpLayoutInfer {
         op_->impl()->node()->nn_param.gather.batch_dims);
     int32_t output_rank = -1;
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*gather).BindInput(context_->GetMapedTensor(i_src));
+      (*gather).BindInput(context_->GetMappedTensor(i_src));
       output_rank += i_src->GetShape().size();
     }
     auto infer_out = CreateOutputsTensor(
diff --git a/src/tim/transform/ops/gather_nd_layout_inference.h b/src/tim/transform/ops/gather_nd_layout_inference.h
index cebf74a..201cc0b 100644
--- a/src/tim/transform/ops/gather_nd_layout_inference.h
+++ b/src/tim/transform/ops/gather_nd_layout_inference.h
@@ -46,7 +46,7 @@ class GatherNdLayoutInfer : public OpLayoutInfer {
 
     auto gather = context_->infer_graph_->CreateOperation<vx::ops::GatherNd>();
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*gather).BindInput(context_->GetMapedTensor(i_src));
+      (*gather).BindInput(context_->GetMappedTensor(i_src));
     }
     auto infer_out = CreateOutputsTensor(
         context_->GetPermuteVector(op_->impl()->InputsTensor()[0]));
diff --git a/src/tim/transform/ops/grouped_conv2d_layout_inference.h b/src/tim/transform/ops/grouped_conv2d_layout_inference.h
index b2df948..3024348 100644
--- a/src/tim/transform/ops/grouped_conv2d_layout_inference.h
+++ b/src/tim/transform/ops/grouped_conv2d_layout_inference.h
@@ -79,10 +79,10 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       infer_input =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -104,10 +104,10 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
       auto final_pv = weight_pv->Reverse()->Add(weight_required_pv);
       if (!final_pv->IsAligned()) {
         infer_weight =
-            InsertPermute(context_->GetMapedTensor(input_tensors[1]), final_pv);
+            InsertPermute(context_->GetMappedTensor(input_tensors[1]), final_pv);
         context_->SetPermuteVector(input_tensors[1], weight_required_pv);
       } else {
-        infer_weight = context_->GetMapedTensor(input_tensors[1]);
+        infer_weight = context_->GetMappedTensor(input_tensors[1]);
         context_->SetPermuteVector(input_tensors[1], weight_pv);
       }
       context_->UpdateTensorMap(input_tensors[1], infer_weight);
@@ -121,7 +121,7 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
         infer_bias = context_->infer_graph_->CreateTensor(
             input_tensors[2]->GetSpec(), (const void*)dataRef.data());
       } else {
-        infer_bias = context_->GetMapedTensor(input_tensors[2]);
+        infer_bias = context_->GetMappedTensor(input_tensors[2]);
       }
       auto bias_pv = MakeShared(1);
       context_->UpdateTensorMap(input_tensors[2], infer_bias);
@@ -131,7 +131,7 @@ class GroupedConv2dLayoutInfer : public OpLayoutInfer {
     auto grouped_conv2d = op_->Clone(context_->infer_graph_);
     auto otensor_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : input_tensors) {
-      (*grouped_conv2d).BindInput(context_->GetMapedTensor(i_src));
+      (*grouped_conv2d).BindInput(context_->GetMappedTensor(i_src));
     }
     (*grouped_conv2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/instance_norm_layout_inference.h b/src/tim/transform/ops/instance_norm_layout_inference.h
index bb8b73d..f0d3df7 100644
--- a/src/tim/transform/ops/instance_norm_layout_inference.h
+++ b/src/tim/transform/ops/instance_norm_layout_inference.h
@@ -63,10 +63,10 @@ class InstanceNormLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -85,7 +85,7 @@ class InstanceNormLayoutInfer : public OpLayoutInfer {
     auto instance_norm = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*instance_norm).BindInput(context_->GetMapedTensor(i_src));
+      (*instance_norm).BindInput(context_->GetMappedTensor(i_src));
     }
     (*instance_norm).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/l2normalization_layout_inference.h b/src/tim/transform/ops/l2normalization_layout_inference.h
index 00148ed..cfdcacc 100644
--- a/src/tim/transform/ops/l2normalization_layout_inference.h
+++ b/src/tim/transform/ops/l2normalization_layout_inference.h
@@ -47,7 +47,7 @@ class L2NormalizationLayoutInfer : public OpLayoutInfer {
     auto l2norm =
         context_->infer_graph_->CreateOperation<vx::ops::L2Normalization>(axis);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*l2norm).BindInput(context_->GetMapedTensor(src_input));
+    (*l2norm).BindInput(context_->GetMappedTensor(src_input));
     (*l2norm).BindOutput(infer_out[0]);
 
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/logical_layout_inference.h b/src/tim/transform/ops/logical_layout_inference.h
index ee140a1..ef7754d 100644
--- a/src/tim/transform/ops/logical_layout_inference.h
+++ b/src/tim/transform/ops/logical_layout_inference.h
@@ -71,7 +71,7 @@ class LogicalOpsLayoutInfer : public OpLayoutInfer {
     auto infer_out = CreateOutputsTensor(required_pv);
     auto logical_op = context_->infer_graph_->CreateOperation<OpTpye>();
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*logical_op).BindInput(context_->GetMapedTensor(i_src));
+      (*logical_op).BindInput(context_->GetMappedTensor(i_src));
     }
     (*logical_op).BindOutput(infer_out[0]);
 
diff --git a/src/tim/transform/ops/lrn_layout_inference.h b/src/tim/transform/ops/lrn_layout_inference.h
index 74b97fb..79749cc 100644
--- a/src/tim/transform/ops/lrn_layout_inference.h
+++ b/src/tim/transform/ops/lrn_layout_inference.h
@@ -53,7 +53,7 @@ class LRNLayoutInfer : public OpLayoutInfer {
                    ->CreateOperation<vx::ops::LocalResponseNormalization>(
                        size, alpha, beta, bias, axis);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*lrn).BindInput(context_->GetMapedTensor(src_input));
+    (*lrn).BindInput(context_->GetMappedTensor(src_input));
     (*lrn).BindOutput(infer_out[0]);
 
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/op_layout_inference.cc b/src/tim/transform/ops/op_layout_inference.cc
index 7275a28..55ac6e4 100644
--- a/src/tim/transform/ops/op_layout_inference.cc
+++ b/src/tim/transform/ops/op_layout_inference.cc
@@ -38,16 +38,13 @@ void OpLayoutInfer::OnOutputs(
   auto graph_outputs = context_->src_graph_->OutputsTensor();
   auto op_outputs = op_->impl()->OutputsTensor();
   for (const auto& out : op_outputs) {
-    if (graph_outputs.end() !=
-        std::find(graph_outputs.begin(), graph_outputs.end(), out)) {
-      context_->UpdateGraphOutputMap(out, context_->GetMapedTensor(out));
+    if (graph_outputs.cend() !=
+        std::find(graph_outputs.cbegin(), graph_outputs.cend(), out)) {
       auto pv = context_->GetPermuteVector(out);
       if (!pv->IsAligned()) {
-        auto perm_out = InsertPermute(context_->GetMapedTensor(out),
+        auto perm_out = InsertPermute(context_->GetMappedTensor(out),
                                       pv->Reverse(), true, out);
-        // Update graph out tensor
         context_->UpdateTensorMap(out, perm_out);
-        context_->UpdateGraphOutputMap(out, perm_out);
       }
       if (!context_->src_graph_->GetConsumersOp(out).empty()) {
         // The tensor is output of graph, but it also is the input of other operations
@@ -65,19 +62,18 @@ void OpLayoutInfer::OnOutputs(
 std::shared_ptr<vx::Tensor> OpLayoutInfer::InsertPermute(
     std::shared_ptr<vx::Tensor> input, std::shared_ptr<IPermuteVector> perm,
     bool is_graph_output, std::shared_ptr<vx::Tensor> src_out) {
-  auto out_spec = input->GetSpec();
+  std::shared_ptr<vx::Tensor> out_tensor;
   if (is_graph_output) {
-    auto out_shape = src_out->GetShape();
-    out_spec.SetShape(out_shape);
-    out_spec.SetAttribute(vx::TensorAttribute::OUTPUT);
+    out_tensor = context_->GetMappedGraphOutputTensor(src_out);
   } else {
-    out_spec.SetAttribute(vx::TensorAttribute::TRANSIENT);
+    auto out_spec = input->GetSpec().AsTransientSpec();
+    if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) {
+      out_spec.quantization_.SetChannelDim(
+          MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim()));
+    }
+    out_tensor = context_->infer_graph_->CreateTensor(out_spec);
   }
-  if (out_spec.quantization_.Type() == vx::QuantType::SYMMETRIC_PER_CHANNEL) {
-    out_spec.quantization_.SetChannelDim(
-        MapAxis(perm->AsStdVec(), out_spec.quantization_.ChannelDim()));
-  }
-  auto out_tensor = context_->infer_graph_->CreateTensor(out_spec);
+
   auto perm_op = context_->infer_graph_->CreateOperation<vx::ops::Transpose>(
       perm->AsStdVec());
   (*perm_op).BindInput(input).BindOutput(out_tensor);
@@ -88,20 +84,28 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
     std::shared_ptr<IPermuteVector> required_pv) {
   std::vector<std::shared_ptr<vx::Tensor>> outputs_tensor;
 
-  if (op_->impl()->OutputsTensor().size() > 1) {
+  auto op_outputs = op_->impl()->OutputsTensor();
+  if (op_outputs.size() > 1) {
     // todo(sven): potential bug here if node have multi-output and require layout inference
     std::cout << "warning at " << __FUNCTION__ << ", #" << __LINE__
               << std::endl;
   }
 
-  for (const auto& o : op_->impl()->OutputsTensor()) {
+  for (const auto& o : op_outputs) {
     auto in_shape = o->GetShape();
     auto out_spec = o->GetSpec();
-    if (!(required_pv->IsAligned())) {
+    if (!required_pv->IsAligned()) {
       out_spec = out_spec.AsTransientSpec();
     }
-    auto t_infer = context_->infer_graph_->CreateTensor(out_spec);
-    context_->UpdateTensorMap(o, t_infer);
+
+    std::shared_ptr<vx::Tensor> t_infer;
+    if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
+      t_infer = context_->GetMappedTensor(o);
+    } else {
+      t_infer = context_->infer_graph_->CreateTensor(out_spec);
+      context_->UpdateTensorMap(o, t_infer);
+    }
+
     outputs_tensor.push_back(t_infer);
   }
   return outputs_tensor;
@@ -111,19 +115,26 @@ std::vector<std::shared_ptr<vx::Tensor>> OpLayoutInfer::CreateOutputsTensor(
     const std::vector<std::shared_ptr<IPermuteVector>>& required_pv) {
   std::vector<std::shared_ptr<vx::Tensor>> outputs_tensor;
 
-  assert(required_pv.size() == (op_->impl()->OutputsTensor().size()));
+  auto op_outputs = op_->impl()->OutputsTensor();
+  assert(required_pv.size() == (op_outputs.size()));
 
-  uint32_t i = 0;
-  for (const auto& o : op_->impl()->OutputsTensor()) {
+  for (size_t i = 0; i < op_outputs.size(); i++) {
+    const auto& o = op_outputs[i];
     auto in_shape = o->GetShape();
     auto out_spec = o->GetSpec();
-    if (!(required_pv[i]->IsAligned())) {
+    if (!required_pv[i]->IsAligned()) {
       out_spec = out_spec.AsTransientSpec();
     }
-    auto t_infer = context_->infer_graph_->CreateTensor(out_spec);
-    context_->UpdateTensorMap(o, t_infer);
+
+    std::shared_ptr<vx::Tensor> t_infer;
+    if (out_spec.GetTensorAttribute() == vx::OUTPUT) {
+      t_infer = context_->GetMappedTensor(o);
+    } else {
+      t_infer = context_->infer_graph_->CreateTensor(out_spec);
+      context_->UpdateTensorMap(o, t_infer);
+    }
+
     outputs_tensor.push_back(t_infer);
-    i++;
   }
   return outputs_tensor;
 }
@@ -198,8 +209,8 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() {
       std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
       i_src->CopyDataFromTensor(dataRef.data());
       context_->UpdateTensorMap(
-          i_src, context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                      (const void*)dataRef.data()));
+          i_src, context_->infer_graph_->CreateTensor(
+                     i_src->GetSpec(), (const void*)dataRef.data()));
       context_->SetPermuteVector(i_src, MakeShared(i_src->GetShape().size()));
     }
   } else {
@@ -215,9 +226,9 @@ OpLayoutInfer::AlignPermuteVectorForMutilInputs() {
       } else {
         auto final_pv =
             context_->GetPermuteVector(i_src)->Reverse()->Add(required_pv);
-        final_pv->IsAligned() ? perm_out = context_->GetMapedTensor(i_src)
+        final_pv->IsAligned() ? perm_out = context_->GetMappedTensor(i_src)
                               : perm_out = InsertPermute(
-                                    context_->GetMapedTensor(i_src), final_pv);
+                                    context_->GetMappedTensor(i_src), final_pv);
       }
       context_->UpdateTensorMap(i_src, perm_out);
       context_->SetPermuteVector(i_src, required_pv);
@@ -247,8 +258,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() {
       if (required_pv->IsAligned()) {
         std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
         i_src->CopyDataFromTensor(dataRef.data());
-        perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                        (const void*)dataRef.data());
+        perm_out = context_->infer_graph_->CreateTensor(
+            i_src->GetSpec(), (const void*)dataRef.data());
       } else if (i_src->GetShape().size() == required_pv->Rank()) {
         perm_out = PermuteConstTensor(i_src, required_pv);
         // need shape expansion
@@ -263,8 +274,8 @@ OpLayoutInfer::AlignPermuteVectorForElementWise() {
       auto final_pv =
           context_->GetPermuteVector(i_src)->Reverse()->Add(required_pv);
       final_pv->IsAligned()
-          ? perm_out = context_->GetMapedTensor(i_src)
-          : perm_out = InsertPermute(context_->GetMapedTensor(i_src), final_pv);
+          ? perm_out = context_->GetMappedTensor(i_src)
+          : perm_out = InsertPermute(context_->GetMappedTensor(i_src), final_pv);
     }
     context_->UpdateTensorMap(i_src, perm_out);
     context_->SetPermuteVector(i_src, required_pv);
@@ -280,11 +291,11 @@ void OpLayoutInfer::ReverseInputsPermuteVector() {
       if (i_src->IsConstTensor()) {
         std::vector<uint8_t> dataRef(i_src->GetSpec().GetByteSize());
         i_src->CopyDataFromTensor(dataRef.data());
-        perm_out = context_->infer_graph_->CreateTensor(i_src->GetSpec(),
-                                                        (const void*)dataRef.data());
+        perm_out = context_->infer_graph_->CreateTensor(
+            i_src->GetSpec(), (const void*)dataRef.data());
         input_pv = MakeShared(i_src->GetShape().size());
       } else {
-        perm_out = context_->GetMapedTensor(i_src);
+        perm_out = context_->GetMappedTensor(i_src);
         input_pv = context_->GetPermuteVector(i_src);
         if (!input_pv->IsAligned()) {
           perm_out = InsertPermute(perm_out, input_pv->Reverse());
diff --git a/src/tim/transform/ops/pad_layout_inference.h b/src/tim/transform/ops/pad_layout_inference.h
index 8e041d1..d44ed7a 100644
--- a/src/tim/transform/ops/pad_layout_inference.h
+++ b/src/tim/transform/ops/pad_layout_inference.h
@@ -63,7 +63,7 @@ class PadLayoutInfer : public OpLayoutInfer {
         front_size, back_size, pad_value, pad_mode);
 
     auto out_infer = CreateOutputsTensor(input_pv);
-    (*pad).BindInput(context_->GetMapedTensor(i_src));
+    (*pad).BindInput(context_->GetMappedTensor(i_src));
     (*pad).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/pad_v2_layout_inference.h b/src/tim/transform/ops/pad_v2_layout_inference.h
index d9bad6a..0faa918 100644
--- a/src/tim/transform/ops/pad_v2_layout_inference.h
+++ b/src/tim/transform/ops/pad_v2_layout_inference.h
@@ -61,7 +61,7 @@ class PadV2LayoutInfer : public OpLayoutInfer {
     auto pad_v2 = context_->infer_graph_->CreateOperation<vx::ops::PadV2>(
         front_size, back_size, pad_value);
     auto out_infer = CreateOutputsTensor(input_pv);
-    (*pad_v2).BindInput(context_->GetMapedTensor(i_src));
+    (*pad_v2).BindInput(context_->GetMappedTensor(i_src));
     (*pad_v2).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/pool2d_layout_inference.h b/src/tim/transform/ops/pool2d_layout_inference.h
index 406cd3c..b2efc9a 100644
--- a/src/tim/transform/ops/pool2d_layout_inference.h
+++ b/src/tim/transform/ops/pool2d_layout_inference.h
@@ -50,7 +50,7 @@ class Pool2dLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -82,7 +82,7 @@ class Pool2dLayoutInfer : public OpLayoutInfer {
           pool_type, pad_type, ksize, stride, round_type, vx::DataLayout::WHCN);
     }
     auto otensor_infer = CreateOutputsTensor(required_pv);
-    (*pool2d).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*pool2d).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*pool2d).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/reduce_layout_inference.h b/src/tim/transform/ops/reduce_layout_inference.h
index 6766985..e0df3fe 100644
--- a/src/tim/transform/ops/reduce_layout_inference.h
+++ b/src/tim/transform/ops/reduce_layout_inference.h
@@ -60,7 +60,7 @@ class ReduceLayoutInfer : public OpLayoutInfer {
     }
     auto reduce = context_->infer_graph_->CreateOperation<OpType>(
         new_axis, op_->impl()->node()->nn_param.reduce.keep_dim);
-    (*reduce).BindInput(context_->GetMapedTensor(t_src));
+    (*reduce).BindInput(context_->GetMappedTensor(t_src));
 
     if (op_->impl()->node()->nn_param.reduce.keep_dim) {
       auto otensor_infer = CreateOutputsTensor(pv);
diff --git a/src/tim/transform/ops/resize_layout_inference.h b/src/tim/transform/ops/resize_layout_inference.h
index ada33b7..e3b2809 100644
--- a/src/tim/transform/ops/resize_layout_inference.h
+++ b/src/tim/transform/ops/resize_layout_inference.h
@@ -51,7 +51,7 @@ class ResizeLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
 
     if (!final_pv->IsAligned()) {
-      auto perm_out = InsertPermute(context_->GetMapedTensor(i_src), final_pv);
+      auto perm_out = InsertPermute(context_->GetMappedTensor(i_src), final_pv);
       context_->UpdateTensorMap(i_src, perm_out);
       context_->SetPermuteVector(i_src, final_pv);
     }
@@ -70,7 +70,7 @@ class ResizeLayoutInfer : public OpLayoutInfer {
         target_width);
 
     auto out_infer = CreateOutputsTensor(required_pv);
-    (*resize).BindInput(context_->GetMapedTensor(i_src));
+    (*resize).BindInput(context_->GetMappedTensor(i_src));
     (*resize).BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/reverse_layout_inference.h b/src/tim/transform/ops/reverse_layout_inference.h
index 3ff961d..a9bf288 100644
--- a/src/tim/transform/ops/reverse_layout_inference.h
+++ b/src/tim/transform/ops/reverse_layout_inference.h
@@ -49,7 +49,7 @@ class ReverseLayoutInfer : public OpLayoutInfer {
 
     auto reverse = context_->infer_graph_->CreateOperation<vx::ops::Reverse>(
         axis);
-    (*reverse).BindInput(context_->GetMapedTensor(src_input));
+    (*reverse).BindInput(context_->GetMappedTensor(src_input));
     auto infer_out = CreateOutputsTensor(input_pv);
     (*reverse).BindOutput(infer_out[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
diff --git a/src/tim/transform/ops/roi_align_layout_inference.h b/src/tim/transform/ops/roi_align_layout_inference.h
index 8e3d706..3f3cd3b 100644
--- a/src/tim/transform/ops/roi_align_layout_inference.h
+++ b/src/tim/transform/ops/roi_align_layout_inference.h
@@ -62,10 +62,10 @@ class RoiAlignLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -84,7 +84,7 @@ class RoiAlignLayoutInfer : public OpLayoutInfer {
     auto roi_align = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*roi_align).BindInput(context_->GetMapedTensor(i_src));
+      (*roi_align).BindInput(context_->GetMappedTensor(i_src));
     }
     (*roi_align).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/roi_pool_layout_inference.h b/src/tim/transform/ops/roi_pool_layout_inference.h
index ac4d25b..56d3028 100644
--- a/src/tim/transform/ops/roi_pool_layout_inference.h
+++ b/src/tim/transform/ops/roi_pool_layout_inference.h
@@ -62,10 +62,10 @@ class RoiPoolLayoutInfer : public OpLayoutInfer {
     auto final_pv = input_pv->Reverse()->Add(required_pv);
     std::shared_ptr<vx::Tensor> infer_input;
     if (!final_pv->IsAligned()) {
-      infer_input = InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      infer_input = InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     } else {
-      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      infer_input = context_->GetMappedTensor(input_tensors[0]);
       context_->SetPermuteVector(input_tensors[0], input_pv);
     }
     context_->UpdateTensorMap(input_tensors[0], infer_input);
@@ -84,7 +84,7 @@ class RoiPoolLayoutInfer : public OpLayoutInfer {
     auto roi_pool = op_->Clone(context_->infer_graph_);
     auto outs_infer = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*roi_pool).BindInput(context_->GetMapedTensor(i_src));
+      (*roi_pool).BindInput(context_->GetMappedTensor(i_src));
     }
     (*roi_pool).BindOutput(outs_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
diff --git a/src/tim/transform/ops/select_layout_inference.h b/src/tim/transform/ops/select_layout_inference.h
index ff7a118..20f0459 100644
--- a/src/tim/transform/ops/select_layout_inference.h
+++ b/src/tim/transform/ops/select_layout_inference.h
@@ -42,7 +42,7 @@ class SelectLayoutInfer : public OpLayoutInfer {
     auto select = context_->infer_graph_->CreateOperation<vx::ops::Select>();
     auto infer_out = CreateOutputsTensor(required_pv);
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-        (*select).BindInput(context_->GetMapedTensor(i_src));
+        (*select).BindInput(context_->GetMappedTensor(i_src));
     }
     (*select).BindOutput(infer_out[0]);
 
diff --git a/src/tim/transform/ops/simple_ops_layout_inference.h b/src/tim/transform/ops/simple_ops_layout_inference.h
index 5867230..28ae759 100644
--- a/src/tim/transform/ops/simple_ops_layout_inference.h
+++ b/src/tim/transform/ops/simple_ops_layout_inference.h
@@ -49,7 +49,7 @@ class SimpleOpsLayoutInfer : public OpLayoutInfer {
     auto out_infer = CreateOutputsTensor(input_pv);
     auto simple_op = context_->infer_graph_->CreateOperation<OpType>();
     (*simple_op)
-        .BindInput(context_->GetMapedTensor(i_src))
+        .BindInput(context_->GetMappedTensor(i_src))
         .BindOutput(out_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/slice_layout_inference.h b/src/tim/transform/ops/slice_layout_inference.h
index 5db4150..aba7f5a 100644
--- a/src/tim/transform/ops/slice_layout_inference.h
+++ b/src/tim/transform/ops/slice_layout_inference.h
@@ -54,7 +54,7 @@ class SliceLayoutInfer : public OpLayoutInfer {
     auto slice = context_->infer_graph_->CreateOperation<vx::ops::Slice>(
         dims, start, length);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*slice).BindInput(context_->GetMapedTensor(src_input));
+    (*slice).BindInput(context_->GetMappedTensor(src_input));
     (*slice).BindOutput(infer_out[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], input_pv);
     next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/softmax_layout_inference.h b/src/tim/transform/ops/softmax_layout_inference.h
index 57a2990..967a288 100644
--- a/src/tim/transform/ops/softmax_layout_inference.h
+++ b/src/tim/transform/ops/softmax_layout_inference.h
@@ -49,7 +49,7 @@ class SoftmaxLayoutInfer : public OpLayoutInfer {
     auto softmax =
         context_->infer_graph_->CreateOperation<vx::ops::Softmax>(beta, axis);
     auto otensor_infer = CreateOutputsTensor(required_pv);
-    (*softmax).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*softmax).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*softmax).BindOutput(otensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/space2batch_layout_inference.h b/src/tim/transform/ops/space2batch_layout_inference.h
index 4eb6013..ee6e31f 100644
--- a/src/tim/transform/ops/space2batch_layout_inference.h
+++ b/src/tim/transform/ops/space2batch_layout_inference.h
@@ -51,7 +51,7 @@ class Space2BatchLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -70,7 +70,7 @@ class Space2BatchLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::Space2Batch>(
             block_size, pad, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2batch).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2batch).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2batch).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/space2depth_layout_inference.h b/src/tim/transform/ops/space2depth_layout_inference.h
index e861a23..ecac32c 100644
--- a/src/tim/transform/ops/space2depth_layout_inference.h
+++ b/src/tim/transform/ops/space2depth_layout_inference.h
@@ -51,7 +51,7 @@ class SpaceToDepthLayoutInfer : public OpLayoutInfer {
     auto final_pv = pv->Reverse()->Add(required_pv);
     if (!final_pv->IsAligned()) {
       auto perm_out =
-          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+          InsertPermute(context_->GetMappedTensor(input_tensors[0]), final_pv);
       context_->UpdateTensorMap(input_tensors[0], perm_out);
       context_->SetPermuteVector(input_tensors[0], required_pv);
     }
@@ -64,7 +64,7 @@ class SpaceToDepthLayoutInfer : public OpLayoutInfer {
         context_->infer_graph_->CreateOperation<vx::ops::SpaceToDepth>(
             block_size, vx::DataLayout::WHCN);
     auto out_tensor_infer = CreateOutputsTensor(required_pv);
-    (*space2depth).BindInput(context_->GetMapedTensor(input_tensors[0]));
+    (*space2depth).BindInput(context_->GetMappedTensor(input_tensors[0]));
     (*space2depth).BindOutput(out_tensor_infer[0]);
     context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], required_pv);
     // Add out tensor of src_graph into next_tensor
diff --git a/src/tim/transform/ops/split_layout_inference.h b/src/tim/transform/ops/split_layout_inference.h
index 0479641..d80a4d2 100644
--- a/src/tim/transform/ops/split_layout_inference.h
+++ b/src/tim/transform/ops/split_layout_inference.h
@@ -51,7 +51,7 @@ class SplitLayoutInfer : public OpLayoutInfer {
     auto split =
         context_->infer_graph_->CreateOperation<vx::ops::Split>(axis, slices);
     auto infer_out = CreateOutputsTensor(input_pv);
-    (*split).BindInput(context_->GetMapedTensor(input_tensor));
+    (*split).BindInput(context_->GetMappedTensor(input_tensor));
     (*split).BindOutputs(infer_out);
     for (const auto& out : op_->impl()->OutputsTensor()) {
         context_->SetPermuteVector(out, input_pv);
diff --git a/src/tim/transform/ops/squeeze_layout_inference.h b/src/tim/transform/ops/squeeze_layout_inference.h
index 719e352..a183530 100644
--- a/src/tim/transform/ops/squeeze_layout_inference.h
+++ b/src/tim/transform/ops/squeeze_layout_inference.h
@@ -50,7 +50,7 @@ class SqueezeLayoutInfer : public OpLayoutInfer {
     auto squeeze =
         context_->infer_graph_->CreateOperation<vx::ops::Squeeze>(axis);
     (*squeeze).BindInput(
-        context_->GetMapedTensor(op_->impl()->InputsTensor()[0]));
+        context_->GetMappedTensor(op_->impl()->InputsTensor()[0]));
 
     auto required_pv =
         MakeShared(op_->impl()->OutputsTensor()[0]->GetShape().size());
diff --git a/src/tim/transform/ops/stack_layout_inference.h b/src/tim/transform/ops/stack_layout_inference.h
index f649a23..7f46456 100644
--- a/src/tim/transform/ops/stack_layout_inference.h
+++ b/src/tim/transform/ops/stack_layout_inference.h
@@ -53,7 +53,7 @@ class StackLayoutInfer : public OpLayoutInfer {
     auto aligninput_pv = AlignPermuteVectorForMutilInputs();
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*stack).BindInput(context_->GetMapedTensor(i_src));
+      (*stack).BindInput(context_->GetMappedTensor(i_src));
     }
 
     if (axis < 0) {
diff --git a/src/tim/transform/ops/stridedslice_layout_inference.h b/src/tim/transform/ops/stridedslice_layout_inference.h
index 1cfca83..6f0136e 100644
--- a/src/tim/transform/ops/stridedslice_layout_inference.h
+++ b/src/tim/transform/ops/stridedslice_layout_inference.h
@@ -105,7 +105,7 @@ class StridedSliceLayoutInfer : public OpLayoutInfer {
       }
 
       auto infer_out = CreateOutputsTensor(out_pv);
-      (*strided_slice).BindInput(context_->GetMapedTensor(src_input));
+      (*strided_slice).BindInput(context_->GetMappedTensor(src_input));
       (*strided_slice).BindOutput(infer_out[0]);
       context_->SetPermuteVector(op_->impl()->OutputsTensor()[0], out_pv);
       next_tensors.push_back(op_->impl()->OutputsTensor()[0]);
diff --git a/src/tim/transform/ops/transpose_layout_inference.h b/src/tim/transform/ops/transpose_layout_inference.h
index 88ee239..6578dba 100644
--- a/src/tim/transform/ops/transpose_layout_inference.h
+++ b/src/tim/transform/ops/transpose_layout_inference.h
@@ -42,7 +42,7 @@ class TransposeLayoutInfer : public OpLayoutInfer {
   void OnInputs(
       std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
     auto src_input = op_->impl()->InputsTensor()[0];
-    auto infer_input = context_->GetMapedTensor(src_input);
+    auto infer_input = context_->GetMappedTensor(src_input);
     auto input_pv = context_->GetPermuteVector(src_input);
 
     std::vector<uint32_t> perm(op_->impl()->node()->nn_param.permute.dim_num);
diff --git a/src/tim/transform/ops/unidirectional_lstm_layout_inference.h b/src/tim/transform/ops/unidirectional_lstm_layout_inference.h
index 47b802b..9a099ec 100644
--- a/src/tim/transform/ops/unidirectional_lstm_layout_inference.h
+++ b/src/tim/transform/ops/unidirectional_lstm_layout_inference.h
@@ -72,7 +72,7 @@ class UnidirectionalLstmLayoutInfer : public OpLayoutInfer {
 
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;
diff --git a/src/tim/transform/ops/unidirectional_rnn_layout_inference.h b/src/tim/transform/ops/unidirectional_rnn_layout_inference.h
index 7bbbc09..8a8c261 100644
--- a/src/tim/transform/ops/unidirectional_rnn_layout_inference.h
+++ b/src/tim/transform/ops/unidirectional_rnn_layout_inference.h
@@ -72,7 +72,7 @@ class UnidirectionalRnnLayoutInfer : public OpLayoutInfer {
 
 
     for (const auto& i_src : op_->impl()->InputsTensor()) {
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     
diff --git a/src/tim/transform/ops/yolov4_layout_inference.h b/src/tim/transform/ops/yolov4_layout_inference.h
index 2e44bbe..6698e08 100644
--- a/src/tim/transform/ops/yolov4_layout_inference.h
+++ b/src/tim/transform/ops/yolov4_layout_inference.h
@@ -53,7 +53,7 @@ class Yolov4LayoutInfer : public OpLayoutInfer {
         context_->SetPermuteVector(i_src, MakeShared(4));
         context_->UpdateTensorMap(i_src, i_infer);
       } 
-      (*cloned_op).BindInput(context_->GetMapedTensor(i_src));
+      (*cloned_op).BindInput(context_->GetMappedTensor(i_src));
     }
 
     std::vector<std::shared_ptr<IPermuteVector>> required_pv_lst;