diff --git a/.gitignore b/.gitignore
index 78806d7..8def752 100644
--- a/.gitignore
+++ b/.gitignore
@@ -335,3 +335,4 @@ ASALocalRun/
 
 # IDE
 .settings/
+build/
diff --git a/include/tim/vx/operation.h b/include/tim/vx/operation.h
index 57fa7ef..e978e45 100644
--- a/include/tim/vx/operation.h
+++ b/include/tim/vx/operation.h
@@ -48,6 +48,7 @@ class Operation {
       RoundType down_scale_size_rounding = RoundType::FLOOR,
       uint32_t accumulator_bits = 0);
   std::unique_ptr<OperationImpl>& impl();
+  const std::unique_ptr<OperationImpl>& impl() const;
 
  protected:
   std::unique_ptr<OperationImpl> impl_;
diff --git a/include/tim/vx/ops/unidirectional_sequence_lstm.h b/include/tim/vx/ops/unidirectional_sequence_lstm.h
new file mode 100644
index 0000000..629ad9f
--- /dev/null
+++ b/include/tim/vx/ops/unidirectional_sequence_lstm.h
@@ -0,0 +1,65 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef TIM_VX_OPS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_
+#define TIM_VX_OPS_UNIDIRECTIONAL_SEQUENCE_LSTM_H_
+#include "tim/vx/operation.h"
+
+namespace tim {
+namespace vx {
+namespace ops {
+    /**
+     * ## Unidirectional sequence lstm
+     *  how to bind input/output: take unidirectional_sequence_lstm_test.cc
+     */
+    class UnidirectionalSequenceLstm: public Operation {
+     public:
+      enum ActivationType {
+        kNONE = 0,
+        kRELU = 1,
+        kRELU6 = 2,
+        kTANH = 3,
+        kSIGMOID = 4,
+        kHARDSIGMOID = 5,
+        kCOUNT
+      };
+
+      UnidirectionalSequenceLstm(
+          Graph* graph, float cell_clip, float proj_clip,
+          ActivationType act_type, float forget_bias, bool time_major = false,
+          ActivationType recurrent_act_type = ActivationType::kNONE,
+          bool return_sequences = false /*False: only return last state*/
+      );
+
+      std::shared_ptr<Operation> Clone(
+          std::shared_ptr<Graph>& graph) const override;
+
+     protected:
+      ActivationType act_type_;
+      ActivationType recurrent_act_type_;
+    };
+}
+}  // namespace vx
+}  // namespace tim
+
+#endif
diff --git a/src/tim/vx/operation.cc b/src/tim/vx/operation.cc
index d180db4..a5f8572 100644
--- a/src/tim/vx/operation.cc
+++ b/src/tim/vx/operation.cc
@@ -90,6 +90,7 @@ Operation::Operation(Graph* graph, uint32_t operation_id,
 Operation::~Operation() {}
 
 std::unique_ptr<OperationImpl>& Operation::impl() { return impl_; }
+const std::unique_ptr<OperationImpl>& Operation::impl() const { return impl_; }
 
 Operation& Operation::BindInput(const std::shared_ptr<Tensor>& tensor) {
   impl_->BindInput(tensor);
diff --git a/src/tim/vx/ops/README.md b/src/tim/vx/ops/README.md
index 376900d..65e5aac 100644
--- a/src/tim/vx/ops/README.md
+++ b/src/tim/vx/ops/README.md
@@ -108,8 +108,8 @@ SpatialTransformer|SPATIAL_TRANSFORMER|Mapped|[SpatialTransformer](https://githu
 |BidirectionalSequenceRNN|BIDIRECTIONAL_SEQUENCE_RNN|Planned 21Q4|[ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_RNN](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a487fc5ae247de828f13e62b99f259f3c)
 |RNNCell|RNNCELL_OVXLIB|Planned 21Q3|[ANEURALNETWORKS_RNN](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0acd2684ac9c73bb29767b534e78a332e8)
 |BidirectionalSequenceLSTM|BIDIRECTIONAL_SEQUENCE_LSTM|Planned 21Q4|[ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a492a71cb7aa50b9a1a834a3cb269d778)
-|UnidirectionalSequenceLSTM|LSTM_OVXLIB|Planned 21Q4|[ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0aaf30e491ad0b1fc7602cbde695b2c859)
-|LSTMCell|LSTMUNIT_OVXLIB|Planned 21Q3|[ANEURALNETWORKS_LSTM](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0ad0377e8c305e596fb7f64ff896671fc5)
+|UnidirectionalSequenceLSTM|LSTM_OVXLIB|Mapped|[ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0aaf30e491ad0b1fc7602cbde695b2c859)
+|LSTMCell|LSTMUNIT_OVXLIB|replace with UnidirectionalSequenceLSTM by set n_step = 1 |[ANEURALNETWORKS_LSTM](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0ad0377e8c305e596fb7f64ff896671fc5)
 ||PRE_PROCESS|Planned 21Q4|Image Preprocessing (YUV2RGB, Input Normalization, Resizing, etc)
 ||HASHTABLE_LOOKUP|Planned 21Q4|[ANEURALNETWORKS_HASHTABLE_LOOKUP](https://developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0aca92716c8c73c1f0fa7f0757916fee26)
 ||EMBEDDING_LOOKUP|Planned 21Q4|[ANEURALNETWORKS_EMBEDDING_LOOKUP](developer.android.com/ndk/reference/group/neural-networks#group___neural_networks_1ggaabbe492c60331b13038e39d4207940e0a8d2ada77adb74357fc0770405bca0e3)
diff --git a/src/tim/vx/ops/unidirectional_sequence_lstm.cc b/src/tim/vx/ops/unidirectional_sequence_lstm.cc
new file mode 100644
index 0000000..2eb81d6
--- /dev/null
+++ b/src/tim/vx/ops/unidirectional_sequence_lstm.cc
@@ -0,0 +1,86 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#include "tim/vx/ops/unidirectional_sequence_lstm.h"
+
+#include "operation_private.h"
+#include "vsi_nn_pub.h"
+
+namespace tim {
+namespace vx {
+namespace ops {
+
+vsi_nn_activation_e downcast_act_type(UnidirectionalSequenceLstm::ActivationType act) {
+    switch (act) {
+        case UnidirectionalSequenceLstm::ActivationType::kRELU:
+            return VSI_NN_LSTMUNIT_ACT_RELU;
+        case UnidirectionalSequenceLstm::ActivationType::kRELU6:
+            return VSI_NN_LSTMUNIT_ACT_RELU6;
+        case UnidirectionalSequenceLstm::ActivationType::kTANH:
+            return VSI_NN_LSTMUNIT_ACT_TANH;
+        case UnidirectionalSequenceLstm::ActivationType::kSIGMOID:
+            return VSI_NN_LSTMUNIT_ACT_SIGMOID;
+        case UnidirectionalSequenceLstm::ActivationType::kHARDSIGMOID:
+            return VSI_NN_LSTMUNIT_ACT_HARD_SIGMOID;
+        default: {
+            VSILOGW("Not supported activition type for LSTM = %d", static_cast<int32_t>(act));
+            return VSI_NN_ACT_NONE;
+        }
+    }
+}
+
+UnidirectionalSequenceLstm::UnidirectionalSequenceLstm(
+    Graph* graph, float cell_clip, float proj_clip, ActivationType act_type,
+    float forget_bias, bool time_major, ActivationType recurrent_act_type,
+    bool return_sequences)
+    : Operation(graph, VSI_NN_OP_LSTM_OVXLIB, LSTM_INPUT_CNT, LSTM_OUTPUT_CNT),
+      act_type_(act_type),
+      recurrent_act_type_(recurrent_act_type) {
+  this->impl()->node()->nn_param.lstm_ovxlib.cell_clip = cell_clip;
+  this->impl()->node()->nn_param.lstm_ovxlib.proj_clip = proj_clip;
+  this->impl()->node()->nn_param.lstm_ovxlib.time_major = time_major;
+  this->impl()->node()->nn_param.lstm_ovxlib.activation =
+      downcast_act_type(act_type);
+  this->impl()->node()->nn_param.lstm_ovxlib.forget_bias = forget_bias;
+  this->impl()->node()->nn_param.lstm_ovxlib.recurrent_activation =
+      downcast_act_type(recurrent_act_type);
+  this->impl()->node()->nn_param.lstm_ovxlib.return_sequences =
+      return_sequences;
+}
+
+std::shared_ptr<Operation> UnidirectionalSequenceLstm::Clone(std::shared_ptr<Graph>& graph) const {
+    auto cloned_op =
+        graph->CreateOperation<tim::vx::ops::UnidirectionalSequenceLstm>(
+            this->impl()->node()->nn_param.lstm_ovxlib.cell_clip,
+            this->impl()->node()->nn_param.lstm_ovxlib.proj_clip,
+            act_type_,
+            this->impl()->node()->nn_param.lstm_ovxlib.forget_bias,
+            this->impl()->node()->nn_param.lstm_ovxlib.time_major,
+            recurrent_act_type_,
+            this->impl()->node()->nn_param.lstm_ovxlib.return_sequences);
+    return cloned_op;
+}
+
+}
+}  // namespace vx
+}  // namespace tim
diff --git a/src/tim/vx/ops/unidirectional_sequence_lstm_test.cc b/src/tim/vx/ops/unidirectional_sequence_lstm_test.cc
new file mode 100644
index 0000000..7a6392f
--- /dev/null
+++ b/src/tim/vx/ops/unidirectional_sequence_lstm_test.cc
@@ -0,0 +1,209 @@
+/****************************************************************************
+*
+*    Copyright (c) 2021 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#include "tim/vx/context.h"
+#include "tim/vx/graph.h"
+#include "tim/vx/ops/unidirectional_sequence_lstm.h"
+
+#include "gtest/gtest.h"
+
+std::shared_ptr<tim::vx::Tensor> make_empty_tensor(
+    std::shared_ptr<tim::vx::Graph> graph, const tim::vx::ShapeType& shape,
+    const tim::vx::TensorAttribute& role)  //, const float& default_value) {
+{
+  tim::vx::TensorSpec spec(tim::vx::DataType::FLOAT32, shape, role);
+  auto tensor = graph->CreateTensor(spec);
+
+  uint32_t count = 1;
+  for (auto dim : shape) {count *= dim;}
+
+  std::vector<float> default_value(count);
+  for(auto& i: default_value) {
+      i = 0.f;
+  }
+
+  tensor->CopyDataToTensor(default_value.data(), count*sizeof(float));
+
+  return tensor;
+}
+
+TEST(LSTM_CELL, shape_in_2_cell_4_out_4_float32) {
+    // NoCifg_NoPeephole_NoProjection_NoLayerNorm
+    auto ctx = tim::vx::Context::Create();
+    auto g = ctx->CreateGraph();
+
+    uint32_t n_batch, n_step, n_cell, n_input, n_output;
+    n_batch = 1, n_step = 1, n_cell = 4, n_input = 2, n_output = 4;
+    tim::vx::ShapeType input_shape, cell_shape, state_shape;
+    input_shape = {n_batch, n_step, n_input}; // non-time-major
+
+    tim::vx::TensorSpec input_tensor_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_input, n_step, n_batch}), tim::vx::TensorAttribute::INPUT);
+
+    tim::vx::TensorSpec i2i_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_input, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2i_bias_spec  (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2c_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_input, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2c_bias_spec  (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2f_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_input, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2f_bias_spec  (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2o_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_input, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec i2o_bias_spec  (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_cell}), tim::vx::TensorAttribute::CONSTANT);
+
+    tim::vx::TensorSpec output_spec    (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_output, n_step, n_batch}), tim::vx::TensorAttribute::OUTPUT);
+    // tim::vx::TensorSpec hstate_spec    (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_batch, n_output}), tim::vx::TensorAttribute::OUTPUT);
+    // tim::vx::TensorSpec cstate_spec    (tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_batch, n_cell}), tim::vx::TensorAttribute::OUTPUT);
+    auto output_tensor = g->CreateTensor(output_spec);
+
+    std::vector<float> input2input_weights, input2cell_weights, input2forget_weights, input2output_weights;
+    // tim::vx::Tensor input2input_weights_tensor, input2cell_weights_tensor, input2forget_weights_tensor, input2output_weights_tensor;
+
+    input2input_weights = {-0.45018822, -0.02338299, -0.0870589,  -0.34550029,
+                             0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+    std::vector<float> input_gate_bias = {0.0, 0.0, 0.0, 0.0};
+    auto input2input_weights_tensor = g->CreateTensor(i2i_weight_spec, input2input_weights.data());
+    auto input_gate_bias_tensor = g->CreateTensor(i2i_bias_spec, input_gate_bias.data());
+
+    input2cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                            -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+    std::vector<float> cell_gate_bias = {0.0, 0.0, 0.0, 0.0};
+    auto cell_gate_bias_tensor = g->CreateTensor(i2c_bias_spec, cell_gate_bias.data());
+    auto input2cell_weights_tensor = g->CreateTensor(i2c_weight_spec, input2cell_weights.data());
+
+    input2forget_weights = {0.09701663,  0.20334584,  -0.50592935,
+                              -0.31343272, -0.40032279, 0.44781327,
+                              0.01387155,  -0.35593212};
+    std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+    auto input2forget_weights_tensor = g->CreateTensor(i2f_weight_spec, input2forget_weights.data());
+    auto forget_gate_bias_tensor = g->CreateTensor(i2f_bias_spec, forget_gate_bias.data());
+
+    input2output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                              0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+    std::vector<float> output_gate_bias = {0.0, 0.0, 0.0, 0.0};
+    auto input2output_weights_tensor = g->CreateTensor(i2o_weight_spec, input2output_weights.data());
+    auto output_gate_bias_tensor = g->CreateTensor(i2o_bias_spec, output_gate_bias.data());
+
+    tim::vx::TensorSpec r2i_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_output, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec r2c_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_output, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec r2f_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_output, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    tim::vx::TensorSpec r2o_weight_spec(tim::vx::DataType::FLOAT32, tim::vx::ShapeType({n_output, n_cell}), tim::vx::TensorAttribute::CONSTANT);
+    std::vector<float> recurrent_to_input_weights = {
+        -0.0063535,  -0.2042388,  0.31454784,  -0.35746509,
+        0.28902304,  0.08183324,  -0.16555229, 0.02286911,
+        -0.13566875, 0.03034258,  0.48091322,  -0.12528998,
+        0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+    std::vector<float> recurrent_to_cell_weights = {
+        -0.3407414,  0.24443203,  -0.2078532,  0.26320225,
+        0.05695659,  -0.00123841, -0.4744786,  -0.35869038,
+        -0.06418842, -0.13502428, -0.501764,   0.22830659,
+        -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+    std::vector<float> recurrent_to_forget_weights = {
+        -0.48684245, -0.06655136, 0.42224967,  0.2112639,
+        0.27654213,  0.20864892,  -0.07646349, 0.45877004,
+        0.00141793,  -0.14609534, 0.36447752,  0.09196436,
+        0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+    std::vector<float> recurrent_to_output_weights = {
+        0.43385774,  -0.17194885, 0.2718237,  0.09215671,
+        0.24107647,  -0.39835793, 0.18212086, 0.01301402,
+        0.48572797,  -0.50656658, 0.20047462, -0.20607421,
+        -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+    auto r2i_weight_tensor = g->CreateTensor(r2i_weight_spec, recurrent_to_input_weights.data());
+    auto r2c_weight_tensor = g->CreateTensor(r2c_weight_spec, recurrent_to_cell_weights.data());
+    auto r2f_weight_tensor = g->CreateTensor(r2f_weight_spec, recurrent_to_forget_weights.data());
+    auto r2o_weight_tensor = g->CreateTensor(r2o_weight_spec, recurrent_to_output_weights.data());
+
+    // std::vector<std::vector<float>> golden_output = {
+    //     {{-0.02973187, 0.1229473, 0.20885126, -0.15358765}},
+    //     {{-0.03716109, 0.12507336, 0.41193449, -0.20860538}},
+    //     {{-0.15053082, 0.09120187, 0.24278517, -0.12222792}}};
+
+    std::vector<float> input = {2,3};
+    auto input_tensor = g->CreateTensor(input_tensor_spec, input.data());
+
+    auto lstm_cell_op = g->CreateOperation<tim::vx::ops::UnidirectionalSequenceLstm>(
+        0.0, 0.0, tim::vx::ops::UnidirectionalSequenceLstm::ActivationType::kTANH, 0.0, false,
+        tim::vx::ops::UnidirectionalSequenceLstm::kNONE, true);
+
+    (*lstm_cell_op)
+        .BindInputs({
+            input_tensor,
+            g->CreateTensorPlaceHolder(),   /*h_state*/
+            g->CreateTensorPlaceHolder(),   /*c_state*/
+            input2input_weights_tensor,
+            input2forget_weights_tensor,
+            input2cell_weights_tensor,
+            input2output_weights_tensor,
+
+            r2i_weight_tensor,
+            r2c_weight_tensor,
+            r2f_weight_tensor,
+            r2o_weight_tensor,
+
+            g->CreateTensorPlaceHolder(),       /*weight_c2i*/
+            g->CreateTensorPlaceHolder(),       /*weight_c2f*/
+            g->CreateTensorPlaceHolder(),       /*weight_c2o*/
+
+            input_gate_bias_tensor,
+            forget_gate_bias_tensor,
+            cell_gate_bias_tensor,
+            output_gate_bias_tensor,
+
+            // optional for projection
+            /*weight_prj*/
+            /*bias_prj*/
+
+            // Layer norm IFCO
+            // g->CreateTensorPlaceHolder(),
+            // g->CreateTensorPlaceHolder(),
+            // g->CreateTensorPlaceHolder(),
+            // g->CreateTensorPlaceHolder(),
+
+            // AUX weight_i2i|i2f|i2c|i2o
+            // g->CreateTensorPlaceHolder(),
+            // g->CreateTensorPlaceHolder(),
+            // g->CreateTensorPlaceHolder(),
+            // g->CreateTensorPlaceHolder(),
+        })
+        .BindOutputs({
+            output_tensor,
+            make_empty_tensor(
+                g, tim::vx::ShapeType({n_output, n_batch}),
+                tim::vx::TensorAttribute::OUTPUT),  //  Output_H_STATE
+            make_empty_tensor(
+                g, tim::vx::ShapeType({n_cell, n_batch}),
+                tim::vx::TensorAttribute::OUTPUT),  //  output_C_State
+        });
+
+    g->Compile();
+    g->Run();
+
+    std::vector<float> golden = {{-0.02973187, 0.1229473, 0.20885126, -0.15358765}};
+    std::vector<float> real(golden.size());
+    output_tensor->CopyDataFromTensor(real.data());
+
+    for(uint32_t i = 0; i < golden.size(); ++i) {
+        EXPECT_NEAR(golden[i], real[i], 0.001f) << "Failed at " << i << "th item";
+    }
+}
\ No newline at end of file