From 5e09e98c1af6fa866d79345d68e8d79e7d5ed9ab Mon Sep 17 00:00:00 2001
From: chxin66 <57057788+chxin66@users.noreply.github.com>
Date: Tue, 17 Aug 2021 20:37:12 +0800
Subject: [PATCH] Add Gelu support for tim::vx (#153)

* Add map for Gelu

Signed-off-by: Chen Xin <jack.chen@verisilicon.com>
---
 include/tim/vx/ops/activations.h   |  10 +++
 src/tim/vx/ops/activations.cc      |   9 ++
 src/tim/vx/ops/activations_test.cc | 137 +++++++++++++++++++++++++++++
 src/tim/vx/ops/addn_test.cc        |  72 +++++++--------
 4 files changed, 192 insertions(+), 36 deletions(-)
diff --git a/include/tim/vx/ops/activations.h b/include/tim/vx/ops/activations.h
index 8f8a034..2eadede 100644
--- a/include/tim/vx/ops/activations.h
+++ b/include/tim/vx/ops/activations.h
@@ -63,6 +63,9 @@ namespace ops {
  *    - axis                : describes the axis of the inputs when coerced to 2D.
  *
  *   Linear(x, a, b)        : a*x + b.
+ *
+ *   Gelu(x)                : x * P(X <= x), where X ~ N(0, 1). https://tensorflow.google.cn/api_docs/python/tf/nn/gelu
+ *
  * ```
  */
 
@@ -119,6 +122,13 @@ class Linear : public Operation {
   float b_;
 };
 
+class Gelu : public Operation {  // Gelu(x) = x * P(X <= x), X ~ N(0, 1)
+  public:
+   explicit Gelu(Graph* graph, bool approximate = false);  // flag is written to nn_param.gelu.approximate
+   std::shared_ptr<Operation> Clone(
+      std::shared_ptr<Graph>& graph) const override;  // re-creates the op in `graph` with the same flag
+};
+
 }  // namespace ops
 }  // namespace vx
 }  // namespace tim
diff --git a/src/tim/vx/ops/activations.cc b/src/tim/vx/ops/activations.cc
index 81a1d36..855a4d1 100644
--- a/src/tim/vx/ops/activations.cc
+++ b/src/tim/vx/ops/activations.cc
@@ -107,6 +107,15 @@ std::shared_ptr<Operation> Linear::Clone(std::shared_ptr<Graph>& graph) const {
   return graph->CreateOperation<Linear>(this->a_, this->b_);
 }
 
+Gelu::Gelu(Graph* graph, bool approximate) : Operation(graph, VSI_NN_OP_GELU) {
+  // The flag lives only in the node parameters; Clone() reads it back from there.
+  impl()->node()->nn_param.gelu.approximate = approximate;
+}
+
+std::shared_ptr<Operation> Gelu::Clone(std::shared_ptr<Graph>& graph) const {
+  return graph->CreateOperation<Gelu>(impl()->node()->nn_param.gelu.approximate);
+}
+
 }  // namespace ops
 }  // namespace vx
 }  // namespace tim
diff --git a/src/tim/vx/ops/activations_test.cc b/src/tim/vx/ops/activations_test.cc
index d18e284..dfe5894 100644
--- a/src/tim/vx/ops/activations_test.cc
+++ b/src/tim/vx/ops/activations_test.cc
@@ -26,6 +26,20 @@
 #include "tim/vx/ops/activations.h"
 
 #include "gtest/gtest.h"
+#include "src/tim/vx/test_utils.h"
+
+namespace {
+// Compares element-wise within abs_error; a size mismatch is reported as a failure.
+template<typename T>
+::testing::AssertionResult ArraysMatch(const std::vector<T>& expected,
+                                       const std::vector<T>& actual, T abs_error){
+    EXPECT_EQ(expected.size(), actual.size()) << "array size mismatch";
+    for (size_t i = 0; i < expected.size() && i < actual.size(); ++i){
+        EXPECT_NEAR(expected[i], actual[i], abs_error) << "at index:" << i;
+    }
+    return ::testing::AssertionSuccess();  // failures were already recorded by EXPECT_*
+}
+}  // namespace
 
 TEST(Linear, shape_5_1_fp32) {
     auto ctx = tim::vx::Context::Create();
@@ -83,5 +97,128 @@ TEST(Linear, shape_5_1_fp32_omit_b) {
     EXPECT_EQ(golden, output);
 }
 
+TEST(Gelu, shape_5_1_fp32) {
+    auto ctx = tim::vx::Context::Create();
+    auto graph = ctx->CreateGraph();
+    // Input and output are FLOAT32 tensors sharing the shape {5, 1}.
+    tim::vx::ShapeType in_shape({5, 1});
+    tim::vx::ShapeType out_shape({5, 1});
+    tim::vx::TensorSpec in_spec(tim::vx::DataType::FLOAT32,
+                    in_shape, tim::vx::TensorAttribute::INPUT);
+    tim::vx::TensorSpec out_spec(tim::vx::DataType::FLOAT32,
+                            out_shape, tim::vx::TensorAttribute::OUTPUT);
+
+    auto in_tensor = graph->CreateTensor(in_spec);
+    auto out_tensor = graph->CreateTensor(out_spec);
+    // golden holds the expected output for each in_data element with approximate = false.
+    std::vector<float> in_data = {
+        -3, -1, 0, 1, 3
+    };
+    std::vector<float> golden = {
+        -0.00404951, -0.15865529, 0, 0.8413447, 2.9959507
+    };
+
+    EXPECT_TRUE(in_tensor->CopyDataToTensor(in_data.data(), in_data.size() * sizeof(float)));
+    auto op = graph->CreateOperation<tim::vx::ops::Gelu>(false);
+    (*op).BindInput(in_tensor).BindOutput(out_tensor);
+
+    EXPECT_TRUE(graph->Compile());
+    EXPECT_TRUE(graph->Run());
+    // Read the result back and compare element-wise with an absolute tolerance of 1e-5.
+    std::vector<float> output(golden.size());
+    EXPECT_TRUE(out_tensor->CopyDataFromTensor(output.data()));
+    EXPECT_TRUE(ArraysMatch(golden, output, 1e-5f));
+}
+
+TEST(Gelu, shape_5_1_fp32_approximate) {
+    auto ctx = tim::vx::Context::Create();
+    auto graph = ctx->CreateGraph();
+    // Input and output are FLOAT32 tensors sharing the shape {5, 1}.
+    tim::vx::ShapeType in_shape({5, 1});
+    tim::vx::ShapeType out_shape({5, 1});
+    tim::vx::TensorSpec in_spec(tim::vx::DataType::FLOAT32,
+                    in_shape, tim::vx::TensorAttribute::INPUT);
+    tim::vx::TensorSpec out_spec(tim::vx::DataType::FLOAT32,
+                            out_shape, tim::vx::TensorAttribute::OUTPUT);
+
+    auto in_tensor = graph->CreateTensor(in_spec);
+    auto out_tensor = graph->CreateTensor(out_spec);
+    // golden holds the expected output for each in_data element with approximate = true.
+    std::vector<float> in_data = {
+        -3, -1, 0, 1, 3
+    };
+    std::vector<float> golden = {
+        -0.00363752, -0.15880796, 0, 0.841192, 2.9963627
+    };
+
+    EXPECT_TRUE(in_tensor->CopyDataToTensor(in_data.data(), in_data.size() * sizeof(float)));
+    auto op = graph->CreateOperation<tim::vx::ops::Gelu>(true);
+    (*op).BindInput(in_tensor).BindOutput(out_tensor);
+
+    EXPECT_TRUE(graph->Compile());
+    EXPECT_TRUE(graph->Run());
+    // Read the result back and compare element-wise with an absolute tolerance of 1e-5.
+    std::vector<float> output(golden.size());
+    EXPECT_TRUE(out_tensor->CopyDataFromTensor(output.data()));
+    EXPECT_TRUE(ArraysMatch(golden, output, 1e-5f));
+}
+
+TEST(Gelu, shape_5_1_uint8_Quantized) {
+    // Runs Gelu(approximate = false) on asymmetric-quantized uint8 tensors.
+    auto ctx = tim::vx::Context::Create();
+    auto graph = ctx->CreateGraph();
+
+    tim::vx::ShapeType in_shape({5, 1});
+    tim::vx::ShapeType out_shape({5, 1});
+
+    const float InputMin = -127, InputMax = 128, OutputMin = -127, OutputMax = 128;
+
+    std::pair<float, int32_t> scalesAndZp;
+    scalesAndZp = QuantizationParams<uint8_t>(InputMin, InputMax);
+    std::vector<float> scalesInput = {scalesAndZp.first};   //scale
+    std::vector<int32_t> zeroPointsInput = {scalesAndZp.second}; //zero point
+
+    scalesAndZp = QuantizationParams<uint8_t>(OutputMin, OutputMax);
+    std::vector<float> scalesOutput = {scalesAndZp.first};
+    std::vector<int32_t> zeroPointsOutput = {scalesAndZp.second};
 
 
+    tim::vx::Quantization quantInput(tim::vx::QuantType::ASYMMETRIC, 1,
+                                   scalesInput, zeroPointsInput);
+    tim::vx::Quantization quantOutput(tim::vx::QuantType::ASYMMETRIC, 1,
+                                    scalesOutput, zeroPointsOutput);
+
+    tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, in_shape,
+                                 tim::vx::TensorAttribute::INPUT, quantInput);
+    tim::vx::TensorSpec output_spec(tim::vx::DataType::UINT8, out_shape,
+                                tim::vx::TensorAttribute::OUTPUT, quantOutput);
+
+    auto input_tensor = graph->CreateTensor(input_spec);
+    auto output_tensor = graph->CreateTensor(output_spec);
+
+    std::vector<float> in_float_data = {
+        -3, -1, 0, 1, 3
+    };
+    std::vector<float> golden_float = {
+        -0.00404951, -0.15865529, 0, 0.8413447, 2.9959507
+    };
+
+    std::vector<uint8_t> input_data =
+      Quantize<uint8_t>(in_float_data, scalesInput[0], zeroPointsInput[0]);   // quantize the float inputs
+    std::vector<uint8_t> golden =
+      Quantize<uint8_t>(golden_float, scalesOutput[0], zeroPointsOutput[0]);
+    // Tolerance is one quantization step, stated directly in quantized units.
+    // NOTE(review): Quantize(scale) presumably adds the zero point -- confirm in test_utils.h.
+    const std::vector<uint8_t> tolerance = {1};
+
+    EXPECT_TRUE(input_tensor->CopyDataToTensor(input_data.data(), input_data.size() * sizeof(uint8_t)));
+    auto op = graph->CreateOperation<tim::vx::ops::Gelu>(false);
+    (*op).BindInput(input_tensor).BindOutput(output_tensor);
+
+    EXPECT_TRUE(graph->Compile());
+    EXPECT_TRUE(graph->Run());
+    std::vector<uint8_t> output(golden.size());
+
+    EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
+    EXPECT_TRUE(ArraysMatch(golden, output, tolerance[0]));
+}
\ No newline at end of file
diff --git a/src/tim/vx/ops/addn_test.cc b/src/tim/vx/ops/addn_test.cc
index 9ac2b50..03cbfa9 100644
--- a/src/tim/vx/ops/addn_test.cc
+++ b/src/tim/vx/ops/addn_test.cc
@@ -43,13 +43,13 @@ TEST(AddN, shape_2_2_int32) {
     auto input_tensor_y = graph->CreateTensor(input_spec);
     auto output_tensor = graph->CreateTensor(output_spec);
 
-    std::vector<int32_t> in_data_x = { 
-        3, 5, 
-        4, 8 };  
-    std::vector<int32_t> in_data_y = { 
-        1, 6, 
-        2, 9 };  
-    std::vector<int32_t> golden = { 
+    std::vector<int32_t> in_data_x = {
+        3, 5,
+        4, 8 };
+    std::vector<int32_t> in_data_y = {
+        1, 6,
+        2, 9 };
+    std::vector<int32_t> golden = {
         4, 11,
         6, 17 };  //correct answer
 
@@ -61,7 +61,7 @@ TEST(AddN, shape_2_2_int32) {
     EXPECT_TRUE(graph->Compile());
     EXPECT_TRUE(graph->Run());
     std::vector<int32_t> output(4);
-    
+
     EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
     EXPECT_EQ(golden, output);
 }
@@ -70,7 +70,7 @@ TEST(AddN, shape_3_1_float32) {
     auto ctx = tim::vx::Context::Create();
     auto graph = ctx->CreateGraph();
 
-    tim::vx::ShapeType io_shape({3, 1});    
+    tim::vx::ShapeType io_shape({3, 1});
     tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT32,
                             io_shape, tim::vx::TensorAttribute::INPUT);
     tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32,
@@ -80,42 +80,42 @@ TEST(AddN, shape_3_1_float32) {
     auto input_tensor_y = graph->CreateTensor(input_spec);
     auto output_tensor = graph->CreateTensor(output_spec);
 
-    std::vector<float> in_data_x = { 
-        3, 5, 7 };  
-    std::vector<float> in_data_y = { 
-        1, 6, 2 };  
-    std::vector<float> golden = { 
-        4, 11, 9 };  
+    std::vector<float> in_data_x = {
+        3, 5, 7 };
+    std::vector<float> in_data_y = {
+        1, 6, 2 };
+    std::vector<float> golden = {
+        4, 11, 9 };
 
     EXPECT_TRUE(input_tensor_x->CopyDataToTensor(in_data_x.data(), in_data_x.size()*4));
     EXPECT_TRUE(input_tensor_y->CopyDataToTensor(in_data_y.data(), in_data_y.size()*4));
-    auto op = graph->CreateOperation<tim::vx::ops::AddN>(2);   
+    auto op = graph->CreateOperation<tim::vx::ops::AddN>(2);
     (*op).BindInputs({input_tensor_x, input_tensor_y}).BindOutputs({output_tensor});
 
     EXPECT_TRUE(graph->Compile());
     EXPECT_TRUE(graph->Run());
     std::vector<float> output(3);
-    
+
     EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
     EXPECT_EQ(golden, output);
 }
 
-TEST(AddN, shape_2_2_uint8_QuantizedTest) {
+TEST(AddN, shape_2_2_uint8_Quantized) {
     auto ctx = tim::vx::Context::Create();
     auto graph = ctx->CreateGraph();
 
-    tim::vx::ShapeType in_shape({2, 2}); 
+    tim::vx::ShapeType in_shape({2, 2});
     tim::vx::ShapeType out_shape({2, 2});
 
     float InputMin = -127, InputMax = 128, OutputMin = -127, OutputMax = 128;
 
-    std::pair<float, int32_t> scalesAndZp;     
+    std::pair<float, int32_t> scalesAndZp;
 
-    scalesAndZp = QuantizationParams<u_int8_t>(InputMin, InputMax);
+    scalesAndZp = QuantizationParams<uint8_t>(InputMin, InputMax);
     std::vector<float> scalesInput = {scalesAndZp.first};   //scale
     std::vector<int32_t> zeroPointsInput = {scalesAndZp.second}; //zero point
 
-    scalesAndZp = QuantizationParams<u_int8_t>(OutputMin, OutputMax);
+    scalesAndZp = QuantizationParams<uint8_t>(OutputMin, OutputMax);
     std::vector<float> scalesOutput = {scalesAndZp.first};
     std::vector<int32_t> zeroPointsOutput = {scalesAndZp.second};
 
@@ -123,7 +123,7 @@ TEST(AddN, shape_2_2_uint8_QuantizedTest) {
     tim::vx::Quantization quantInput(tim::vx::QuantType::ASYMMETRIC, 1,
                                    scalesInput, zeroPointsInput);
     tim::vx::Quantization quantOutput(tim::vx::QuantType::ASYMMETRIC, 1,
-                                    scalesOutput, zeroPointsOutput);                
+                                    scalesOutput, zeroPointsOutput);
 
     tim::vx::TensorSpec input_spec_x(tim::vx::DataType::UINT8, in_shape,
                                  tim::vx::TensorAttribute::INPUT, quantInput);
@@ -137,32 +137,32 @@ TEST(AddN, shape_2_2_uint8_QuantizedTest) {
     auto input_tensor_y = graph->CreateTensor(input_spec_y);
     auto output_tensor = graph->CreateTensor(output_spec);
 
-    std::vector<float> in_float_data_x = { 
-        3.1, 5.1, 
-        4.1, 8 };  
-    std::vector<float> in_float_data_y = { 
-        1.1, 6.1, 
-        2.1, 9 }; 
-    std::vector<float> golden_float = { 
+    std::vector<float> in_float_data_x = {
+        3.1, 5.1,
+        4.1, 8 };
+    std::vector<float> in_float_data_y = {
+        1.1, 6.1,
+        2.1, 9 };
+    std::vector<float> golden_float = {
         4.2, 11.2,
-        6.2, 17 };  
+        6.2, 17 };
 
-    std::vector<u_int8_t> input_data_x =
+    std::vector<uint8_t> input_data_x =
       Quantize<uint8_t>(in_float_data_x, scalesInput[0], zeroPointsInput[0]);//Quantification process
-    std::vector<u_int8_t> input_data_y =
+    std::vector<uint8_t> input_data_y =
       Quantize<uint8_t>(in_float_data_y, scalesInput[0], zeroPointsInput[0]);
-    std::vector<u_int8_t> golden =
+    std::vector<uint8_t> golden =
       Quantize<uint8_t>(golden_float, scalesOutput[0], zeroPointsOutput[0]);
 
     EXPECT_TRUE(input_tensor_x->CopyDataToTensor(input_data_x.data(), input_data_x.size()*4));
     EXPECT_TRUE(input_tensor_y->CopyDataToTensor(input_data_y.data(), input_data_y.size()*4));
-    auto op = graph->CreateOperation<tim::vx::ops::AddN>(2);   
+    auto op = graph->CreateOperation<tim::vx::ops::AddN>(2);
     (*op).BindInputs({input_tensor_x, input_tensor_y}).BindOutputs({output_tensor});
 
     EXPECT_TRUE(graph->Compile());
     EXPECT_TRUE(graph->Run());
     std::vector<uint8_t> output(4);
-    
+
     EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
     EXPECT_EQ(golden, output);
 }
\ No newline at end of file