From 61ea0091ca8e8a84d408e728269de4a449882fa7 Mon Sep 17 00:00:00 2001 From: Chen Feiyue <69809761+chenfeiyue-cfy@users.noreply.github.com> Date: Wed, 13 Sep 2023 09:44:21 +0800 Subject: [PATCH] Fixed unsupported float16 bias in fc (#646) Resolve the issue of underlying hardware not supporting float16 bias in fc by converting bias type to float32 Type: Code Improvement Signed-off-by: Feiyue Chen --- include/tim/vx/ops/fullyconnected.h | 7 +++- src/tim/vx/ops/fullyconnected.cc | 18 +++++++++ src/tim/vx/ops/fullyconnected_test.cc | 53 +++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/include/tim/vx/ops/fullyconnected.h b/include/tim/vx/ops/fullyconnected.h index ffaf612..3da1790 100644 --- a/include/tim/vx/ops/fullyconnected.h +++ b/include/tim/vx/ops/fullyconnected.h @@ -33,8 +33,8 @@ namespace ops { * ## FullyConnected * * Denotes a fully (densely) connected layer, which connects all elements in the - * input tensor with each element in the output tensor. - * + * input tensor with each element in the output tensor. + * * - axis: Describes the axis of the inputs when coerced to 2D. * - weights: the output channel number for weight tensor. 
*/ @@ -49,6 +49,9 @@ class FullyConnected : public BuiltinOp { protected: uint32_t axis_; uint32_t weights_; + private: + void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, + int32_t input_idx) override; }; } // namespace ops } // namespace vx } // namespace tim diff --git a/src/tim/vx/ops/fullyconnected.cc b/src/tim/vx/ops/fullyconnected.cc index 0730558..c95b8a4 100644 --- a/src/tim/vx/ops/fullyconnected.cc +++ b/src/tim/vx/ops/fullyconnected.cc @@ -45,6 +45,24 @@ std::shared_ptr<Operation> FullyConnected::Clone( return graph->CreateOperation<FullyConnected>(this->axis_, this->weights_); } +void FullyConnected::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, + int32_t input_idx) { + if (tensor->GetDataType() == vx::DataType::FLOAT16 && + tensor->IsConstTensor() && impl_->inputs_tensor_.size() == 3) { + float* float32_bias = tensor->ConvertTensorToFloat32Data(); + + TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(), + tim::vx::TensorAttribute::CONSTANT); + + auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, float32_bias); + vsi_nn_Free(float32_bias); + + impl_->inputs_tensor_[2] = out_tensor; + impl_->node()->input.tensors[input_idx] = out_tensor->GetId(); + impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this); + } +} + } // namespace ops } // namespace vx } // namespace tim diff --git a/src/tim/vx/ops/fullyconnected_test.cc b/src/tim/vx/ops/fullyconnected_test.cc index 95f3015..29ed963 100644 --- a/src/tim/vx/ops/fullyconnected_test.cc +++ b/src/tim/vx/ops/fullyconnected_test.cc @@ -27,6 +27,7 @@ #include #include "gtest/gtest.h" #include "test_utils.h" +#include "third_party/half/half.hpp" TEST(FullyConnected, unit_2_float_axis_0) { auto ctx = tim::vx::Context::Create(); @@ -74,3 +75,55 @@ EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data())); EXPECT_EQ(golden, output); } + +TEST(FullyConnected, unit_2_float16_axis_0) { + auto ctx = tim::vx::Context::Create(); + auto graph = ctx->CreateGraph(); + using namespace
half_float::literal; + + tim::vx::ShapeType in_shape({2, 2}); + tim::vx::ShapeType weight_shape({2, 3}); + tim::vx::ShapeType bias_shape({3}); + tim::vx::ShapeType out_shape({3, 2}); + tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT16, in_shape, + tim::vx::TensorAttribute::INPUT); + tim::vx::TensorSpec weight_spec(tim::vx::DataType::FLOAT16, weight_shape, + tim::vx::TensorAttribute::CONSTANT); + tim::vx::TensorSpec bias_spec(tim::vx::DataType::FLOAT16, bias_shape, + tim::vx::TensorAttribute::CONSTANT); + tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT16, out_shape, + tim::vx::TensorAttribute::OUTPUT); + std::vector<half_float::half> in_data = { + 1.0_h, 4.0_h, 2.0_h, 6.0_h + }; + std::vector<half_float::half> weight = { + -3.0_h, 3.0_h, 2.0_h, 1.0_h, 0.0_h, 4.0_h + }; + std::vector<half_float::half> bias = { + 0.1_h, 0.4_h, 0.6_h + }; + std::vector<half_float::half> golden = { + 9.1_h, 6.4_h, 16.6_h, 12.1_h, 10.4_h, 24.6_h + }; + + auto input_tensor = graph->CreateTensor(input_spec); + auto weight_tensor = graph->CreateTensor(weight_spec, weight.data()); + auto bias_tensor = graph->CreateTensor(bias_spec, bias.data()); + auto output_tensor = graph->CreateTensor(output_spec); + + EXPECT_TRUE( + input_tensor->CopyDataToTensor(in_data.data())); + auto op = graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 3); + (*op).BindInputs({input_tensor, weight_tensor, bias_tensor}).BindOutputs({output_tensor}); + + EXPECT_TRUE(graph->Compile()); + EXPECT_TRUE(graph->Run()); + + uint32_t output_size = 1; + for (auto i : output_tensor->GetShape()) { + output_size *= i; + } + std::vector<half_float::half> output(output_size); + EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data())); + EXPECT_TRUE(ArraysMatch(golden, output, (half_float::half)0.1)); +} \ No newline at end of file