Added general Float16 support (#631)
Added Float16 type definition from third-party half library.
Refined float16 bias handling in conv2d.
Refined the float16 case in conv2d.
Caution: the float16 headers are only included when building unit_test.
Type: New Feature
Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
parent 35e50d7692
commit af50cc5e3f
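In essence, the conv2d change replaces a constant float16 bias input with an equivalent float32 constant tensor before binding it to the graph. A minimal sketch of that widening step, assuming the vendored half type from third_party/half (the helper name is hypothetical, not part of the TIM-VX API):

    #include <cstddef>
    #include <vector>

    #include "half.hpp"

    // Widen a constant float16 bias buffer to float32, mirroring what
    // Conv2d::OnBindInputPostProc does in the diff below.
    std::vector<float> WidenBiasToFloat32(
        const std::vector<half_float::half>& f16_bias) {
      std::vector<float> f32_bias(f16_bias.size());
      for (std::size_t i = 0; i < f16_bias.size(); ++i) {
        // half -> float conversion is implicit and lossless.
        f32_bias[i] = static_cast<float>(f16_bias[i]);
      }
      return f32_bias;
    }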
BUILD

@@ -134,8 +134,10 @@ cc_binary(
 cc_test (
     name = "unit_test",
     copts = ["-std=c++14", "-Werror"],
+    includes = ["third_party/half"],
     srcs = [
         "src/tim/vx/test_utils.h",
+        "third_party/half/half.hpp"
     ] + glob(["src/tim/**/*_test.cc"]),
     deps = [
         "@gtest//:gtest",
@@ -98,6 +98,8 @@ if(TIM_VX_ENABLE_TEST)
         FetchContent_Populate(googletest)
         add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
     endif()
+
+    include_directories(third_party/half)
 endif()

 if(TIM_VX_ENABLE_GRPC)
@@ -99,12 +99,9 @@ class Conv2d : public BuiltinOp {
   const int32_t multiplier_;
   const DataLayout kernel_layout_;

+#if defined(__clang__) && (__clang_major__ >= 15)
+#define TIM_VX_OPS_CONV2D_WITH_F16BIAS 1
+ private:
+  void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                           int32_t input_idx) override;
+#endif
 };

 }  // namespace ops
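The macro doubles as a compile-time feature test: the implementation file and the unit tests further down wrap every float16-bias path in the same guard, so the override vanishes cleanly on compilers other than clang 15 and newer. The consumption pattern, in isolation:

    #ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
    // float16-bias handling: only compiled under clang >= 15
    #endif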
@@ -42,8 +42,8 @@ Conv2d::Conv2d(Graph* graph, const std::array<uint32_t, 4> pad,
                const std::array<uint32_t, 2>& stride,
                const std::array<uint32_t, 2>& dilation, int32_t multiplier,
                DataLayout input_layout, DataLayout kernel_layout)
-    : Conv2d(graph, 0, PadType::AUTO, {0, 0}, stride, dilation, pad,
-             multiplier, input_layout, kernel_layout) {}
+    : Conv2d(graph, 0, PadType::AUTO, {0, 0}, stride, dilation, pad, multiplier,
+             input_layout, kernel_layout) {}

 Conv2d::Conv2d(Graph* graph, int32_t weights, PadType padding,
                const std::array<uint32_t, 2>& ksize,
@@ -88,7 +88,8 @@ std::shared_ptr<Operation> Conv2d::Clone(std::shared_ptr<Graph>& graph) const {
                      this->kernel_layout_);
 }

-const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor() const {
+const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor()
+    const {
   if (this->IsAllInputsConst()) {
     return {this->impl_->inputs_tensor_[0]};
   } else {
@@ -96,33 +97,24 @@ const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor() const
   }
 }

-// Handle float16 bias if clang compiler is no less than 15.0.0 version
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+// Handle float16 bias
 void Conv2d::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
                                  int32_t input_idx) {
   if (tensor->GetDataType() == vx::DataType::FLOAT16 &&
       tensor->IsConstTensor() && impl_->inputs_tensor_.size() == 3) {
-    uint32_t bias_size = 1;
-    for (auto i : tensor->GetShape()) {
-      bias_size *= i;
-    }
-    std::vector<_Float16> in(bias_size);
-    tensor->CopyDataFromTensor(in.data());
+    float* float32_bias = tensor->ConvertTensorToFloat32Data();

-    std::vector<float> out(bias_size);
-    for (uint i = 0; i < bias_size; i++) {
-      out[i] = static_cast<float>(in[i]);
-    }
     TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(),
                              tim::vx::TensorAttribute::CONSTANT);
-    auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, out.data());
+    auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, float32_bias);
+    vsi_nn_Free(float32_bias);

     impl_->inputs_tensor_[2] = out_tensor;
     impl_->node()->input.tensors[input_idx] = out_tensor->GetId();
     impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this);
   }
 }
+#endif

 }  // namespace ops
 }  // namespace vx
@@ -28,11 +28,12 @@
 #include "tim/vx/context.h"
 #include "tim/vx/graph.h"
 #include "tim/vx/types.h"
+#include "third_party/half/half.hpp"

+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
 TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
   auto ctx = tim::vx::Context::Create();
   auto graph = ctx->CreateGraph();
+  using namespace half_float::literal;

   tim::vx::ShapeType input_shape({4, 2, 1, 1});   //whcn
   tim::vx::ShapeType weight_shape({2, 2, 1, 3});  //whio
@@ -50,26 +51,29 @@ TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
                                   tim::vx::TensorAttribute::OUTPUT);

   // Input data nchw
-  std::vector<_Float16> input_data = {
-      1, 1, 1, 1,  // row = 1
-      2, 2, 3, 2   // row = 2
+  std::vector<half_float::half> input_data = {
+      1.0_h, 1.0_h, 1.0_h, 1.0_h,  // row = 1
+      2.0_h, 2.0_h, 3.0_h, 2.0_h   // row = 2
   };

   // weight data oihw
-  std::vector<_Float16> weight_data = {
-      1, 2, 3, 4,    //first 2x2 filter
-      -1, 1, -1, 1,  // second 2x2 filter
-      -1, -1, 1, 1,  // third 2x2 filter
+  std::vector<half_float::half> weight_data = {
+      1.0_h, 2.0_h, 3.0_h, 4.0_h,    //first 2x2 filter
+      -1.0_h, 1.0_h, -1.0_h, 1.0_h,  // second 2x2 filter
+      -1.0_h, -1.0_h, 1.0_h, 1.0_h,  // third 2x2 filter
   };

   // bias data
-  std::vector<_Float16> bias_data = {1, 2, 3};
+  std::vector<half_float::half> bias_data = {1.0_h, 2.0_h, 3.0_h};

   // nchw
-  std::vector<_Float16> golden = {// first channel
-                                  18, 22, 21, 8, 7, 9, 8, 3, 2, 3, 1, -1,
+  std::vector<half_float::half> golden = {
+      // first channel
+      18.0_h, 22.0_h, 21.0_h, 8.0_h, 7.0_h, 9.0_h, 8.0_h, 3.0_h, 2.0_h, 3.0_h,
+      1.0_h, -1.0_h,
       // second channel
-                                  2, 3, 1, 0, 5, 6, 6, 4, -1, -2, -2, 1};
+      2.0_h, 3.0_h, 1.0_h, 0.0_h, 5.0_h, 6.0_h, 6.0_h, 4.0_h, -1.0_h, -2.0_h,
+      -2.0_h, 1.0_h};

   auto input_tensor = graph->CreateTensor(input_spec);
   auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
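The `_h` suffix used above is the user-defined literal shipped with the vendored half library (documented in its README at the end of this commit); a standalone illustration:

    #include "half.hpp"

    using namespace half_float::literal;  // brings operator"" _h into scope

    half_float::half one = 1.0_h;   // construct a half directly from a literal
    half_float::half neg = -1.0_h;  // unary minus applied to the literal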
@@ -80,8 +84,8 @@ TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -98,11 +102,10 @@ TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
   for (auto i : output_tensor->GetShape()) {
     output_size *= i;
   }
-  std::vector<_Float16> output(output_size);
+  std::vector<half_float::half> output(output_size);
   EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
-  EXPECT_TRUE(ArraysMatch(golden, output, (_Float16)0.1));
+  EXPECT_TRUE(ArraysMatch(golden, output, (half_float::half)0.1));
 }
+#endif

 TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
   auto ctx = tim::vx::Context::Create();
@@ -155,8 +158,8 @@ TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -224,8 +227,8 @@ TEST(Conv2d, shape_4_2_2_2_float32_PointwiseTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -295,8 +298,8 @@ TEST(Conv2d, shape_4_2_1_2_float32_SimpleTest) {
   std::array<uint32_t, 2> stride({2, 2});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -361,8 +364,8 @@ TEST(Conv2d, shape_4_2_2_2_float32_SimpleChannelsTest) {
   std::array<uint32_t, 2> stride({2, 2});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -432,8 +435,8 @@ TEST(Conv2d, shape_6_3_1_1_float32_SimpleAnisotropicStridesTest) {
   std::array<uint32_t, 2> stride({3, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -497,8 +500,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -562,8 +565,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedConstFilterTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -627,8 +630,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedBiasTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -691,8 +694,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedValidTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -759,8 +762,8 @@ TEST(Conv2d, DISABLED_shape_4_2_2_2_float32_DisabledPointwiseMultifilterTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -827,8 +830,8 @@ TEST(Conv2d, shape_9_9_1_1_float32_SimpleDilationTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({3, 3});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -893,8 +896,8 @@ TEST(Conv2d, shape_4_2_1_2_float32_StrideTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)

@@ -958,8 +961,8 @@ TEST(Conv2d, shape_4_2_1_2_float32_InputAndFilterSameWidthHeightTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1014,8 +1017,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest1) {
                                      scales_input, zero_point_input);
   tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_weight, zero_point_weight);
-  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
-                                   zero_point_bias);
+  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
+                                   scales_bias, zero_point_bias);
   tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_output, zero_point_output);

@@ -1047,8 +1050,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest1) {

   std::vector<u_int8_t> input_data =
       Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
-  std::vector<u_int8_t> weight_data =
-      Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
+  std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
+      weight_data_float, scales_weight[0], zero_point_input[0]);
   std::vector<int32_t> bias_data =
       Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
   std::vector<u_int8_t> golden =
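For context, the Quantize<> helper used throughout these tests lives in the test utilities (src/tim/vx/test_utils.h). A sketch of the affine mapping it presumably implements — QuantizeSketch is a hypothetical stand-in, and the real helper's rounding/clamping details may differ:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <limits>
    #include <vector>

    // Affine quantization: q = clamp(round(v / scale) + zero_point).
    template <typename T>
    std::vector<T> QuantizeSketch(const std::vector<float>& data, float scale,
                                  int32_t zero_point) {
      std::vector<T> q(data.size());
      for (std::size_t i = 0; i < data.size(); ++i) {
        long v = std::lround(data[i] / scale) + zero_point;
        v = std::max<long>(v, std::numeric_limits<T>::min());
        v = std::min<long>(v, std::numeric_limits<T>::max());
        q[i] = static_cast<T>(v);
      }
      return q;
    }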
@@ -1062,8 +1065,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest1) {
   std::array<uint32_t, 2> stride({2, 2});
   std::array<uint32_t, 2> dilation({1, 1});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1094,8 +1097,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
   tim::vx::ShapeType output_shape(
       {2, 1, weight_shape[3], input_shape[3]});  //whcn

-  float input_min = -128.5, input_max = 128, weight_min = -128.5, weight_max = 128,
-        output_min = -127, output_max = 128;
+  float input_min = -128.5, input_max = 128, weight_min = -128.5,
+        weight_max = 128, output_min = -127, output_max = 128;

   std::pair<float, int32_t> scales_zp;

@@ -1118,8 +1121,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
                                      scales_input, zero_point_input);
   tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_weight, zero_point_weight);
-  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
-                                   zero_point_bias);
+  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
+                                   scales_bias, zero_point_bias);
   tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_output, zero_point_output);

@@ -1151,8 +1154,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {

   std::vector<u_int8_t> input_data =
       Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
-  std::vector<u_int8_t> weight_data =
-      Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
+  std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
+      weight_data_float, scales_weight[0], zero_point_input[0]);
   std::vector<int32_t> bias_data =
       Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
   std::vector<u_int8_t> golden =
@@ -1167,8 +1170,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
   std::array<uint32_t, 2> stride({2, 2});
   std::array<uint32_t, 2> dilation({1, 1});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1223,8 +1226,8 @@ TEST(Conv2d, shape_6_3_1_1_uint8_AnisotropicStridesQuantizedTest) {
                                      scales_input, zero_point_input);
   tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_weight, zero_point_weight);
-  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
-                                   zero_point_bias);
+  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
+                                   scales_bias, zero_point_bias);
   tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_output, zero_point_output);

@@ -1255,8 +1258,8 @@ TEST(Conv2d, shape_6_3_1_1_uint8_AnisotropicStridesQuantizedTest) {

   std::vector<u_int8_t> input_data =
       Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
-  std::vector<u_int8_t> weight_data =
-      Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
+  std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
+      weight_data_float, scales_weight[0], zero_point_input[0]);
   std::vector<int32_t> bias_data =
       Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
   std::vector<u_int8_t> golden =
@@ -1271,8 +1274,8 @@ TEST(Conv2d, shape_6_3_1_1_uint8_AnisotropicStridesQuantizedTest) {
   std::array<uint32_t, 2> stride({3, 1});
   std::array<uint32_t, 2> dilation({1, 1});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1327,8 +1330,8 @@ TEST(Conv2d, shape_9_9_1_1_uint8_DilationQuantizedTest) {
                                      scales_input, zero_point_input);
   tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_weight, zero_point_weight);
-  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
-                                   zero_point_bias);
+  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
+                                   scales_bias, zero_point_bias);
   tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_output, zero_point_output);

@@ -1362,8 +1365,8 @@ TEST(Conv2d, shape_9_9_1_1_uint8_DilationQuantizedTest) {

   std::vector<u_int8_t> input_data =
       Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
-  std::vector<u_int8_t> weight_data =
-      Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
+  std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
+      weight_data_float, scales_weight[0], zero_point_input[0]);
   std::vector<int32_t> bias_data =
       Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
   std::vector<u_int8_t> golden =
@@ -1378,8 +1381,8 @@ TEST(Conv2d, shape_9_9_1_1_uint8_DilationQuantizedTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({3, 3});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1434,8 +1437,8 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerTensorTest) {
                                      scales_input, zero_point_input);
   tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_weight, zero_point_weight);
-  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
-                                   zero_point_bias);
+  tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
+                                   scales_bias, zero_point_bias);
   tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
                                      scales_output, zero_point_output);

@@ -1481,8 +1484,8 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerTensorTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({1, 1});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1583,8 +1586,8 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerChannelTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({1, 1});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1699,8 +1702,8 @@ TEST(Conv2d, shape_w_h_128_1_ksize_1_1_stride_2_int8_QuantizedPerChannelTest) {
   for (uint32_t i = 0; i < golden_size; i++) {
     golden_float[i] = 129;
   }
-  std::vector<int8_t> golden =
-      Quantize<int8_t>(golden_float, scales_output[0], zero_point_output[0]);
+  std::vector<int8_t> golden = Quantize<int8_t>(
+      golden_float, scales_output[0], zero_point_output[0]);

   auto ctx = tim::vx::Context::Create();
   auto graph = ctx->CreateGraph();
@@ -1748,12 +1751,14 @@ TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
   tim::vx::ShapeType output_shape(
       {4, 2, weight_shape[3], input_shape[3]});  //whcn
   int8_t fl_input = 9, fl_weight = 8, fl_output = 8;
-  tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_input);
-  tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_weight);
-  tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_output);
+  tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                    fl_input);
+  tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                     fl_weight);
+  tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                     fl_output);
   tim::vx::TensorSpec input_spec(tim::vx::DataType::INT16, input_shape,
-                                 tim::vx::TensorAttribute::INPUT,
-                                 quant_input);
+                                 tim::vx::TensorAttribute::INPUT, quant_input);
   tim::vx::TensorSpec weight_spec(tim::vx::DataType::INT16, weight_shape,
                                   tim::vx::TensorAttribute::CONSTANT,
                                   quant_weight);
@@ -1771,14 +1776,12 @@ TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
       1, 2  // first filter
   };
   //input data(dfp16)
-  std::vector<int16_t> input_data = {
-      256,256,256,256, 512,512,512,512, 256,256,256,256,512,512,512,512,
-      256,512,768,1024,256,512,768,1024,256,512,768,1024,256,512,768,1024
-  };
+  std::vector<int16_t> input_data = {256, 256, 256, 256, 512, 512, 512, 512,
+                                     256, 256, 256, 256, 512, 512, 512, 512,
+                                     256, 512, 768, 1024, 256, 512, 768, 1024,
+                                     256, 512, 768, 1024, 256, 512, 768, 1024};
   //weight data(dfp16)
-  std::vector<int16_t> weight_data = {
-      256,512
-  };
+  std::vector<int16_t> weight_data = {256, 512};
   // bias data
   std::vector<int64_t> bias_data = {0};
   //golden
@@ -1793,8 +1796,8 @@ TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1831,19 +1834,21 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
       {4, 2, weight_shape[3], input_shape[3]});  //whcn
   int8_t fl_input = 9, fl_weight = 8, fl_bias = 17, fl_output = 8;

-  tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_input);
-  tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_weight);
-  tim::vx::Quantization quant_bias(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_bias);
-  tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_output);
+  tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                    fl_input);
+  tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                     fl_weight);
+  tim::vx::Quantization quant_bias(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                   fl_bias);
+  tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
+                                     fl_output);
   tim::vx::TensorSpec input_spec(tim::vx::DataType::INT16, input_shape,
-                                 tim::vx::TensorAttribute::INPUT,
-                                 quant_input);
+                                 tim::vx::TensorAttribute::INPUT, quant_input);
   tim::vx::TensorSpec weight_spec(tim::vx::DataType::INT16, weight_shape,
                                   tim::vx::TensorAttribute::CONSTANT,
                                   quant_weight);
   tim::vx::TensorSpec bias_spec(tim::vx::DataType::INT64, bias_shape,
-                                tim::vx::TensorAttribute::CONSTANT,
-                                quant_bias);
+                                tim::vx::TensorAttribute::CONSTANT, quant_bias);
   tim::vx::TensorSpec output_spec(tim::vx::DataType::INT16, output_shape,
                                   tim::vx::TensorAttribute::OUTPUT,
                                   quant_output);
@@ -1871,18 +1876,12 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
       // third channel
       5, 6, 6, 4, -1, -2, -2, 1};

-  std::vector<int16_t> input_data = {
-      512, 512, 512, 512,
-      1024,1024,1536,1024
-  };
-  std::vector<int16_t> weight_data = {
-      256,512,768,1024,
-      -256,256,-256,256,
-      -256,-256,256,256
-  };
-  std::vector<int64_t> bias_data = {
-      1<<fl_bias, 2*(1<<fl_bias),3*(1<<fl_bias)
-  };
+  std::vector<int16_t> input_data = {512, 512, 512, 512,
+                                     1024, 1024, 1536, 1024};
+  std::vector<int16_t> weight_data = {256, 512, 768, 1024, -256, 256,
+                                      -256, 256, -256, -256, 256, 256};
+  std::vector<int64_t> bias_data = {1 << fl_bias, 2 * (1 << fl_bias),
+                                    3 * (1 << fl_bias)};

   auto input_tensor = graph->CreateTensor(input_spec, input_data.data());
   auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
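The DFP (dynamic fixed point) encoding behind these numbers: a stored integer q with fractional length fl represents q / 2^fl, so with fl_input = 9 the value 512 encodes 1.0, and the bias entries are whole numbers shifted left by fl_bias. A hedged sketch of the mapping (helper names are illustrative, int16 storage as in these tests):

    #include <cstdint>

    // value = q / 2^fl ; e.g. DfpToFloat(512, 9) == 1.0f
    inline float DfpToFloat(int16_t q, int8_t fl) {
      return static_cast<float>(q) / static_cast<float>(1 << fl);
    }

    // q = value * 2^fl (truncating); e.g. FloatToDfp(2.0f, 9) == 1024
    inline int16_t FloatToDfp(float value, int8_t fl) {
      return static_cast<int16_t>(value * static_cast<float>(1 << fl));
    }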
@@ -1894,8 +1893,8 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
   std::array<uint32_t, 2> stride({1, 1});
   std::array<uint32_t, 2> dilation({0, 0});

-  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
-      padding, stride, dilation);
+  auto conv2d =
+      graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
   (*conv2d)
       .BindInput(input_tensor)
       .BindInput(weight_tensor)
@@ -1939,13 +1938,16 @@ TEST(Conv2d, kernel_bigger_than_input_SAME) {
   tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
                                   tim::vx::TensorAttribute::OUTPUT);

-  std::vector<float> input_data = {1.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f,
+  std::vector<float> input_data = {
+      1.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f,
   };
-  std::vector<float> weight = {100.0f, 20.0f, 1.0f, 200.0f, 10.0f, 2.0f,
+  std::vector<float> weight = {
+      100.0f, 20.0f, 1.0f, 200.0f, 10.0f, 2.0f,
   };
   std::vector<float> bias = {500.0f};
-  std::vector<float> golden = {567.0f, 1480.0f, 608.0f, 1370.0f,
-                               543.0f, 760.0f, };
+  std::vector<float> golden = {
+      567.0f, 1480.0f, 608.0f, 1370.0f, 543.0f, 760.0f,
+  };
   auto input_tensor = graph->CreateTensor(input_spec);
   auto weight_tensor = graph->CreateTensor(kernel_spec, weight.data());
   auto bias_tensor = graph->CreateTensor(bias_spec, bias.data());
@@ -1956,7 +1958,9 @@ TEST(Conv2d, kernel_bigger_than_input_SAME) {
   auto op = graph->CreateOperation<tim::vx::ops::Conv2d>(
       tim::vx::PadType::SAME, strides, dilations, 0, tim::vx::DataLayout::WHCN,
       tim::vx::DataLayout::IcWHOc);
-  (*op).BindInputs({input_tensor, weight_tensor, bias_tensor}).BindOutputs({output_tensor});
+  (*op)
+      .BindInputs({input_tensor, weight_tensor, bias_tensor})
+      .BindOutputs({output_tensor});

   EXPECT_TRUE(graph->Compile());

@@ -0,0 +1,184 @@
Release Notes {#changelog}
=============

1.12.0 release (2017-03-06):
----------------------------

- Changed behaviour of `half_cast` to perform conversions to/from `double`
  and `long double` directly according to specified rounding mode, without an
  intermediate `float` conversion.
- Added `noexcept` specifiers to constructors.
- Fixed minor portability problem with `logb` and `ilogb`.
- Tested for *VC++ 2015*.

1.11.0 release (2013-11-16):
----------------------------

- Made tie-breaking behaviour in round to nearest configurable by
  `HALF_ROUND_TIES_TO_EVEN` macro.
- Completed support for all C++11 mathematical functions even if single-
  precision versions from `<cmath>` are unsupported.
- Fixed inability to disable support for C++11 mathematical functions on
  *VC++ 2013*.

1.10.0 release (2013-11-09):
----------------------------

- Made default rounding mode configurable by `HALF_ROUND_STYLE` macro.
- Added support for non-IEEE single-precision implementations.
- Added `HALF_ENABLE_CPP11_TYPE_TRAITS` preprocessor flag for checking
  support for C++11 type traits and TMP features.
- Restricted `half_cast` to support built-in arithmetic types only.
- Changed behaviour of `half_cast` to respect rounding mode when casting
  to/from integer types.

1.9.2 release (2013-11-01):
---------------------------

- Tested for *gcc 4.8*.
- Tested and fixed for *VC++ 2013*.
- Removed unnecessary warnings in *MSVC*.

1.9.1 release (2013-08-08):
---------------------------

- Fixed problems with older gcc and MSVC versions.
- Small fix to non-C++11 implementations of `remainder` and `remquo`.

1.9.0 release (2013-08-07):
---------------------------

- Changed behaviour of `nearbyint`, `rint`, `lrint` and `llrint` to use
  rounding mode of half-precision implementation (which is
  truncating/indeterminate) instead of single-precision rounding mode.
- Added support for more C++11 mathematical functions even if single-
  precision versions from `<cmath>` are unsupported, in particular
  `remainder`, `remquo` and `cbrt`.
- Minor implementation changes.

1.8.1 release (2013-01-22):
---------------------------

- Fixed bug resulting in multiple definitions of the `nanh` function due to
  a missing `inline` specification.

1.8.0 release (2013-01-19):
---------------------------

- Added support for more C++11 mathematical functions even if single-
  precision versions from `<cmath>` are unsupported, in particular
  exponential and logarithm functions, hyperbolic area functions and the
  hypotenuse function.
- Made `fma` function use default implementation if single-precision version
  from `<cmath>` is not faster and thus `FP_FAST_FMAH` to be defined always.
- Fixed overload resolution issues when invoking certain mathematical
  functions by unqualified calls.

1.7.0 release (2012-10-26):
---------------------------

- Added support for C++11 `noexcept` specifiers.
- Changed C++11 `long long` to be supported on *VC++ 2003* and up.

1.6.1 release (2012-09-13):
---------------------------

- Made `fma` and `fdim` functions available even if corresponding
  single-precision functions are not.

1.6.0 release (2012-09-12):
---------------------------

- Added `HALF_ENABLE_CPP11_LONG_LONG` to control support for `long long`
  integers and corresponding mathematical functions.
- Fixed C++98 compatibility on non-VC compilers.

1.5.1 release (2012-08-17):
---------------------------

- Recorrected `std::numeric_limits::round_style` to always return
  `std::round_indeterminate`, due to overflow-handling deviating from
  correct round-toward-zero behaviour.

1.5.0 release (2012-08-16):
---------------------------

- Added `half_cast` for explicitly casting between half and any type
  convertible to/from `float` and allowing the explicit specification of
  the rounding mode to use.

1.4.0 release (2012-08-12):
---------------------------

- Added support for C++11 generalized constant expressions (`constexpr`).

1.3.1 release (2012-08-11):
---------------------------

- Fixed requirement for `std::signbit` and `std::isnan` (even if C++11
  `<cmath>` functions disabled) on non-VC compilers.

1.3.0 release (2012-08-10):
---------------------------

- Made requirement for `<cstdint>` and `static_assert` optional and thus
  made the library C++98-compatible.
- Made support for C++11 features user-overridable through explicit
  definition of corresponding preprocessor symbols to either 0 or 1.
- Renamed `HALF_ENABLE_HASH` to `HALF_ENABLE_CPP11_HASH` in correspondence
  with other C++11 preprocessor symbols.

1.2.0 release (2012-08-07):
---------------------------

- Added proper preprocessor definitions for `HUGE_VALH` and `FP_FAST_FMAH`
  in correspondence with their single-precision counterparts from `<cmath>`.
- Fixed internal preprocessor macros to be properly undefined after use.

1.1.2 release (2012-08-07):
---------------------------

- Revised `std::numeric_limits::round_style` to return
  `std::round_toward_zero` if the `float` version also does and
  `std::round_indeterminate` otherwise.
- Fixed `std::numeric_limits::round_error` to reflect worst-case round
  toward zero behaviour.

1.1.1 release (2012-08-06):
---------------------------

- Fixed `std::numeric_limits::min` to return smallest positive normal
  number, instead of subnormal number.
- Fixed `std::numeric_limits::round_style` to return
  `std::round_indeterminate` due to mixture of separately rounded
  single-precision arithmetics with truncating single-to-half conversions.

1.1.0 release (2012-08-06):
---------------------------

- Added half-precision literals.

1.0.0 release (2012-08-05):
---------------------------

- First release.
@@ -0,0 +1,4 @@
#
# Copyright (c) 2012-2017 Christian Rau
# SPDX-License-Identifier: MIT
#
@@ -0,0 +1,7 @@
PackageName: half
SPDXID: SPDXRef-half
FilesAnalyzed: true
PackageLicenseConcluded: MIT
PackageLicenseInfoFromFiles: MIT
PackageLicenseDeclared: MIT
PackageCopyrightText:<text>Copyright (c) 2012-2017 Christian Rau <rauy@users.sourceforge.net></text>
@@ -0,0 +1,21 @@
The MIT License

Copyright (c) 2012-2017 Christian Rau

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -0,0 +1,288 @@
HALF-PRECISION FLOATING POINT LIBRARY (Version 1.12.0)
------------------------------------------------------

This is a C++ header-only library to provide an IEEE 754 conformant 16-bit
half-precision floating point type along with corresponding arithmetic
operators, type conversions and common mathematical functions. It aims for both
efficiency and ease of use, trying to accurately mimic the behaviour of the
builtin floating point types at the best performance possible.


INSTALLATION AND REQUIREMENTS
-----------------------------

Comfortably enough, the library consists of just a single header file
containing all the functionality, which can be directly included by your
projects, without the necessity to build anything or link to anything.

Whereas this library is fully C++98-compatible, it can profit from certain
C++11 features. Support for those features is checked automatically at compile
(or rather preprocessing) time, but can be explicitly enabled or disabled by
defining the corresponding preprocessor symbols to either 1 or 0 yourself. This
is useful when the automatic detection fails (for more exotic implementations)
or when a feature should be explicitly disabled:

- 'long long' integer type for mathematical functions returning 'long long'
  results (enabled for VC++ 2003 and newer, gcc and clang, overridable with
  'HALF_ENABLE_CPP11_LONG_LONG').

- Static assertions for extended compile-time checks (enabled for VC++ 2010,
  gcc 4.3, clang 2.9 and newer, overridable with
  'HALF_ENABLE_CPP11_STATIC_ASSERT').

- Generalized constant expressions (enabled for VC++ 2015, gcc 4.6, clang 3.1
  and newer, overridable with 'HALF_ENABLE_CPP11_CONSTEXPR').

- noexcept exception specifications (enabled for VC++ 2015, gcc 4.6, clang 3.0
  and newer, overridable with 'HALF_ENABLE_CPP11_NOEXCEPT').

- User-defined literals for half-precision literals to work (enabled for
  VC++ 2015, gcc 4.7, clang 3.1 and newer, overridable with
  'HALF_ENABLE_CPP11_USER_LITERALS').

- Type traits and template meta-programming features from <type_traits>
  (enabled for VC++ 2010, libstdc++ 4.3, libc++ and newer, overridable with
  'HALF_ENABLE_CPP11_TYPE_TRAITS').

- Special integer types from <cstdint> (enabled for VC++ 2010, libstdc++ 4.3,
  libc++ and newer, overridable with 'HALF_ENABLE_CPP11_CSTDINT').

- Certain C++11 single-precision mathematical functions from <cmath> for
  an improved implementation of their half-precision counterparts to work
  (enabled for VC++ 2013, libstdc++ 4.3, libc++ and newer, overridable with
  'HALF_ENABLE_CPP11_CMATH').

- Hash functor 'std::hash' from <functional> (enabled for VC++ 2010,
  libstdc++ 4.3, libc++ and newer, overridable with 'HALF_ENABLE_CPP11_HASH').

The library has been tested successfully with Visual C++ 2005-2015, gcc 4.4-4.8
and clang 3.1. Please contact me if you have any problems, suggestions or even
just success testing it on other platforms.


DOCUMENTATION
-------------

Here follow some general words about the usage of the library and its
implementation. For a complete documentation of its interface look at the
corresponding website http://half.sourceforge.net. You may also generate the
complete developer documentation from the library's only include file's Doxygen
comments, but this is more relevant to developers than to mere users (for
reasons described below).

BASIC USAGE

To make use of the library just include its only header file half.hpp, which
defines all half-precision functionality inside the 'half_float' namespace. The
actual 16-bit half-precision data type is represented by the 'half' type. This
type behaves like the builtin floating point types as much as possible,
supporting the usual arithmetic, comparison and streaming operators, which
makes its use pretty straight-forward:

    using half_float::half;
    half a(3.4), b(5);
    half c = a * b;
    c += 3;
    if(c > a)
        std::cout << c << std::endl;

Additionally the 'half_float' namespace also defines half-precision versions
for all mathematical functions of the C++ standard library, which can be used
directly through ADL:

    half a(-3.14159);
    half s = sin(abs(a));
    long l = lround(s);

You may also specify explicit half-precision literals, since the library
provides a user-defined literal inside the 'half_float::literal' namespace,
which you just need to import (assuming support for C++11 user-defined
literals):

    using namespace half_float::literal;
    half x = 1.0_h;

Furthermore the library provides proper specializations for
'std::numeric_limits', defining various implementation properties, and
'std::hash' for hashing half-precision numbers (assuming support for C++11
'std::hash'). Similar to the corresponding preprocessor symbols from <cmath>
the library also defines the 'HUGE_VALH' constant and maybe the 'FP_FAST_FMAH'
symbol.
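A quick sketch of what those specializations enable (assuming C++11 support is detected):

    #include <limits>
    #include <unordered_set>

    #include <half.hpp>

    using half_float::half;

    static_assert(std::numeric_limits<half>::is_specialized,
                  "numeric_limits is specialized for half");

    // std::hash<half> lets half serve as a key in unordered containers.
    std::unordered_set<half> seen;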
CONVERSIONS AND ROUNDING

The half is explicitly constructible/convertible from a single-precision float
argument. Thus it is also explicitly constructible/convertible from any type
implicitly convertible to float, but constructing it from types like double or
int will involve the usual warnings arising when implicitly converting those to
float because of the lost precision. On the one hand those warnings are
intentional, because converting those types to half necessarily also reduces
precision. But on the other hand they are raised for explicit conversions from
those types, when the user knows what he is doing. So if those warnings keep
bugging you, then you won't get around first explicitly converting to float
before converting to half, or use the 'half_cast' described below. In addition
you can also directly assign float values to halfs.
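In short, an illustrative sketch of the rules just described:

    using half_float::half;

    half a(3.14f);    // explicit construction from float: the intended path
    half b(2.0);      // from double: works, but may warn about lost precision
    a = 0.5f;         // direct assignment from float is also allowed
    float f = a;      // half -> float is implicit (described below)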
In contrast to the float-to-half conversion, which reduces precision, the
conversion from half to float (and thus to any other type implicitly
convertible from float) is implicit, because all values representable with
half-precision are also representable with single-precision. This way the
half-to-float conversion behaves similar to the builtin float-to-double
conversion and all arithmetic expressions involving both half-precision and
single-precision arguments will be of single-precision type. This way you can
also directly use the mathematical functions of the C++ standard library,
though in this case you will invoke the single-precision versions which will
also return single-precision values, which is (even if maybe performing the
exact same computation, see below) not as conceptually clean when working in a
half-precision environment.

The default rounding mode for conversions from float to half uses truncation
(round toward zero, but mapping overflows to infinity) for rounding values not
representable exactly in half-precision. This is the fastest rounding possible
and is usually sufficient. But by redefining the 'HALF_ROUND_STYLE'
preprocessor symbol (before including half.hpp) this default can be overridden
with one of the other standard rounding modes using their respective constants
or the equivalent values of 'std::float_round_style' (it can even be
synchronized with the underlying single-precision implementation by defining it
to 'std::numeric_limits<float>::round_style'):

- 'std::round_indeterminate' or -1 for the fastest rounding (default).

- 'std::round_toward_zero' or 0 for rounding toward zero.

- 'std::round_to_nearest' or 1 for rounding to the nearest value.

- 'std::round_toward_infinity' or 2 for rounding toward positive infinity.

- 'std::round_toward_neg_infinity' or 3 for rounding toward negative infinity.

In addition to changing the overall default rounding mode one can also use the
'half_cast'. This converts between half and any built-in arithmetic type using
a configurable rounding mode (or the default rounding mode if none is
specified). In addition to a configurable rounding mode, 'half_cast' has
another big difference to a mere 'static_cast': Any conversions are performed
directly using the given rounding mode, without any intermediate conversion
to/from 'float'. This is especially relevant for conversions to integer types,
which don't necessarily truncate anymore. But also for conversions from
'double' or 'long double' this may produce more precise results than a
pre-conversion to 'float' using the single-precision implementation's current
rounding mode would.

    half a = half_cast<half>(4.2);
    half b = half_cast<half,std::numeric_limits<float>::round_style>(4.2f);
    assert( half_cast<int, std::round_to_nearest>( 0.7_h ) == 1 );
    assert( half_cast<half,std::round_toward_zero>( 4097 ) == 4096.0_h );
    assert( half_cast<half,std::round_toward_infinity>( 4097 ) == 4100.0_h );
    assert( half_cast<half,std::round_toward_infinity>( std::numeric_limits<double>::min() ) > 0.0_h );

When using round to nearest (either as default or through 'half_cast') ties are
by default resolved by rounding them away from zero (and thus equal to the
behaviour of the 'round' function). But by redefining the
'HALF_ROUND_TIES_TO_EVEN' preprocessor symbol to 1 (before including half.hpp)
this default can be changed to the slightly slower but less biased and more
IEEE-conformant behaviour of rounding half-way cases to the nearest even value.

    #define HALF_ROUND_TIES_TO_EVEN 1
    #include <half.hpp>
    ...
    assert( half_cast<int,std::round_to_nearest>(3.5_h)
         == half_cast<int,std::round_to_nearest>(4.5_h) );

IMPLEMENTATION

For performance reasons (and ease of implementation) many of the mathematical
functions provided by the library as well as all arithmetic operations are
actually carried out in single-precision under the hood, calling to the C++
standard library implementations of those functions whenever appropriate,
meaning the arguments are converted to floats and the result back to half. But
to reduce the conversion overhead as much as possible any temporary values
inside of lengthy expressions are kept in single-precision as long as possible,
while still maintaining a strong half-precision type to the outside world. Only
when finally assigning the value to a half or calling a function that works
directly on halfs is the actual conversion done (or never, when further
converting the result to float).

This approach has two implications. First of all you have to treat the
library's documentation at http://half.sourceforge.net as a simplified version,
describing the behaviour of the library as if implemented this way. The actual
argument and return types of functions and operators may involve other internal
types (feel free to generate the exact developer documentation from the Doxygen
comments in the library's header file if you really need to). But nevertheless
the behaviour is exactly like specified in the documentation. The other
implication is that, in the presence of rounding errors or over-/underflows,
arithmetic expressions may produce different results when compared to
converting to half-precision after each individual operation:

    half a = std::numeric_limits<half>::max() * 2.0_h / 2.0_h;        // a = MAX
    half b = half(std::numeric_limits<half>::max() * 2.0_h) / 2.0_h;  // b = INF
    assert( a != b );

But this should only be a problem in very few cases. One last word has to be
said when talking about performance. Even with its efforts in reducing
conversion overhead as much as possible, the software half-precision
implementation can most probably not beat the direct use of single-precision
computations. Usually using actual float values for all computations and
temporaries and using halfs only for storage is the recommended way. On the
one hand this somehow makes the provided mathematical functions obsolete
(especially in light of the implicit conversion from half to float), but
nevertheless the goal of this library was to provide a complete and
conceptually clean half-precision implementation, to which the standard
mathematical functions belong, even if usually not needed.

IEEE CONFORMANCE

The half type uses the standard IEEE representation with 1 sign bit, 5 exponent
bits and 10 mantissa bits (11 when counting the hidden bit). It supports all
types of special values, like subnormal values, infinity and NaNs. But there
are some limitations to the complete conformance to the IEEE 754 standard:

- The implementation does not differentiate between signalling and quiet
  NaNs, this means operations on halfs are not specified to trap on
  signalling NaNs (though they may, see last point).

- Though arithmetic operations are internally rounded to single-precision
  using the underlying single-precision implementation's current rounding
  mode, those values are then converted to half-precision using the default
  half-precision rounding mode (changed by defining 'HALF_ROUND_STYLE'
  accordingly). This mixture of rounding modes is also the reason why
  'std::numeric_limits<half>::round_style' may actually return
  'std::round_indeterminate' when half- and single-precision rounding modes
  don't match.

- Because of internal truncation it may also be that certain single-precision
  NaNs will be wrongly converted to half-precision infinity, though this is
  very unlikely to happen, since most single-precision implementations don't
  tend to only set the lowest bits of a NaN mantissa.

- The implementation does not provide any floating point exceptions, thus
  arithmetic operations or mathematical functions are not specified to invoke
  proper floating point exceptions. But due to many functions implemented in
  single-precision, those may still invoke floating point exceptions of the
  underlying single-precision implementation.

Some of those points could have been circumvented by controlling the floating
point environment using <cfenv> or implementing a similar exception mechanism.
But this would have required excessive runtime checks giving too high an impact
on performance for something that is rarely ever needed. If you really need to
rely on proper floating point exceptions, it is recommended to explicitly
perform computations using the built-in floating point types to be on the safe
side. In the same way, if you really need to rely on a particular rounding
behaviour, it is recommended to either use single-precision computations and
explicitly convert the result to half-precision using 'half_cast' and
specifying the desired rounding mode, or synchronize the default half-precision
rounding mode to the rounding mode of the single-precision implementation (most
likely 'HALF_ROUND_STYLE=1', 'HALF_ROUND_TIES_TO_EVEN=1'). But this is really
considered an expert-scenario that should be used only when necessary, since
actually working with half-precision usually comes with a certain
tolerance/ignorance of exactness considerations and proper rounding comes with
a certain performance cost.


CREDITS AND CONTACT
-------------------

This library is developed by CHRISTIAN RAU and released under the MIT License
(see LICENSE.txt). If you have any questions or problems with it, feel free to
contact me at rauy@users.sourceforge.net.

Additional credit goes to JEROEN VAN DER ZIJP for his paper on "Fast Half Float
Conversions", whose algorithms have been used in the library for converting
between half-precision and single-precision values.
@@ -0,0 +1,4 @@
#
# Copyright (c) 2012-2017 Christian Rau
# SPDX-License-Identifier: MIT
#

File diff suppressed because it is too large