Added general Float16 support (#631)
Added Float16 type definition from third-party Refine float16 bias handlling in conv2d Refine float16 case in conv2d Caution: Headers of float16 only be included when build unit_test Type: New Feature Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
This commit is contained in:
parent
35e50d7692
commit
af50cc5e3f
2
BUILD
2
BUILD
|
|
@ -134,8 +134,10 @@ cc_binary(
|
||||||
cc_test (
|
cc_test (
|
||||||
name = "unit_test",
|
name = "unit_test",
|
||||||
copts = ["-std=c++14", "-Werror"],
|
copts = ["-std=c++14", "-Werror"],
|
||||||
|
includes = ["third_party/half"],
|
||||||
srcs = [
|
srcs = [
|
||||||
"src/tim/vx/test_utils.h",
|
"src/tim/vx/test_utils.h",
|
||||||
|
"third_party/half/half.hpp"
|
||||||
] + glob(["src/tim/**/*_test.cc"]),
|
] + glob(["src/tim/**/*_test.cc"]),
|
||||||
deps = [
|
deps = [
|
||||||
"@gtest//:gtest",
|
"@gtest//:gtest",
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,8 @@ if(TIM_VX_ENABLE_TEST)
|
||||||
FetchContent_Populate(googletest)
|
FetchContent_Populate(googletest)
|
||||||
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
|
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
include_directories(third_party/half)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(TIM_VX_ENABLE_GRPC)
|
if(TIM_VX_ENABLE_GRPC)
|
||||||
|
|
|
||||||
|
|
@ -99,12 +99,9 @@ class Conv2d : public BuiltinOp {
|
||||||
const int32_t multiplier_;
|
const int32_t multiplier_;
|
||||||
const DataLayout kernel_layout_;
|
const DataLayout kernel_layout_;
|
||||||
|
|
||||||
#if defined(__clang__) && (__clang_major__ >= 15)
|
|
||||||
#define TIM_VX_OPS_CONV2D_WITH_F16BIAS 1
|
|
||||||
private:
|
private:
|
||||||
void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
|
void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
|
||||||
int32_t input_idx) override;
|
int32_t input_idx) override;
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace ops
|
} // namespace ops
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,8 @@ Conv2d::Conv2d(Graph* graph, const std::array<uint32_t, 4> pad,
|
||||||
const std::array<uint32_t, 2>& stride,
|
const std::array<uint32_t, 2>& stride,
|
||||||
const std::array<uint32_t, 2>& dilation, int32_t multiplier,
|
const std::array<uint32_t, 2>& dilation, int32_t multiplier,
|
||||||
DataLayout input_layout, DataLayout kernel_layout)
|
DataLayout input_layout, DataLayout kernel_layout)
|
||||||
: Conv2d(graph, 0, PadType::AUTO, {0, 0}, stride, dilation, pad,
|
: Conv2d(graph, 0, PadType::AUTO, {0, 0}, stride, dilation, pad, multiplier,
|
||||||
multiplier, input_layout, kernel_layout) {}
|
input_layout, kernel_layout) {}
|
||||||
|
|
||||||
Conv2d::Conv2d(Graph* graph, int32_t weights, PadType padding,
|
Conv2d::Conv2d(Graph* graph, int32_t weights, PadType padding,
|
||||||
const std::array<uint32_t, 2>& ksize,
|
const std::array<uint32_t, 2>& ksize,
|
||||||
|
|
@ -88,41 +88,33 @@ std::shared_ptr<Operation> Conv2d::Clone(std::shared_ptr<Graph>& graph) const {
|
||||||
this->kernel_layout_);
|
this->kernel_layout_);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor() const {
|
const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor()
|
||||||
if (this->IsAllInputsConst()) {
|
const {
|
||||||
|
if (this->IsAllInputsConst()) {
|
||||||
return {this->impl_->inputs_tensor_[0]};
|
return {this->impl_->inputs_tensor_[0]};
|
||||||
} else {
|
} else {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle float16 bias if clang compiler is no less than 15.0.0 version
|
// Handle float16 bias
|
||||||
#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
|
|
||||||
void Conv2d::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
|
void Conv2d::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
|
||||||
int32_t input_idx) {
|
int32_t input_idx) {
|
||||||
if (tensor->GetDataType() == vx::DataType::FLOAT16 &&
|
if (tensor->GetDataType() == vx::DataType::FLOAT16 &&
|
||||||
tensor->IsConstTensor() && impl_->inputs_tensor_.size() == 3) {
|
tensor->IsConstTensor() && impl_->inputs_tensor_.size() == 3) {
|
||||||
uint32_t bias_size = 1;
|
float* float32_bias = tensor->ConvertTensorToFloat32Data();
|
||||||
for (auto i : tensor->GetShape()) {
|
|
||||||
bias_size *= i;
|
|
||||||
}
|
|
||||||
std::vector<_Float16> in(bias_size);
|
|
||||||
tensor->CopyDataFromTensor(in.data());
|
|
||||||
|
|
||||||
std::vector<float> out(bias_size);
|
|
||||||
for (uint i = 0; i < bias_size; i++) {
|
|
||||||
out[i] = static_cast<float>(in[i]);
|
|
||||||
}
|
|
||||||
TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(),
|
TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(),
|
||||||
tim::vx::TensorAttribute::CONSTANT);
|
tim::vx::TensorAttribute::CONSTANT);
|
||||||
auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, out.data());
|
|
||||||
|
auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, float32_bias);
|
||||||
|
vsi_nn_Free(float32_bias);
|
||||||
|
|
||||||
impl_->inputs_tensor_[2] = out_tensor;
|
impl_->inputs_tensor_[2] = out_tensor;
|
||||||
impl_->node()->input.tensors[input_idx] = out_tensor->GetId();
|
impl_->node()->input.tensors[input_idx] = out_tensor->GetId();
|
||||||
impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this);
|
impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace ops
|
} // namespace ops
|
||||||
} // namespace vx
|
} // namespace vx
|
||||||
|
|
|
||||||
|
|
@ -28,11 +28,12 @@
|
||||||
#include "tim/vx/context.h"
|
#include "tim/vx/context.h"
|
||||||
#include "tim/vx/graph.h"
|
#include "tim/vx/graph.h"
|
||||||
#include "tim/vx/types.h"
|
#include "tim/vx/types.h"
|
||||||
|
#include "third_party/half/half.hpp"
|
||||||
|
|
||||||
#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
|
|
||||||
TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
|
TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
|
||||||
auto ctx = tim::vx::Context::Create();
|
auto ctx = tim::vx::Context::Create();
|
||||||
auto graph = ctx->CreateGraph();
|
auto graph = ctx->CreateGraph();
|
||||||
|
using namespace half_float::literal;
|
||||||
|
|
||||||
tim::vx::ShapeType input_shape({4, 2, 1, 1}); //whcn
|
tim::vx::ShapeType input_shape({4, 2, 1, 1}); //whcn
|
||||||
tim::vx::ShapeType weight_shape({2, 2, 1, 3}); //whio
|
tim::vx::ShapeType weight_shape({2, 2, 1, 3}); //whio
|
||||||
|
|
@ -50,26 +51,29 @@ TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
|
||||||
tim::vx::TensorAttribute::OUTPUT);
|
tim::vx::TensorAttribute::OUTPUT);
|
||||||
|
|
||||||
// Input data nchw
|
// Input data nchw
|
||||||
std::vector<_Float16> input_data = {
|
|
||||||
1, 1, 1, 1, // row = 1
|
std::vector<half_float::half> input_data = {
|
||||||
2, 2, 3, 2 // row = 2
|
1.0_h, 1.0_h, 1.0_h, 1.0_h, // row = 1
|
||||||
|
2.0_h, 2.0_h, 3.0_h, 2.0_h // row = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
// weight data oihw
|
// weight data oihw
|
||||||
std::vector<_Float16> weight_data = {
|
std::vector<half_float::half> weight_data = {
|
||||||
1, 2, 3, 4, //first 2x2 filter
|
1.0_h, 2.0_h, 3.0_h, 4.0_h, //first 2x2 filter
|
||||||
-1, 1, -1, 1, // second 2x2 filter
|
-1.0_h, 1.0_h, -1.0_h, 1.0_h, // second 2x2 filter
|
||||||
-1, -1, 1, 1, // third 2x2 filter
|
-1.0_h, -1.0_h, 1.0_h, 1.0_h, // third 2x2 filter
|
||||||
};
|
};
|
||||||
|
|
||||||
// bias data
|
// bias data
|
||||||
std::vector<_Float16> bias_data = {1, 2, 3};
|
std::vector<half_float::half> bias_data = {1.0_h, 2.0_h, 3.0_h};
|
||||||
|
|
||||||
// nchw
|
std::vector<half_float::half> golden = {
|
||||||
std::vector<_Float16> golden = {// first channel
|
// first channel
|
||||||
18, 22, 21, 8, 7, 9, 8, 3, 2, 3, 1, -1,
|
18.0_h, 22.0_h, 21.0_h, 8.0_h, 7.0_h, 9.0_h, 8.0_h, 3.0_h, 2.0_h, 3.0_h,
|
||||||
// second channel
|
1.0_h, -1.0_h,
|
||||||
2, 3, 1, 0, 5, 6, 6, 4, -1, -2, -2, 1};
|
// second channel
|
||||||
|
2.0_h, 3.0_h, 1.0_h, 0.0_h, 5.0_h, 6.0_h, 6.0_h, 4.0_h, -1.0_h, -2.0_h,
|
||||||
|
-2.0_h, 1.0_h};
|
||||||
|
|
||||||
auto input_tensor = graph->CreateTensor(input_spec);
|
auto input_tensor = graph->CreateTensor(input_spec);
|
||||||
auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
|
auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
|
||||||
|
|
@ -80,8 +84,8 @@ TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -98,11 +102,10 @@ TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
|
||||||
for (auto i : output_tensor->GetShape()) {
|
for (auto i : output_tensor->GetShape()) {
|
||||||
output_size *= i;
|
output_size *= i;
|
||||||
}
|
}
|
||||||
std::vector<_Float16> output(output_size);
|
std::vector<half_float::half> output(output_size);
|
||||||
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
|
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
|
||||||
EXPECT_TRUE(ArraysMatch(golden, output, (_Float16)0.1));
|
EXPECT_TRUE(ArraysMatch(golden, output, (half_float::half)0.1));
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
|
TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
|
||||||
auto ctx = tim::vx::Context::Create();
|
auto ctx = tim::vx::Context::Create();
|
||||||
|
|
@ -155,8 +158,8 @@ TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -224,8 +227,8 @@ TEST(Conv2d, shape_4_2_2_2_float32_PointwiseTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -295,8 +298,8 @@ TEST(Conv2d, shape_4_2_1_2_float32_SimpleTest) {
|
||||||
std::array<uint32_t, 2> stride({2, 2});
|
std::array<uint32_t, 2> stride({2, 2});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -361,8 +364,8 @@ TEST(Conv2d, shape_4_2_2_2_float32_SimpleChannelsTest) {
|
||||||
std::array<uint32_t, 2> stride({2, 2});
|
std::array<uint32_t, 2> stride({2, 2});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -432,8 +435,8 @@ TEST(Conv2d, shape_6_3_1_1_float32_SimpleAnisotropicStridesTest) {
|
||||||
std::array<uint32_t, 2> stride({3, 1});
|
std::array<uint32_t, 2> stride({3, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -497,8 +500,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -562,8 +565,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedConstFilterTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -627,8 +630,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedBiasTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -691,8 +694,8 @@ TEST(Conv2d, shape_4_3_1_1_float32_HandCalculatedValidTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -759,8 +762,8 @@ TEST(Conv2d, DISABLED_shape_4_2_2_2_float32_DisabledPointwiseMultifilterTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -827,8 +830,8 @@ TEST(Conv2d, shape_9_9_1_1_float32_SimpleDilationTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({3, 3});
|
std::array<uint32_t, 2> dilation({3, 3});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -893,8 +896,8 @@ TEST(Conv2d, shape_4_2_1_2_float32_StrideTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -958,8 +961,8 @@ TEST(Conv2d, shape_4_2_1_2_float32_InputAndFilterSameWidthHeightTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1011,13 +1014,13 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest1) {
|
||||||
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_weight, zero_point_weight);
|
scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT, quant_input);
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
|
|
@ -1047,8 +1050,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest1) {
|
||||||
|
|
||||||
std::vector<u_int8_t> input_data =
|
std::vector<u_int8_t> input_data =
|
||||||
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
||||||
std::vector<u_int8_t> weight_data =
|
std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
|
||||||
Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
|
weight_data_float, scales_weight[0], zero_point_input[0]);
|
||||||
std::vector<int32_t> bias_data =
|
std::vector<int32_t> bias_data =
|
||||||
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
||||||
std::vector<u_int8_t> golden =
|
std::vector<u_int8_t> golden =
|
||||||
|
|
@ -1062,8 +1065,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest1) {
|
||||||
std::array<uint32_t, 2> stride({2, 2});
|
std::array<uint32_t, 2> stride({2, 2});
|
||||||
std::array<uint32_t, 2> dilation({1, 1});
|
std::array<uint32_t, 2> dilation({1, 1});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1094,8 +1097,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
|
||||||
tim::vx::ShapeType output_shape(
|
tim::vx::ShapeType output_shape(
|
||||||
{2, 1, weight_shape[3], input_shape[3]}); //whcn
|
{2, 1, weight_shape[3], input_shape[3]}); //whcn
|
||||||
|
|
||||||
float input_min = -128.5, input_max = 128, weight_min = -128.5, weight_max = 128,
|
float input_min = -128.5, input_max = 128, weight_min = -128.5,
|
||||||
output_min = -127, output_max = 128;
|
weight_max = 128, output_min = -127, output_max = 128;
|
||||||
|
|
||||||
std::pair<float, int32_t> scales_zp;
|
std::pair<float, int32_t> scales_zp;
|
||||||
|
|
||||||
|
|
@ -1115,13 +1118,13 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
|
||||||
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_weight, zero_point_weight);
|
scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT, quant_input);
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
|
|
@ -1151,8 +1154,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
|
||||||
|
|
||||||
std::vector<u_int8_t> input_data =
|
std::vector<u_int8_t> input_data =
|
||||||
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
||||||
std::vector<u_int8_t> weight_data =
|
std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
|
||||||
Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
|
weight_data_float, scales_weight[0], zero_point_input[0]);
|
||||||
std::vector<int32_t> bias_data =
|
std::vector<int32_t> bias_data =
|
||||||
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
||||||
std::vector<u_int8_t> golden =
|
std::vector<u_int8_t> golden =
|
||||||
|
|
@ -1167,8 +1170,8 @@ TEST(Conv2d, shape_4_2_1_2_uint8_QuantizedTest2) {
|
||||||
std::array<uint32_t, 2> stride({2, 2});
|
std::array<uint32_t, 2> stride({2, 2});
|
||||||
std::array<uint32_t, 2> dilation({1, 1});
|
std::array<uint32_t, 2> dilation({1, 1});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1220,13 +1223,13 @@ TEST(Conv2d, shape_6_3_1_1_uint8_AnisotropicStridesQuantizedTest) {
|
||||||
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_weight, zero_point_weight);
|
scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT, quant_input);
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
|
|
@ -1255,8 +1258,8 @@ TEST(Conv2d, shape_6_3_1_1_uint8_AnisotropicStridesQuantizedTest) {
|
||||||
|
|
||||||
std::vector<u_int8_t> input_data =
|
std::vector<u_int8_t> input_data =
|
||||||
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
||||||
std::vector<u_int8_t> weight_data =
|
std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
|
||||||
Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
|
weight_data_float, scales_weight[0], zero_point_input[0]);
|
||||||
std::vector<int32_t> bias_data =
|
std::vector<int32_t> bias_data =
|
||||||
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
||||||
std::vector<u_int8_t> golden =
|
std::vector<u_int8_t> golden =
|
||||||
|
|
@ -1271,8 +1274,8 @@ TEST(Conv2d, shape_6_3_1_1_uint8_AnisotropicStridesQuantizedTest) {
|
||||||
std::array<uint32_t, 2> stride({3, 1});
|
std::array<uint32_t, 2> stride({3, 1});
|
||||||
std::array<uint32_t, 2> dilation({1, 1});
|
std::array<uint32_t, 2> dilation({1, 1});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1324,13 +1327,13 @@ TEST(Conv2d, shape_9_9_1_1_uint8_DilationQuantizedTest) {
|
||||||
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_weight, zero_point_weight);
|
scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::UINT8, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT, quant_input);
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
|
|
@ -1362,8 +1365,8 @@ TEST(Conv2d, shape_9_9_1_1_uint8_DilationQuantizedTest) {
|
||||||
|
|
||||||
std::vector<u_int8_t> input_data =
|
std::vector<u_int8_t> input_data =
|
||||||
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
Quantize<uint8_t>(input_data_float, scales_input[0], zero_point_input[0]);
|
||||||
std::vector<u_int8_t> weight_data =
|
std::vector<u_int8_t> weight_data = Quantize<uint8_t>(
|
||||||
Quantize<uint8_t>(weight_data_float, scales_weight[0], zero_point_input[0]);
|
weight_data_float, scales_weight[0], zero_point_input[0]);
|
||||||
std::vector<int32_t> bias_data =
|
std::vector<int32_t> bias_data =
|
||||||
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
Quantize<int32_t>(bias_data_float, scales_bias[0], zero_point_bias[0]);
|
||||||
std::vector<u_int8_t> golden =
|
std::vector<u_int8_t> golden =
|
||||||
|
|
@ -1378,8 +1381,8 @@ TEST(Conv2d, shape_9_9_1_1_uint8_DilationQuantizedTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({3, 3});
|
std::array<uint32_t, 2> dilation({3, 3});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1431,13 +1434,13 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerTensorTest) {
|
||||||
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_weight, zero_point_weight);
|
scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2, scales_bias,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT8, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT8, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT, quant_input);
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
|
|
@ -1481,8 +1484,8 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerTensorTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({1, 1});
|
std::array<uint32_t, 2> dilation({1, 1});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1527,7 +1530,7 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerChannelTest) {
|
||||||
std::vector<int32_t> zero_point_weight = {0, 0};
|
std::vector<int32_t> zero_point_weight = {0, 0};
|
||||||
|
|
||||||
std::vector<float> scales_bias = {scales_input[0] * scales_weight[0],
|
std::vector<float> scales_bias = {scales_input[0] * scales_weight[0],
|
||||||
scales_input[0] * scales_weight[1]};
|
scales_input[0] * scales_weight[1]};
|
||||||
std::vector<int32_t> zero_point_bias = {0, 0};
|
std::vector<int32_t> zero_point_bias = {0, 0};
|
||||||
|
|
||||||
scales_zp = QuantizationParams<int8_t>(output_min, output_max);
|
scales_zp = QuantizationParams<int8_t>(output_min, output_max);
|
||||||
|
|
@ -1535,13 +1538,13 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerChannelTest) {
|
||||||
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
std::vector<int32_t> zero_point_output = {scales_zp.second};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL,
|
||||||
3, scales_weight, zero_point_weight);
|
3, scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0,
|
||||||
scales_bias, zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT8, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT8, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT, quant_input);
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
|
|
@ -1583,8 +1586,8 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerChannelTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({1, 1});
|
std::array<uint32_t, 2> dilation({1, 1});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1609,12 +1612,12 @@ TEST(Conv2d, shape_3_2_2_1_int8_QuantizedPerChannelTest) {
|
||||||
TEST(Conv2d, shape_w_h_128_1_ksize_1_1_stride_2_int8_QuantizedPerChannelTest) {
|
TEST(Conv2d, shape_w_h_128_1_ksize_1_1_stride_2_int8_QuantizedPerChannelTest) {
|
||||||
std::map<uint32_t, std::vector<uint32_t>> input_shape_list;
|
std::map<uint32_t, std::vector<uint32_t>> input_shape_list;
|
||||||
input_shape_list[32] = {18, 20, 22, 26, 28, 30, 34, 36, 38,
|
input_shape_list[32] = {18, 20, 22, 26, 28, 30, 34, 36, 38,
|
||||||
42, 44, 46, 50, 52, 54, 58, 60, 62};
|
42, 44, 46, 50, 52, 54, 58, 60, 62};
|
||||||
input_shape_list[63] = {18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62};
|
input_shape_list[63] = {18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62};
|
||||||
input_shape_list[95] = {18, 20, 22, 26, 28, 30, 34, 36, 38,
|
input_shape_list[95] = {18, 20, 22, 26, 28, 30, 34, 36, 38,
|
||||||
42, 44, 46, 50, 52, 54, 58, 60, 62};
|
42, 44, 46, 50, 52, 54, 58, 60, 62};
|
||||||
input_shape_list[96] = {18, 20, 22, 26, 28, 30, 34, 36, 38,
|
input_shape_list[96] = {18, 20, 22, 26, 28, 30, 34, 36, 38,
|
||||||
42, 44, 46, 50, 52, 54, 58, 60, 62};
|
42, 44, 46, 50, 52, 54, 58, 60, 62};
|
||||||
tim::vx::ShapeType input_shape({2, 2, 128, 1}); //whcn
|
tim::vx::ShapeType input_shape({2, 2, 128, 1}); //whcn
|
||||||
tim::vx::ShapeType weight_shape({1, 1, 128, 256}); //whio
|
tim::vx::ShapeType weight_shape({1, 1, 128, 256}); //whio
|
||||||
tim::vx::ShapeType bias_shape({weight_shape[3]});
|
tim::vx::ShapeType bias_shape({weight_shape[3]});
|
||||||
|
|
@ -1642,13 +1645,13 @@ TEST(Conv2d, shape_w_h_128_1_ksize_1_1_stride_2_int8_QuantizedPerChannelTest) {
|
||||||
std::vector<int32_t> zero_point_output = {-1};
|
std::vector<int32_t> zero_point_output = {-1};
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_input(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_input, zero_point_input);
|
scales_input, zero_point_input);
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL,
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL,
|
||||||
3, scales_weight, zero_point_weight);
|
3, scales_weight, zero_point_weight);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0,
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0,
|
||||||
scales_bias, zero_point_bias);
|
scales_bias, zero_point_bias);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
tim::vx::Quantization quant_output(tim::vx::QuantType::ASYMMETRIC, 2,
|
||||||
scales_output, zero_point_output);
|
scales_output, zero_point_output);
|
||||||
|
|
||||||
uint32_t weight_size =
|
uint32_t weight_size =
|
||||||
weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3];
|
weight_shape[0] * weight_shape[1] * weight_shape[2] * weight_shape[3];
|
||||||
|
|
@ -1699,8 +1702,8 @@ TEST(Conv2d, shape_w_h_128_1_ksize_1_1_stride_2_int8_QuantizedPerChannelTest) {
|
||||||
for (uint32_t i = 0; i < golden_size; i++) {
|
for (uint32_t i = 0; i < golden_size; i++) {
|
||||||
golden_float[i] = 129;
|
golden_float[i] = 129;
|
||||||
}
|
}
|
||||||
std::vector<int8_t> golden =
|
std::vector<int8_t> golden = Quantize<int8_t>(
|
||||||
Quantize<int8_t>(golden_float, scales_output[0], zero_point_output[0]);
|
golden_float, scales_output[0], zero_point_output[0]);
|
||||||
|
|
||||||
auto ctx = tim::vx::Context::Create();
|
auto ctx = tim::vx::Context::Create();
|
||||||
auto graph = ctx->CreateGraph();
|
auto graph = ctx->CreateGraph();
|
||||||
|
|
@ -1738,28 +1741,30 @@ TEST(Conv2d, shape_w_h_128_1_ksize_1_1_stride_2_int8_QuantizedPerChannelTest) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
|
TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest) {
|
||||||
auto ctx = tim::vx::Context::Create();
|
auto ctx = tim::vx::Context::Create();
|
||||||
if(ctx->isClOnly()) GTEST_SKIP();
|
if (ctx->isClOnly()) GTEST_SKIP();
|
||||||
auto graph = ctx->CreateGraph();
|
auto graph = ctx->CreateGraph();
|
||||||
tim::vx::ShapeType input_shape({4, 2, 2, 2}); //whcn
|
tim::vx::ShapeType input_shape({4, 2, 2, 2}); //whcn
|
||||||
tim::vx::ShapeType weight_shape({1, 1, 2, 1}); //whio
|
tim::vx::ShapeType weight_shape({1, 1, 2, 1}); //whio
|
||||||
tim::vx::ShapeType bias_shape({weight_shape[3]});
|
tim::vx::ShapeType bias_shape({weight_shape[3]});
|
||||||
tim::vx::ShapeType output_shape(
|
tim::vx::ShapeType output_shape(
|
||||||
{4, 2, weight_shape[3], input_shape[3]}); //whcn
|
{4, 2, weight_shape[3], input_shape[3]}); //whcn
|
||||||
int8_t fl_input = 9, fl_weight= 8, fl_output = 8;
|
int8_t fl_input = 9, fl_weight = 8, fl_output = 8;
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_input);
|
tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_weight);
|
fl_input);
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_output);
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
|
fl_weight);
|
||||||
|
tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
|
fl_output);
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT16, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT16, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT,
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
quant_input);
|
|
||||||
tim::vx::TensorSpec weight_spec(tim::vx::DataType::INT16, weight_shape,
|
tim::vx::TensorSpec weight_spec(tim::vx::DataType::INT16, weight_shape,
|
||||||
tim::vx::TensorAttribute::CONSTANT,
|
tim::vx::TensorAttribute::CONSTANT,
|
||||||
quant_weight);
|
quant_weight);
|
||||||
tim::vx::TensorSpec output_spec(tim::vx::DataType::INT16, output_shape,
|
tim::vx::TensorSpec output_spec(tim::vx::DataType::INT16, output_shape,
|
||||||
tim::vx::TensorAttribute::OUTPUT,
|
tim::vx::TensorAttribute::OUTPUT,
|
||||||
quant_output);
|
quant_output);
|
||||||
|
|
||||||
// Input data float
|
// Input data float
|
||||||
std::vector<float> input_data_float = {
|
std::vector<float> input_data_float = {
|
||||||
|
|
@ -1767,25 +1772,23 @@ TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
|
||||||
0.5, 1, 1.5, 2, 0.5, 1, 1.5, 2, 0.5, 1, 1.5, 2, 0.5, 1, 1.5, 2};
|
0.5, 1, 1.5, 2, 0.5, 1, 1.5, 2, 0.5, 1, 1.5, 2, 0.5, 1, 1.5, 2};
|
||||||
|
|
||||||
// weight data float
|
// weight data float
|
||||||
std::vector<float> weight_data_float= {
|
std::vector<float> weight_data_float = {
|
||||||
1, 2 // first filter
|
1, 2 // first filter
|
||||||
};
|
};
|
||||||
//input data(dfp16)
|
//input data(dfp16)
|
||||||
std::vector<int16_t> input_data = {
|
std::vector<int16_t> input_data = {256, 256, 256, 256, 512, 512, 512, 512,
|
||||||
256,256,256,256, 512,512,512,512, 256,256,256,256,512,512,512,512,
|
256, 256, 256, 256, 512, 512, 512, 512,
|
||||||
256,512,768,1024,256,512,768,1024,256,512,768,1024,256,512,768,1024
|
256, 512, 768, 1024, 256, 512, 768, 1024,
|
||||||
};
|
256, 512, 768, 1024, 256, 512, 768, 1024};
|
||||||
//weight data(dfp16)
|
//weight data(dfp16)
|
||||||
std::vector<int16_t> weight_data = {
|
std::vector<int16_t> weight_data = {256, 512};
|
||||||
256,512
|
|
||||||
};
|
|
||||||
// bias data
|
// bias data
|
||||||
std::vector<int64_t> bias_data = {0};
|
std::vector<int64_t> bias_data = {0};
|
||||||
//golden
|
//golden
|
||||||
std::vector<float> golden = {1.5, 1.5, 1.5, 1.5, 3, 3, 3, 3,
|
std::vector<float> golden = {1.5, 1.5, 1.5, 1.5, 3, 3, 3, 3,
|
||||||
1.5, 3, 4.5, 6, 1.5, 3, 4.5, 6};
|
1.5, 3, 4.5, 6, 1.5, 3, 4.5, 6};
|
||||||
|
|
||||||
auto input_tensor = graph->CreateTensor(input_spec,input_data.data());
|
auto input_tensor = graph->CreateTensor(input_spec, input_data.data());
|
||||||
auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
|
auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
|
||||||
auto output_tensor = graph->CreateTensor(output_spec);
|
auto output_tensor = graph->CreateTensor(output_spec);
|
||||||
|
|
||||||
|
|
@ -1793,8 +1796,8 @@ TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1812,16 +1815,16 @@ TEST(Conv2d, shape_4_2_2_2_int16_DFPQuantizedTest){
|
||||||
}
|
}
|
||||||
std::vector<int16_t> output(output_size);
|
std::vector<int16_t> output(output_size);
|
||||||
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
|
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
|
||||||
//transform output(int16) to fp
|
//transform output(int16) to fp
|
||||||
std::vector<float> f;
|
std::vector<float> f;
|
||||||
for(const auto& q : output){
|
for (const auto& q : output) {
|
||||||
f.push_back( q / (float)((int64_t)1 << fl_output));
|
f.push_back(q / (float)((int64_t)1 << fl_output));
|
||||||
}
|
}
|
||||||
EXPECT_EQ(golden, f);
|
EXPECT_EQ(golden, f);
|
||||||
}
|
}
|
||||||
TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
|
TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
|
||||||
auto ctx = tim::vx::Context::Create();
|
auto ctx = tim::vx::Context::Create();
|
||||||
if(ctx->isClOnly()) GTEST_SKIP();
|
if (ctx->isClOnly()) GTEST_SKIP();
|
||||||
auto graph = ctx->CreateGraph();
|
auto graph = ctx->CreateGraph();
|
||||||
|
|
||||||
tim::vx::ShapeType input_shape({4, 2, 1, 1}); //whcn
|
tim::vx::ShapeType input_shape({4, 2, 1, 1}); //whcn
|
||||||
|
|
@ -1829,32 +1832,34 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
|
||||||
tim::vx::ShapeType bias_shape({weight_shape[3]});
|
tim::vx::ShapeType bias_shape({weight_shape[3]});
|
||||||
tim::vx::ShapeType output_shape(
|
tim::vx::ShapeType output_shape(
|
||||||
{4, 2, weight_shape[3], input_shape[3]}); //whcn
|
{4, 2, weight_shape[3], input_shape[3]}); //whcn
|
||||||
int8_t fl_input = 9, fl_weight = 8, fl_bias = 17,fl_output = 8;
|
int8_t fl_input = 9, fl_weight = 8, fl_bias = 17, fl_output = 8;
|
||||||
|
|
||||||
tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_input);
|
tim::vx::Quantization quant_input(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_weight);
|
fl_input);
|
||||||
tim::vx::Quantization quant_bias(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_bias);
|
tim::vx::Quantization quant_weight(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT, fl_output);
|
fl_weight);
|
||||||
|
tim::vx::Quantization quant_bias(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
|
fl_bias);
|
||||||
|
tim::vx::Quantization quant_output(tim::vx::QuantType::DYNAMIC_FIXED_POINT,
|
||||||
|
fl_output);
|
||||||
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT16, input_shape,
|
tim::vx::TensorSpec input_spec(tim::vx::DataType::INT16, input_shape,
|
||||||
tim::vx::TensorAttribute::INPUT,
|
tim::vx::TensorAttribute::INPUT, quant_input);
|
||||||
quant_input);
|
|
||||||
tim::vx::TensorSpec weight_spec(tim::vx::DataType::INT16, weight_shape,
|
tim::vx::TensorSpec weight_spec(tim::vx::DataType::INT16, weight_shape,
|
||||||
tim::vx::TensorAttribute::CONSTANT,
|
tim::vx::TensorAttribute::CONSTANT,
|
||||||
quant_weight);
|
quant_weight);
|
||||||
tim::vx::TensorSpec bias_spec(tim::vx::DataType::INT64, bias_shape,
|
tim::vx::TensorSpec bias_spec(tim::vx::DataType::INT64, bias_shape,
|
||||||
tim::vx::TensorAttribute::CONSTANT,
|
tim::vx::TensorAttribute::CONSTANT, quant_bias);
|
||||||
quant_bias);
|
|
||||||
tim::vx::TensorSpec output_spec(tim::vx::DataType::INT16, output_shape,
|
tim::vx::TensorSpec output_spec(tim::vx::DataType::INT16, output_shape,
|
||||||
tim::vx::TensorAttribute::OUTPUT,
|
tim::vx::TensorAttribute::OUTPUT,
|
||||||
quant_output);
|
quant_output);
|
||||||
// Input data nchw
|
// Input data nchw
|
||||||
std::vector<float> input_data_float= {
|
std::vector<float> input_data_float = {
|
||||||
1, 1, 1, 1, // row = 1
|
1, 1, 1, 1, // row = 1
|
||||||
2, 2, 3, 2 // row = 2
|
2, 2, 3, 2 // row = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
// weight data oihw
|
// weight data oihw
|
||||||
std::vector<float> weight_data_float= {
|
std::vector<float> weight_data_float = {
|
||||||
1, 2, 3, 4, //first 2x2 filter
|
1, 2, 3, 4, //first 2x2 filter
|
||||||
-1, 1, -1, 1, // second 2x2 filter
|
-1, 1, -1, 1, // second 2x2 filter
|
||||||
-1, -1, 1, 1, // third 2x2 filter
|
-1, -1, 1, 1, // third 2x2 filter
|
||||||
|
|
@ -1865,24 +1870,18 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
|
||||||
|
|
||||||
// nchw
|
// nchw
|
||||||
std::vector<float> golden = {// first channel
|
std::vector<float> golden = {// first channel
|
||||||
18, 22, 21, 8, 7, 9, 8, 3,
|
18, 22, 21, 8, 7, 9, 8, 3,
|
||||||
// second channel
|
// second channel
|
||||||
2, 3, 1, -1, 2, 3, 1, 0,
|
2, 3, 1, -1, 2, 3, 1, 0,
|
||||||
// third channel
|
// third channel
|
||||||
5, 6, 6, 4, -1, -2, -2, 1};
|
5, 6, 6, 4, -1, -2, -2, 1};
|
||||||
|
|
||||||
std::vector<int16_t> input_data = {
|
std::vector<int16_t> input_data = {512, 512, 512, 512,
|
||||||
512, 512, 512, 512,
|
1024, 1024, 1536, 1024};
|
||||||
1024,1024,1536,1024
|
std::vector<int16_t> weight_data = {256, 512, 768, 1024, -256, 256,
|
||||||
};
|
-256, 256, -256, -256, 256, 256};
|
||||||
std::vector<int16_t> weight_data = {
|
std::vector<int64_t> bias_data = {1 << fl_bias, 2 * (1 << fl_bias),
|
||||||
256,512,768,1024,
|
3 * (1 << fl_bias)};
|
||||||
-256,256,-256,256,
|
|
||||||
-256,-256,256,256
|
|
||||||
};
|
|
||||||
std::vector<int64_t> bias_data = {
|
|
||||||
1<<fl_bias, 2*(1<<fl_bias),3*(1<<fl_bias)
|
|
||||||
};
|
|
||||||
|
|
||||||
auto input_tensor = graph->CreateTensor(input_spec, input_data.data());
|
auto input_tensor = graph->CreateTensor(input_spec, input_data.data());
|
||||||
auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
|
auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
|
||||||
|
|
@ -1894,8 +1893,8 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
|
||||||
std::array<uint32_t, 2> stride({1, 1});
|
std::array<uint32_t, 2> stride({1, 1});
|
||||||
std::array<uint32_t, 2> dilation({0, 0});
|
std::array<uint32_t, 2> dilation({0, 0});
|
||||||
|
|
||||||
auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto conv2d =
|
||||||
padding, stride, dilation);
|
graph->CreateOperation<tim::vx::ops::Conv2d>(padding, stride, dilation);
|
||||||
(*conv2d)
|
(*conv2d)
|
||||||
.BindInput(input_tensor)
|
.BindInput(input_tensor)
|
||||||
.BindInput(weight_tensor)
|
.BindInput(weight_tensor)
|
||||||
|
|
@ -1916,8 +1915,8 @@ TEST(Conv2d, shape_4_2_1_1_int16_DFPQuantizedTest) {
|
||||||
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
|
EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
|
||||||
//transform output(int16) to fp
|
//transform output(int16) to fp
|
||||||
std::vector<float> f;
|
std::vector<float> f;
|
||||||
for(const auto& q : output){
|
for (const auto& q : output) {
|
||||||
f.push_back( q / (float)((int64_t)1 << fl_output));
|
f.push_back(q / (float)((int64_t)1 << fl_output));
|
||||||
}
|
}
|
||||||
EXPECT_EQ(golden, f);
|
EXPECT_EQ(golden, f);
|
||||||
}
|
}
|
||||||
|
|
@ -1926,7 +1925,7 @@ TEST(Conv2d, kernel_bigger_than_input_SAME) {
|
||||||
auto ctx = tim::vx::Context::Create();
|
auto ctx = tim::vx::Context::Create();
|
||||||
auto graph = ctx->CreateGraph();
|
auto graph = ctx->CreateGraph();
|
||||||
|
|
||||||
tim::vx::ShapeType input_shape({2, 3, 1, 1}); //whcn
|
tim::vx::ShapeType input_shape({2, 3, 1, 1}); //whcn
|
||||||
tim::vx::ShapeType kernel_shape({3, 2, 1, 1}); //whio
|
tim::vx::ShapeType kernel_shape({3, 2, 1, 1}); //whio
|
||||||
tim::vx::ShapeType bias_shape({1});
|
tim::vx::ShapeType bias_shape({1});
|
||||||
tim::vx::ShapeType output_shape({2, 3, 1, 1});
|
tim::vx::ShapeType output_shape({2, 3, 1, 1});
|
||||||
|
|
@ -1939,13 +1938,16 @@ TEST(Conv2d, kernel_bigger_than_input_SAME) {
|
||||||
tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
|
tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
|
||||||
tim::vx::TensorAttribute::OUTPUT);
|
tim::vx::TensorAttribute::OUTPUT);
|
||||||
|
|
||||||
std::vector<float> input_data = {1.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f,
|
std::vector<float> input_data = {
|
||||||
};
|
1.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f,
|
||||||
std::vector<float> weight = {100.0f, 20.0f, 1.0f, 200.0f, 10.0f, 2.0f,
|
};
|
||||||
};
|
std::vector<float> weight = {
|
||||||
|
100.0f, 20.0f, 1.0f, 200.0f, 10.0f, 2.0f,
|
||||||
|
};
|
||||||
std::vector<float> bias = {500.0f};
|
std::vector<float> bias = {500.0f};
|
||||||
std::vector<float> golden = {567.0f, 1480.0f, 608.0f, 1370.0f,
|
std::vector<float> golden = {
|
||||||
543.0f, 760.0f, };
|
567.0f, 1480.0f, 608.0f, 1370.0f, 543.0f, 760.0f,
|
||||||
|
};
|
||||||
auto input_tensor = graph->CreateTensor(input_spec);
|
auto input_tensor = graph->CreateTensor(input_spec);
|
||||||
auto weight_tensor = graph->CreateTensor(kernel_spec, weight.data());
|
auto weight_tensor = graph->CreateTensor(kernel_spec, weight.data());
|
||||||
auto bias_tensor = graph->CreateTensor(bias_spec, bias.data());
|
auto bias_tensor = graph->CreateTensor(bias_spec, bias.data());
|
||||||
|
|
@ -1956,7 +1958,9 @@ TEST(Conv2d, kernel_bigger_than_input_SAME) {
|
||||||
auto op = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
auto op = graph->CreateOperation<tim::vx::ops::Conv2d>(
|
||||||
tim::vx::PadType::SAME, strides, dilations, 0, tim::vx::DataLayout::WHCN,
|
tim::vx::PadType::SAME, strides, dilations, 0, tim::vx::DataLayout::WHCN,
|
||||||
tim::vx::DataLayout::IcWHOc);
|
tim::vx::DataLayout::IcWHOc);
|
||||||
(*op).BindInputs({input_tensor, weight_tensor, bias_tensor}).BindOutputs({output_tensor});
|
(*op)
|
||||||
|
.BindInputs({input_tensor, weight_tensor, bias_tensor})
|
||||||
|
.BindOutputs({output_tensor});
|
||||||
|
|
||||||
EXPECT_TRUE(graph->Compile());
|
EXPECT_TRUE(graph->Compile());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,184 @@
|
||||||
|
Release Notes {#changelog}
|
||||||
|
=============
|
||||||
|
|
||||||
|
1.12.0 release (2017-03-06):
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
- Changed behaviour of `half_cast` to perform conversions to/from `double`
|
||||||
|
and `long double` directly according to specified rounding mode, without an
|
||||||
|
intermediate `float` conversion.
|
||||||
|
- Added `noexcept` specifiers to constructors.
|
||||||
|
- Fixed minor portability problem with `logb` and `ilogb`.
|
||||||
|
- Tested for *VC++ 2015*.
|
||||||
|
|
||||||
|
|
||||||
|
1.11.0 release (2013-11-16):
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
- Made tie-breaking behaviour in round to nearest configurable by
|
||||||
|
`HALF_ROUND_TIES_TO_EVEN` macro.
|
||||||
|
- Completed support for all C++11 mathematical functions even if single-
|
||||||
|
precision versions from `<cmath>` are unsupported.
|
||||||
|
- Fixed inability to disable support for C++11 mathematical functions on
|
||||||
|
*VC++ 2013*.
|
||||||
|
|
||||||
|
|
||||||
|
1.10.0 release (2013-11-09):
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
- Made default rounding mode configurable by `HALF_ROUND_STYLE` macro.
|
||||||
|
- Added support for non-IEEE single-precision implementations.
|
||||||
|
- Added `HALF_ENABLE_CPP11_TYPE_TRAITS` preprocessor flag for checking
|
||||||
|
support for C++11 type traits and TMP features.
|
||||||
|
- Restricted `half_cast` to support built-in arithmetic types only.
|
||||||
|
- Changed behaviour of `half_cast` to respect rounding mode when casting
|
||||||
|
to/from integer types.
|
||||||
|
|
||||||
|
|
||||||
|
1.9.2 release (2013-11-01):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Tested for *gcc 4.8*.
|
||||||
|
- Tested and fixed for *VC++ 2013*.
|
||||||
|
- Removed unnecessary warnings in *MSVC*.
|
||||||
|
|
||||||
|
|
||||||
|
1.9.1 release (2013-08-08):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Fixed problems with older gcc and MSVC versions.
|
||||||
|
- Small fix to non-C++11 implementations of `remainder` and `remquo`.
|
||||||
|
|
||||||
|
|
||||||
|
1.9.0 release (2013-08-07):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Changed behaviour of `nearbyint`, `rint`, `lrint` and `llrint` to use
|
||||||
|
rounding mode of half-precision implementation (which is
|
||||||
|
truncating/indeterminate) instead of single-precision rounding mode.
|
||||||
|
- Added support for more C++11 mathematical functions even if single-
|
||||||
|
precision versions from `<cmath>` are unsupported, in particular
|
||||||
|
`remainder`, `remquo` and `cbrt`.
|
||||||
|
- Minor implementation changes.
|
||||||
|
|
||||||
|
|
||||||
|
1.8.1 release (2013-01-22):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Fixed bug resulting in multiple definitions of the `nanh` function due to
|
||||||
|
a missing `inline` specification.
|
||||||
|
|
||||||
|
|
||||||
|
1.8.0 release (2013-01-19):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added support for more C++11 mathematical functions even if single-
|
||||||
|
precision versions from `<cmath>` are unsupported, in particular
|
||||||
|
exponential and logarithm functions, hyperbolic area functions and the
|
||||||
|
hypotenuse function.
|
||||||
|
- Made `fma` function use default implementation if single-precision version
|
||||||
|
from `<cmath>` is not faster and thus `FP_FAST_FMAH` to be defined always.
|
||||||
|
- Fixed overload resolution issues when invoking certain mathematical
|
||||||
|
functions by unqualified calls.
|
||||||
|
|
||||||
|
|
||||||
|
1.7.0 release (2012-10-26):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added support for C++11 `noexcept` specifiers.
|
||||||
|
- Changed C++11 `long long` to be supported on *VC++ 2003* and up.
|
||||||
|
|
||||||
|
|
||||||
|
1.6.1 release (2012-09-13):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Made `fma` and `fdim` functions available even if corresponding
|
||||||
|
single-precision functions are not.
|
||||||
|
|
||||||
|
|
||||||
|
1.6.0 release (2012-09-12):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added `HALF_ENABLE_CPP11_LONG_LONG` to control support for `long long`
|
||||||
|
integers and corresponding mathematical functions.
|
||||||
|
- Fixed C++98 compatibility on non-VC compilers.
|
||||||
|
|
||||||
|
|
||||||
|
1.5.1 release (2012-08-17):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Recorrected `std::numeric_limits::round_style` to always return
|
||||||
|
`std::round_indeterminate`, due to overflow-handling deviating from
|
||||||
|
correct round-toward-zero behaviour.
|
||||||
|
|
||||||
|
|
||||||
|
1.5.0 release (2012-08-16):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added `half_cast` for explicitly casting between half and any type
|
||||||
|
convertible to/from `float` and allowing the explicit specification of
|
||||||
|
the rounding mode to use.
|
||||||
|
|
||||||
|
|
||||||
|
1.4.0 release (2012-08-12):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added support for C++11 generalized constant expressions (`constexpr`).
|
||||||
|
|
||||||
|
|
||||||
|
1.3.1 release (2012-08-11):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Fixed requirement for `std::signbit` and `std::isnan` (even if C++11
|
||||||
|
`<cmath>` functions disabled) on non-VC compilers.
|
||||||
|
|
||||||
|
|
||||||
|
1.3.0 release (2012-08-10):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Made requirement for `<cstdint>` and `static_assert` optional and thus
|
||||||
|
made the library C++98-compatible.
|
||||||
|
- Made support for C++11 features user-overridable through explicit
|
||||||
|
definition of corresponding preprocessor symbols to either 0 or 1.
|
||||||
|
- Renamed `HALF_ENABLE_HASH` to `HALF_ENABLE_CPP11_HASH` in correspondence
|
||||||
|
with other C++11 preprocessor symbols.
|
||||||
|
|
||||||
|
|
||||||
|
1.2.0 release (2012-08-07):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added proper preprocessor definitions for `HUGE_VALH` and `FP_FAST_FMAH`
|
||||||
|
in correspondence with their single-precision counterparts from `<cmath>`.
|
||||||
|
- Fixed internal preprocessor macros to be properly undefined after use.
|
||||||
|
|
||||||
|
|
||||||
|
1.1.2 release (2012-08-07):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Revised `std::numeric_limits::round_style` to return
|
||||||
|
`std::round_toward_zero` if the `float` version also does and
|
||||||
|
`std::round_indeterminate` otherwise.
|
||||||
|
- Fixed `std::numeric_limits::round_error` to reflect worst-case round
|
||||||
|
toward zero behaviour.
|
||||||
|
|
||||||
|
|
||||||
|
1.1.1 release (2012-08-06):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Fixed `std::numeric_limits::min` to return smallest positive normal
|
||||||
|
number, instead of subnormal number.
|
||||||
|
- Fixed `std::numeric_limits::round_style` to return
|
||||||
|
`std::round_indeterminate` due to mixture of separately rounded
|
||||||
|
single-precision arithmetics with truncating single-to-half conversions.
|
||||||
|
|
||||||
|
|
||||||
|
1.1.0 release (2012-08-06):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- Added half-precision literals.
|
||||||
|
|
||||||
|
|
||||||
|
1.0.0 release (2012-08-05):
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
- First release.
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2012-2017 Christian Rau
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
#
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
PackageName: half
|
||||||
|
SPDXID: SPDXRef-half
|
||||||
|
FilesAnalyzed: true
|
||||||
|
PackageLicenseConcluded: MIT
|
||||||
|
PackageLicenseInfoFromFiles: MIT
|
||||||
|
PackageLicenseDeclared: MIT
|
||||||
|
PackageCopyrightText:<text>Copyright (c) 2012-2017 Christian Rau <rauy@users.sourceforge.net></text>
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2012-2017 Christian Rau
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
|
@ -0,0 +1,288 @@
|
||||||
|
HALF-PRECISION FLOATING POINT LIBRARY (Version 1.12.0)
|
||||||
|
------------------------------------------------------
|
||||||
|
|
||||||
|
This is a C++ header-only library to provide an IEEE 754 conformant 16-bit
|
||||||
|
half-precision floating point type along with corresponding arithmetic
|
||||||
|
operators, type conversions and common mathematical functions. It aims for both
|
||||||
|
efficiency and ease of use, trying to accurately mimic the behaviour of the
|
||||||
|
builtin floating point types at the best performance possible.
|
||||||
|
|
||||||
|
|
||||||
|
INSTALLATION AND REQUIREMENTS
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Comfortably enough, the library consists of just a single header file
|
||||||
|
containing all the functionality, which can be directly included by your
|
||||||
|
projects, without the necessity to build anything or link to anything.
|
||||||
|
|
||||||
|
Whereas this library is fully C++98-compatible, it can profit from certain
|
||||||
|
C++11 features. Support for those features is checked automatically at compile
|
||||||
|
(or rather preprocessing) time, but can be explicitly enabled or disabled by
|
||||||
|
defining the corresponding preprocessor symbols to either 1 or 0 yourself. This
|
||||||
|
is useful when the automatic detection fails (for more exotic implementations)
|
||||||
|
or when a feature should be explicitly disabled:
|
||||||
|
|
||||||
|
- 'long long' integer type for mathematical functions returning 'long long'
|
||||||
|
results (enabled for VC++ 2003 and newer, gcc and clang, overridable with
|
||||||
|
'HALF_ENABLE_CPP11_LONG_LONG').
|
||||||
|
|
||||||
|
- Static assertions for extended compile-time checks (enabled for VC++ 2010,
|
||||||
|
gcc 4.3, clang 2.9 and newer, overridable with 'HALF_ENABLE_CPP11_STATIC_ASSERT').
|
||||||
|
|
||||||
|
- Generalized constant expressions (enabled for VC++ 2015, gcc 4.6, clang 3.1
|
||||||
|
and newer, overridable with 'HALF_ENABLE_CPP11_CONSTEXPR').
|
||||||
|
|
||||||
|
- noexcept exception specifications (enabled for VC++ 2015, gcc 4.6, clang 3.0
|
||||||
|
and newer, overridable with 'HALF_ENABLE_CPP11_NOEXCEPT').
|
||||||
|
|
||||||
|
- User-defined literals for half-precision literals to work (enabled for
|
||||||
|
VC++ 2015, gcc 4.7, clang 3.1 and newer, overridable with
|
||||||
|
'HALF_ENABLE_CPP11_USER_LITERALS').
|
||||||
|
|
||||||
|
- Type traits and template meta-programming features from <type_traits>
|
||||||
|
(enabled for VC++ 2010, libstdc++ 4.3, libc++ and newer, overridable with
|
||||||
|
'HALF_ENABLE_CPP11_TYPE_TRAITS').
|
||||||
|
|
||||||
|
- Special integer types from <cstdint> (enabled for VC++ 2010, libstdc++ 4.3,
|
||||||
|
libc++ and newer, overridable with 'HALF_ENABLE_CPP11_CSTDINT').
|
||||||
|
|
||||||
|
- Certain C++11 single-precision mathematical functions from <cmath> for
|
||||||
|
an improved implementation of their half-precision counterparts to work
|
||||||
|
(enabled for VC++ 2013, libstdc++ 4.3, libc++ and newer, overridable with
|
||||||
|
'HALF_ENABLE_CPP11_CMATH').
|
||||||
|
|
||||||
|
- Hash functor 'std::hash' from <functional> (enabled for VC++ 2010,
|
||||||
|
libstdc++ 4.3, libc++ and newer, overridable with 'HALF_ENABLE_CPP11_HASH').
|
||||||
|
|
||||||
|
The library has been tested successfully with Visual C++ 2005-2015, gcc 4.4-4.8
|
||||||
|
and clang 3.1. Please contact me if you have any problems, suggestions or even
|
||||||
|
just success testing it on other platforms.
|
||||||
|
|
||||||
|
|
||||||
|
DOCUMENTATION
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Here follow some general words about the usage of the library and its
|
||||||
|
implementation. For a complete documentation of its interface look at the
|
||||||
|
corresponding website http://half.sourceforge.net. You may also generate the
|
||||||
|
complete developer documentation from the library's only include file's doxygen
|
||||||
|
comments, but this is more relevant to developers rather than mere users (for
|
||||||
|
reasons described below).
|
||||||
|
|
||||||
|
BASIC USAGE
|
||||||
|
|
||||||
|
To make use of the library just include its only header file half.hpp, which
|
||||||
|
defines all half-precision functionality inside the 'half_float' namespace. The
|
||||||
|
actual 16-bit half-precision data type is represented by the 'half' type. This
|
||||||
|
type behaves like the builtin floating point types as much as possible,
|
||||||
|
supporting the usual arithmetic, comparison and streaming operators, which
|
||||||
|
makes its use pretty straight-forward:
|
||||||
|
|
||||||
|
using half_float::half;
|
||||||
|
half a(3.4), b(5);
|
||||||
|
half c = a * b;
|
||||||
|
c += 3;
|
||||||
|
if(c > a)
|
||||||
|
std::cout << c << std::endl;
|
||||||
|
|
||||||
|
Additionally the 'half_float' namespace also defines half-precision versions
|
||||||
|
for all mathematical functions of the C++ standard library, which can be used
|
||||||
|
directly through ADL:
|
||||||
|
|
||||||
|
half a(-3.14159);
|
||||||
|
half s = sin(abs(a));
|
||||||
|
long l = lround(s);
|
||||||
|
|
||||||
|
You may also specify explicit half-precision literals, since the library
|
||||||
|
provides a user-defined literal inside the 'half_float::literal' namespace,
|
||||||
|
which you just need to import (assuming support for C++11 user-defined literals):
|
||||||
|
|
||||||
|
using namespace half_float::literal;
|
||||||
|
half x = 1.0_h;
|
||||||
|
|
||||||
|
Furthermore the library provides proper specializations for
|
||||||
|
'std::numeric_limits', defining various implementation properties, and
|
||||||
|
'std::hash' for hashing half-precision numbers (assuming support for C++11
|
||||||
|
'std::hash'). Similar to the corresponding preprocessor symbols from <cmath>
|
||||||
|
the library also defines the 'HUGE_VALH' constant and maybe the 'FP_FAST_FMAH'
|
||||||
|
symbol.
|
||||||
|
|
||||||
|
CONVERSIONS AND ROUNDING
|
||||||
|
|
||||||
|
The half is explicitly constructible/convertible from a single-precision float
|
||||||
|
argument. Thus it is also explicitly constructible/convertible from any type
|
||||||
|
implicitly convertible to float, but constructing it from types like double or
|
||||||
|
int will involve the usual warnings arising when implicitly converting those to
|
||||||
|
float because of the lost precision. On the one hand those warnings are
|
||||||
|
intentional, because converting those types to half necessarily also reduces
|
||||||
|
precision. But on the other hand they are raised for explicit conversions from
|
||||||
|
those types, when the user knows what he is doing. So if those warnings keep
|
||||||
|
bugging you, then you won't get around first explicitly converting to float
|
||||||
|
before converting to half, or use the 'half_cast' described below. In addition
|
||||||
|
you can also directly assign float values to halfs.
|
||||||
|
|
||||||
|
In contrast to the float-to-half conversion, which reduces precision, the
|
||||||
|
conversion from half to float (and thus to any other type implicitly
|
||||||
|
convertible from float) is implicit, because all values representable with
|
||||||
|
half-precision are also representable with single-precision. This way the
|
||||||
|
half-to-float conversion behaves similar to the builtin float-to-double
|
||||||
|
conversion and all arithmetic expressions involving both half-precision and
|
||||||
|
single-precision arguments will be of single-precision type. This way you can
|
||||||
|
also directly use the mathematical functions of the C++ standard library,
|
||||||
|
though in this case you will invoke the single-precision versions which will
|
||||||
|
also return single-precision values, which is (even if maybe performing the
|
||||||
|
exact same computation, see below) not as conceptually clean when working in a
|
||||||
|
half-precision environment.
|
||||||
|
|
||||||
|
The default rounding mode for conversions from float to half uses truncation
|
||||||
|
(round toward zero, but mapping overflows to infinity) for rounding values not
|
||||||
|
representable exactly in half-precision. This is the fastest rounding possible
|
||||||
|
and is usually sufficient. But by redefining the 'HALF_ROUND_STYLE'
|
||||||
|
preprocessor symbol (before including half.hpp) this default can be overridden
|
||||||
|
with one of the other standard rounding modes using their respective constants
|
||||||
|
or the equivalent values of 'std::float_round_style' (it can even be
|
||||||
|
synchronized with the underlying single-precision implementation by defining it
|
||||||
|
to 'std::numeric_limits<float>::round_style'):
|
||||||
|
|
||||||
|
- 'std::round_indeterminate' or -1 for the fastest rounding (default).
|
||||||
|
|
||||||
|
- 'std::round_toward_zero' or 0 for rounding toward zero.
|
||||||
|
|
||||||
|
- 'std::round_to_nearest' or 1 for rounding to the nearest value.
|
||||||
|
|
||||||
|
- 'std::round_toward_infinity' or 2 for rounding toward positive infinity.
|
||||||
|
|
||||||
|
- 'std::round_toward_neg_infinity' or 3 for rounding toward negative infinity.
|
||||||
|
|
||||||
|
In addition to changing the overall default rounding mode one can also use the
|
||||||
|
'half_cast'. This converts between half and any built-in arithmetic type using
|
||||||
|
a configurable rounding mode (or the default rounding mode if none is
|
||||||
|
specified). In addition to a configurable rounding mode, 'half_cast' has
|
||||||
|
another big difference to a mere 'static_cast': Any conversions are performed
|
||||||
|
directly using the given rounding mode, without any intermediate conversion
|
||||||
|
to/from 'float'. This is especially relevant for conversions to integer types,
|
||||||
|
which don't necessarily truncate anymore. But also for conversions from
|
||||||
|
'double' or 'long double' this may produce more precise results than a
|
||||||
|
pre-conversion to 'float' using the single-precision implementation's current
|
||||||
|
rounding mode would.
|
||||||
|
|
||||||
|
half a = half_cast<half>(4.2);
|
||||||
|
half b = half_cast<half,std::numeric_limits<float>::round_style>(4.2f);
|
||||||
|
assert( half_cast<int, std::round_to_nearest>( 0.7_h ) == 1 );
|
||||||
|
assert( half_cast<half,std::round_toward_zero>( 4097 ) == 4096.0_h );
|
||||||
|
assert( half_cast<half,std::round_toward_infinity>( 4097 ) == 4100.0_h );
|
||||||
|
assert( half_cast<half,std::round_toward_infinity>( std::numeric_limits<double>::min() ) > 0.0_h );
|
||||||
|
|
||||||
|
When using round to nearest (either as default or through 'half_cast') ties are
|
||||||
|
by default resolved by rounding them away from zero (and thus equal to the
|
||||||
|
behaviour of the 'round' function). But by redefining the
|
||||||
|
'HALF_ROUND_TIES_TO_EVEN' preprocessor symbol to 1 (before including half.hpp)
|
||||||
|
this default can be changed to the slightly slower but less biased and more
|
||||||
|
IEEE-conformant behaviour of rounding half-way cases to the nearest even value.
|
||||||
|
|
||||||
|
#define HALF_ROUND_TIES_TO_EVEN 1
|
||||||
|
#include <half.hpp>
|
||||||
|
...
|
||||||
|
assert( half_cast<int,std::round_to_nearest>(3.5_h)
|
||||||
|
== half_cast<int,std::round_to_nearest>(4.5_h) );
|
||||||
|
|
||||||
|
IMPLEMENTATION
|
||||||
|
|
||||||
|
For performance reasons (and ease of implementation) many of the mathematical
|
||||||
|
functions provided by the library as well as all arithmetic operations are
|
||||||
|
actually carried out in single-precision under the hood, calling to the C++
|
||||||
|
standard library implementations of those functions whenever appropriate,
|
||||||
|
meaning the arguments are converted to floats and the result back to half. But
|
||||||
|
to reduce the conversion overhead as much as possible any temporary values
|
||||||
|
inside of lengthy expressions are kept in single-precision as long as possible,
|
||||||
|
while still maintaining a strong half-precision type to the outside world. Only
|
||||||
|
when finally assigning the value to a half or calling a function that works
|
||||||
|
directly on halfs is the actual conversion done (or never, when further
|
||||||
|
converting the result to float).
|
||||||
|
|
||||||
|
This approach has two implications. First of all you have to treat the
|
||||||
|
library's documentation at http://half.sourceforge.net as a simplified version,
|
||||||
|
describing the behaviour of the library as if implemented this way. The actual
|
||||||
|
argument and return types of functions and operators may involve other internal
|
||||||
|
types (feel free to generate the exact developer documentation from the Doxygen
|
||||||
|
comments in the library's header file if you really need to). But nevertheless
|
||||||
|
the behaviour is exactly like specified in the documentation. The other
|
||||||
|
implication is, that in the presence of rounding errors or over-/underflows
|
||||||
|
arithmetic expressions may produce different results when compared to
|
||||||
|
converting to half-precision after each individual operation:
|
||||||
|
|
||||||
|
half a = std::numeric_limits<half>::max() * 2.0_h / 2.0_h; // a = MAX
|
||||||
|
half b = half(std::numeric_limits<half>::max() * 2.0_h) / 2.0_h; // b = INF
|
||||||
|
assert( a != b );
|
||||||
|
|
||||||
|
But this should only be a problem in very few cases. One last word has to be
|
||||||
|
said when talking about performance. Even with its efforts in reducing
|
||||||
|
conversion overhead as much as possible, the software half-precision
|
||||||
|
implementation can most probably not beat the direct use of single-precision
|
||||||
|
computations. Usually using actual float values for all computations and
|
||||||
|
temporaries and using halfs only for storage is the recommended way. On the
|
||||||
|
one hand this somehow makes the provided mathematical functions obsolete
|
||||||
|
(especially in light of the implicit conversion from half to float), but
|
||||||
|
nevertheless the goal of this library was to provide a complete and
|
||||||
|
conceptually clean half-precision implementation, to which the standard
|
||||||
|
mathematical functions belong, even if usually not needed.
|
||||||
|
|
||||||
|
IEEE CONFORMANCE
|
||||||
|
|
||||||
|
The half type uses the standard IEEE representation with 1 sign bit, 5 exponent
|
||||||
|
bits and 10 mantissa bits (11 when counting the hidden bit). It supports all
|
||||||
|
types of special values, like subnormal values, infinity and NaNs. But there
|
||||||
|
are some limitations to the complete conformance to the IEEE 754 standard:
|
||||||
|
|
||||||
|
- The implementation does not differentiate between signalling and quiet
|
||||||
|
NaNs, this means operations on halfs are not specified to trap on
|
||||||
|
signalling NaNs (though they may, see last point).
|
||||||
|
|
||||||
|
- Though arithmetic operations are internally rounded to single-precision
|
||||||
|
using the underlying single-precision implementation's current rounding
|
||||||
|
mode, those values are then converted to half-precision using the default
|
||||||
|
half-precision rounding mode (changed by defining 'HALF_ROUND_STYLE'
|
||||||
|
accordingly). This mixture of rounding modes is also the reason why
|
||||||
|
'std::numeric_limits<half>::round_style' may actually return
|
||||||
|
'std::round_indeterminate' when half- and single-precision rounding modes
|
||||||
|
don't match.
|
||||||
|
|
||||||
|
- Because of internal truncation it may also be that certain single-precision
|
||||||
|
NaNs will be wrongly converted to half-precision infinity, though this is
|
||||||
|
very unlikely to happen, since most single-precision implementations don't
|
||||||
|
tend to only set the lowest bits of a NaN mantissa.
|
||||||
|
|
||||||
|
- The implementation does not provide any floating point exceptions, thus
|
||||||
|
arithmetic operations or mathematical functions are not specified to invoke
|
||||||
|
proper floating point exceptions. But due to many functions implemented in
|
||||||
|
single-precision, those may still invoke floating point exceptions of the
|
||||||
|
underlying single-precision implementation.
|
||||||
|
|
||||||
|
Some of those points could have been circumvented by controlling the floating
|
||||||
|
point environment using <cfenv> or implementing a similar exception mechanism.
|
||||||
|
But this would have required excessive runtime checks giving too high an impact
|
||||||
|
on performance for something that is rarely ever needed. If you really need to
|
||||||
|
rely on proper floating point exceptions, it is recommended to explicitly
|
||||||
|
perform computations using the built-in floating point types to be on the safe
|
||||||
|
side. In the same way, if you really need to rely on a particular rounding
|
||||||
|
behaviour, it is recommended to either use single-precision computations and
|
||||||
|
explicitly convert the result to half-precision using 'half_cast' and
|
||||||
|
specifying the desired rounding mode, or synchronize the default half-precision
|
||||||
|
rounding mode to the rounding mode of the single-precision implementation (most
|
||||||
|
likely 'HALF_ROUND_STYLE=1', 'HALF_ROUND_TIES_TO_EVEN=1'). But this is really
|
||||||
|
considered an expert-scenario that should be used only when necessary, since
|
||||||
|
actually working with half-precision usually comes with a certain
|
||||||
|
tolerance/ignorance of exactness considerations and proper rounding comes with
|
||||||
|
a certain performance cost.
|
||||||
|
|
||||||
|
|
||||||
|
CREDITS AND CONTACT
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
This library is developed by CHRISTIAN RAU and released under the MIT License
|
||||||
|
(see LICENSE.txt). If you have any questions or problems with it, feel free to
|
||||||
|
contact me at rauy@users.sourceforge.net.
|
||||||
|
|
||||||
|
Additional credit goes to JEROEN VAN DER ZIJP for his paper on "Fast Half Float
|
||||||
|
Conversions", whose algorithms have been used in the library for converting
|
||||||
|
between half-precision and single-precision values.
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2012-2017 Christian Rau
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
#
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue