Enable float16-bias convolution models to run on NN (#612)

Convert the float16 bias tensor to float32 to meet the conditions of NN
convolution in the driver.

Caution: requires Clang version 15.0 or newer.

Type: Code Improvement
Issue: bugzilla id: 32785 | jira id: VIVD-744
Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
commit 33f3a4f176
parent 34812fe40e
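At its core, the change widens a constant float16 bias buffer to float32 before the graph is handed to the driver. A minimal standalone sketch of that widening, assuming a Clang 15+ toolchain for the `_Float16` type (the helper name is illustrative, not part of TIM-VX):

#include <cstddef>
#include <vector>

// Widen an IEEE binary16 bias buffer to binary32. Every float16 value is
// exactly representable as float32, so the conversion is lossless.
std::vector<float> WidenBiasToFloat32(const std::vector<_Float16>& bias_f16) {
  std::vector<float> bias_f32(bias_f16.size());
  for (std::size_t i = 0; i < bias_f16.size(); ++i) {
    bias_f32[i] = static_cast<float>(bias_f16[i]);
  }
  return bias_f32;
}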
@@ -5,7 +5,7 @@ on:
     branches: [ main ]
   workflow_dispatch:
     branches: [ main ]
 
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
   BUILD_TYPE: Release
@@ -80,13 +80,14 @@ class Graph {
   virtual const std::vector<std::shared_ptr<Tensor>> InputsTensor() const = 0;
   virtual const std::vector<std::shared_ptr<Tensor>> OutputsTensor() const = 0;
 
-  virtual void UpdateTensorConsumersMap(
-      const std::shared_ptr<Tensor>& tensor,
-      const Operation* op) = 0;
+  virtual void UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
+                                        const Operation* op) = 0;
+  virtual void RenewTensorConsumersMap(
+      const std::shared_ptr<Tensor>& org_tensor,
+      const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) = 0;
 
-  virtual void UpdateTensorProducerMap(
-      const std::shared_ptr<Tensor>& tensor,
-      const Operation* op) = 0;
+  virtual void UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
+                                       const Operation* op) = 0;
 
   virtual const std::vector<std::shared_ptr<Operation>> GetConsumersOp(
       std::shared_ptr<Tensor> tensor) const = 0;
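A hypothetical caller-side view of the new pure-virtual RenewTensorConsumersMap API declared above: an operation that has just swapped one of its input tensors re-registers itself as a consumer of the replacement (the function and variable names below are stand-ins, not code from this commit):

#include <memory>
#include "tim/vx/graph.h"
#include "tim/vx/tensor.h"

void SwapConsumedTensor(tim::vx::Graph* graph,
                        const std::shared_ptr<tim::vx::Tensor>& org_tensor,
                        const std::shared_ptr<tim::vx::Tensor>& dst_tensor,
                        const tim::vx::Operation* op) {
  // Drop the consumer entry keyed by org_tensor and record op as a
  // consumer of dst_tensor instead.
  graph->RenewTensorConsumersMap(org_tensor, dst_tensor, op);
}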
@@ -49,16 +49,16 @@ class Operation {
   std::unique_ptr<OpImpl>& impl();
   const std::unique_ptr<OpImpl>& impl() const;
   virtual const std::vector<std::shared_ptr<Tensor>> ConstantInputsTensor() const;
 
  protected:
   bool IsAllInputsConst() const;
   std::unique_ptr<OpImpl> impl_;
 
  private:
   // Post processing at the final step on BindInput func
   // - tensor : input tensor
   // - input_idx: the index of input tensor
   virtual void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, int32_t input_idx);
 };
 
 } // namespace vx
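OnBindInputPostProc is a private virtual hook, which suggests a template-method pattern: the base class drives BindInput and gives the concrete op one last chance to rewrite the tensor it just bound. A simplified, hypothetical skeleton of that interaction (not the actual TIM-VX implementation):

#include <cstdint>
#include <memory>
#include <vector>

struct Tensor {};

class Operation {
 public:
  virtual ~Operation() = default;

  Operation& BindInput(const std::shared_ptr<Tensor>& tensor) {
    inputs_.push_back(tensor);
    // Final step: let the concrete op post-process the input it received.
    OnBindInputPostProc(tensor, static_cast<int32_t>(inputs_.size()) - 1);
    return *this;
  }

 private:
  // Default hook does nothing; ops like Conv2d override it.
  virtual void OnBindInputPostProc(const std::shared_ptr<Tensor>& /*tensor*/,
                                   int32_t /*input_idx*/) {}
  std::vector<std::shared_ptr<Tensor>> inputs_;
};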
@@ -37,7 +37,7 @@ namespace ops {
  *
  * Performs a 2-D convolution operation, include classic Conv2D /
  * Depthwise Conv2D / Group Conv2D / Dilation Conv2D.
  *
  * Input:
  * - input [WHCN or CWHN].
  * - kernel [ WHIcOc ] (Ic: Input Channels. Oc: Output Channels).
@@ -95,6 +95,13 @@ class Conv2d : public BuiltinOp {
   const std::array<uint32_t, 4> pad_;
   const int32_t multiplier_;
   const DataLayout kernel_layout_;
+
+#if defined(__clang__) && (__clang_major__ >= 15)
+#define TIM_VX_OPS_CONV2D_WITH_F16BIAS 1
+ private:
+  void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                           int32_t input_idx) override;
+#endif
 };
 
 } // namespace ops
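The `__clang_major__ >= 15` gate exists because the implementation relies on the `_Float16` type, which Clang enables on x86 targets only from release 15. A sketch of the kind of fallback a build on an older compiler might use (the `half_t` alias is hypothetical, not part of this commit):

#include <cstdint>

#if defined(__clang__) && (__clang_major__ >= 15)
using half_t = _Float16;       // native half type: casts and arithmetic work
#else
using half_t = std::uint16_t;  // raw binary16 bits only; no float semantics
#endif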
@@ -195,6 +195,22 @@ void GraphImpl::UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
   }
 }
 
+void GraphImpl::RenewTensorConsumersMap(
+    const std::shared_ptr<Tensor>& org_tensor,
+    const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) {
+  auto exist_op = std::find_if(
+      op_vector_.begin(), op_vector_.end(),
+      [op](std::shared_ptr<Operation> oper) { return oper.get() == op; });
+  if (exist_op == op_vector_.end()) {
+    return;  // given op cannot be found
+  } else {
+    auto consumer_to_remove = tensor_consumers_.find(org_tensor);
+    if (consumer_to_remove != tensor_consumers_.end())
+      tensor_consumers_.erase(consumer_to_remove);
+    tensor_consumers_[dst_tensor].push_back(*exist_op);
+  }
+}
+
 void GraphImpl::UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                         const Operation* op) {
   for (const auto& added_op : op_vector_) {
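Behaviourally, RenewTensorConsumersMap first verifies that op is actually registered in the graph, then moves the consumer bookkeeping from the old tensor key to the new one. Note that erasing org_tensor discards all of its recorded consumers, and only the matched op is re-registered under dst_tensor. The same logic on plain STL types (a sketch, not the real GraphImpl members):

#include <map>
#include <memory>
#include <vector>

struct Tensor {};
struct Operation {};

using ConsumersMap =
    std::map<std::shared_ptr<Tensor>, std::vector<std::shared_ptr<Operation>>>;

void RenewConsumers(ConsumersMap& consumers,
                    const std::shared_ptr<Tensor>& org_tensor,
                    const std::shared_ptr<Tensor>& dst_tensor,
                    const std::shared_ptr<Operation>& op) {
  consumers.erase(org_tensor);          // drop the stale entry wholesale
  consumers[dst_tensor].push_back(op);  // re-register the surviving consumer
}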
@@ -62,6 +62,9 @@ class GraphImpl : public Graph {
 
   void UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
                                 const Operation* op) override;
+  void RenewTensorConsumersMap(const std::shared_ptr<Tensor>& org_tensor,
+                               const std::shared_ptr<Tensor>& dst_tensor,
+                               const Operation* op) override;
   void UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                const Operation* op) override;
   const std::vector<std::shared_ptr<Operation>> GetConsumersOp(
@@ -96,6 +96,34 @@ const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor() const
   }
 }
 
+// Handle float16 bias if the Clang compiler version is no less than 15.0.0
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+void Conv2d::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                                 int32_t input_idx) {
+  if (tensor->GetDataType() == vx::DataType::FLOAT16 &&
+      tensor->IsConstTensor() && impl_->inputs_tensor_.size() == 3) {
+    uint32_t bias_size = 1;
+    for (auto i : tensor->GetShape()) {
+      bias_size *= i;
+    }
+    std::vector<_Float16> in(bias_size);
+    tensor->CopyDataFromTensor(in.data());
+
+    std::vector<float> out(bias_size);
+    for (uint i = 0; i < bias_size; i++) {
+      out[i] = static_cast<float>(in[i]);
+    }
+    TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(),
+                             tim::vx::TensorAttribute::CONSTANT);
+    auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, out.data());
+
+    impl_->inputs_tensor_[2] = out_tensor;
+    impl_->node()->input.tensors[input_idx] = out_tensor->GetId();
+    impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this);
+  }
+}
+#endif
+
 } // namespace ops
 } // namespace vx
 } // namespace tim
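The static_cast<float> loop above leans on _Float16, hence the Clang 15 gate. On toolchains without a native half type, the same widening could be done from the raw binary16 bit pattern instead; a sketch (not part of this commit) that handles normals, subnormals, zeros, infinities, and NaNs:

#include <cstdint>
#include <cstring>

float HalfBitsToFloat(std::uint16_t h) {
  std::uint32_t sign = static_cast<std::uint32_t>(h >> 15) << 31;
  std::uint32_t exp = (h >> 10) & 0x1F;
  std::uint32_t mant = h & 0x3FF;
  std::uint32_t bits;
  if (exp == 0) {
    if (mant == 0) {
      bits = sign;  // signed zero
    } else {
      // Subnormal half: renormalize the mantissa, adjusting the exponent.
      int shift = 0;
      while ((mant & 0x400) == 0) {
        mant <<= 1;
        ++shift;
      }
      mant &= 0x3FF;
      bits = sign | (static_cast<std::uint32_t>(113 - shift) << 23) |
             (mant << 13);
    }
  } else if (exp == 31) {
    bits = sign | 0x7F800000u | (mant << 13);  // inf (mant == 0) or NaN
  } else {
    bits = sign | ((exp + 112) << 23) | (mant << 13);  // rebias 15 -> 127
  }
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}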
@@ -29,6 +29,81 @@
 #include "tim/vx/graph.h"
 #include "tim/vx/types.h"
 
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
+  auto ctx = tim::vx::Context::Create();
+  auto graph = ctx->CreateGraph();
+
+  tim::vx::ShapeType input_shape({4, 2, 1, 1});   //whcn
+  tim::vx::ShapeType weight_shape({2, 2, 1, 3});  //whio
+  tim::vx::ShapeType bias_shape({weight_shape[3]});
+  tim::vx::ShapeType output_shape(
+      {4, 2, weight_shape[3], input_shape[3]});  //whcn
+
+  tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT16, input_shape,
+                                 tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec weight_spec(tim::vx::DataType::FLOAT16, weight_shape,
+                                  tim::vx::TensorAttribute::CONSTANT);
+  tim::vx::TensorSpec bias_spec(tim::vx::DataType::FLOAT16, bias_shape,
+                                tim::vx::TensorAttribute::CONSTANT);
+  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT16, output_shape,
+                                  tim::vx::TensorAttribute::OUTPUT);
+
+  // Input data nchw
+  std::vector<_Float16> input_data = {
+      1, 1, 1, 1,  // row = 1
+      2, 2, 3, 2   // row = 2
+  };
+
+  // weight data oihw
+  std::vector<_Float16> weight_data = {
+      1, 2, 3, 4,    // first 2x2 filter
+      -1, 1, -1, 1,  // second 2x2 filter
+      -1, -1, 1, 1,  // third 2x2 filter
+  };
+
+  // bias data
+  std::vector<_Float16> bias_data = {1, 2, 3};
+
+  // nchw
+  std::vector<_Float16> golden = {// first channel
+                                  18, 22, 21, 8, 7, 9, 8, 3, 2, 3, 1, -1,
+                                  // second channel
+                                  2, 3, 1, 0, 5, 6, 6, 4, -1, -2, -2, 1};
+
+  auto input_tensor = graph->CreateTensor(input_spec);
+  auto weight_tensor = graph->CreateTensor(weight_spec, weight_data.data());
+  auto bias_tensor = graph->CreateTensor(bias_spec, bias_data.data());
+  auto output_tensor = graph->CreateTensor(output_spec);
+
+  auto padding = tim::vx::PadType::SAME;
+  std::array<uint32_t, 2> stride({1, 1});
+  std::array<uint32_t, 2> dilation({0, 0});
+
+  auto conv2d = graph->CreateOperation<tim::vx::ops::Conv2d>(
+      padding, stride, dilation);
+  (*conv2d)
+      .BindInput(input_tensor)
+      .BindInput(weight_tensor)
+      .BindInput(bias_tensor)
+      .BindOutput(output_tensor);
+
+  EXPECT_TRUE(graph->Compile());
+
+  input_tensor->CopyDataToTensor(input_data.data());
+
+  EXPECT_TRUE(graph->Run());
+
+  uint32_t output_size = 1;
+  for (auto i : output_tensor->GetShape()) {
+    output_size *= i;
+  }
+  std::vector<_Float16> output(output_size);
+  EXPECT_TRUE(output_tensor->CopyDataFromTensor(output.data()));
+  EXPECT_TRUE(ArraysMatch(golden, output, (_Float16)0.1));
+}
+#endif
+
 TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
   auto ctx = tim::vx::Context::Create();
   auto graph = ctx->CreateGraph();