diff --git a/.github/workflows/cmake_x86_vsim.yml b/.github/workflows/cmake_x86_vsim.yml
index 03c3b54..c6ea727 100644
--- a/.github/workflows/cmake_x86_vsim.yml
+++ b/.github/workflows/cmake_x86_vsim.yml
@@ -5,7 +5,7 @@ on:
     branches: [ main ]
   workflow_dispatch:
     branches: [ main ]
-    
+
 env:
   # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
   BUILD_TYPE: Release
diff --git a/include/tim/vx/graph.h b/include/tim/vx/graph.h
index 5d594be..14ee305 100644
--- a/include/tim/vx/graph.h
+++ b/include/tim/vx/graph.h
@@ -80,13 +80,14 @@ class Graph {
   virtual const std::vector<std::shared_ptr<Tensor>> InputsTensor() const = 0;
   virtual const std::vector<std::shared_ptr<Tensor>> OutputsTensor() const = 0;
 
-  virtual void UpdateTensorConsumersMap(
-      const std::shared_ptr<Tensor>& tensor,
-      const Operation* op) = 0;
+  virtual void UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
+                                        const Operation* op) = 0;
+  virtual void RenewTensorConsumersMap(
+      const std::shared_ptr<Tensor>& org_tensor,
+      const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) = 0;
 
-  virtual void UpdateTensorProducerMap(
-      const std::shared_ptr<Tensor>& tensor,
-      const Operation* op) = 0;
+  virtual void UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
+                                       const Operation* op) = 0;
 
   virtual const std::vector<std::shared_ptr<Operation>> GetConsumersOp(
       std::shared_ptr<Tensor> tensor) const = 0;
diff --git a/include/tim/vx/operation.h b/include/tim/vx/operation.h
index df54734..0effef6 100644
--- a/include/tim/vx/operation.h
+++ b/include/tim/vx/operation.h
@@ -49,16 +49,16 @@ class Operation {
   std::unique_ptr<OpImpl>& impl();
   const std::unique_ptr<OpImpl>& impl() const;
   virtual const std::vector<std::shared_ptr<Tensor>> ConstantInputsTensor() const;
-  
+
  protected:
   bool IsAllInputsConst() const;
   std::unique_ptr<OpImpl> impl_;
 
  private:
 // Post processing at the final step on BindInput func
-// - tensor : input tensor  
+// - tensor : input tensor
 // - input_idx: the index of input tensor
-   virtual void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, int32_t input_idx); 
+   virtual void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor, int32_t input_idx);
 };
 
 }  // namespace vx
diff --git a/include/tim/vx/ops/conv2d.h b/include/tim/vx/ops/conv2d.h
index 1c77394..d9829e6 100644
--- a/include/tim/vx/ops/conv2d.h
+++ b/include/tim/vx/ops/conv2d.h
@@ -37,7 +37,7 @@ namespace ops {
  *
  * Performs a 2-D convolution operation, include classic Conv2D /
  * Depthwise Conv2D / Group Conv2D / Dilation Conv2D.
- * 
+ *
  * Input:
  * - input [WHCN or CWHN].
  * - kernel [ WHIcOc ] (Ic: Input Channels. Oc: Output Channels).
@@ -95,6 +95,13 @@ class Conv2d : public BuiltinOp {
   const std::array<uint32_t, 4> pad_;
   const int32_t multiplier_;
   const DataLayout kernel_layout_;
+
+#if defined(__clang__) && (__clang_major__ >= 15)
+#define TIM_VX_OPS_CONV2D_WITH_F16BIAS 1
+ private:
+  void OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                           int32_t input_idx) override;
+#endif
 };
 
 }  // namespace ops
diff --git a/src/tim/vx/graph.cc b/src/tim/vx/graph.cc
index bab4d0b..dfafb59 100644
--- a/src/tim/vx/graph.cc
+++ b/src/tim/vx/graph.cc
@@ -195,6 +195,40 @@ void GraphImpl::UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
   }
 }
 
+// Re-registers `op` as a consumer of dst_tensor instead of org_tensor.
+// - org_tensor: tensor that `op` no longer consumes
+// - dst_tensor: tensor that `op` consumes from now on
+// - op        : operation to move; nothing is changed when it is not one of
+//               the operations held in op_vector_
+// Only `op` is detached from org_tensor, so any other operation registered
+// as a consumer of org_tensor keeps its registration.
+void GraphImpl::RenewTensorConsumersMap(
+    const std::shared_ptr<Tensor>& org_tensor,
+    const std::shared_ptr<Tensor>& dst_tensor, const Operation* op) {
+  // Take the element by const reference: the lookup needs no extra owner.
+  const auto exist_op = std::find_if(
+      op_vector_.begin(), op_vector_.end(),
+      [op](const std::shared_ptr<Operation>& oper) {
+        return oper.get() == op;
+      });
+  if (exist_op == op_vector_.end()) {
+    return;  // given op cannot be found
+  }
+
+  const auto org_entry = tensor_consumers_.find(org_tensor);
+  if (org_entry != tensor_consumers_.end()) {
+    auto& consumers = org_entry->second;
+    consumers.erase(
+        std::remove(consumers.begin(), consumers.end(), *exist_op),
+        consumers.end());
+    // Remove the key only once no consumer is left for org_tensor.
+    if (consumers.empty()) {
+      tensor_consumers_.erase(org_entry);
+    }
+  }
+  tensor_consumers_[dst_tensor].push_back(*exist_op);
+}
+
 void GraphImpl::UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                          const Operation* op) {
   for (const auto& added_op : op_vector_) {
diff --git a/src/tim/vx/graph_private.h b/src/tim/vx/graph_private.h
index 5270260..230339b 100644
--- a/src/tim/vx/graph_private.h
+++ b/src/tim/vx/graph_private.h
@@ -62,6 +62,9 @@ class GraphImpl : public Graph {
 
   void UpdateTensorConsumersMap(const std::shared_ptr<Tensor>& tensor,
                                 const Operation* op) override;
+  void RenewTensorConsumersMap(const std::shared_ptr<Tensor>& org_tensor,
+                               const std::shared_ptr<Tensor>& dst_tensor,
+                               const Operation* op) override;
   void UpdateTensorProducerMap(const std::shared_ptr<Tensor>& tensor,
                                 const Operation* op) override;
   const std::vector<std::shared_ptr<Operation>> GetConsumersOp(
diff --git a/src/tim/vx/ops/conv2d.cc b/src/tim/vx/ops/conv2d.cc
index 0fe2f0f..4f93b1c 100644
--- a/src/tim/vx/ops/conv2d.cc
+++ b/src/tim/vx/ops/conv2d.cc
@@ -96,6 +96,50 @@ const std::vector<std::shared_ptr<Tensor>> Conv2d::ConstantInputsTensor() const
   }
 }
 
+// Handle a float16 bias; only built when the compiler is clang 15 or newer,
+// which is when conv2d.h defines TIM_VX_OPS_CONV2D_WITH_F16BIAS.
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+// Post-processing run at the end of BindInput: once three inputs are bound,
+// a constant FLOAT16 tensor is swapped for a FLOAT32 constant tensor holding
+// the same shape and values.
+// - tensor   : the tensor passed to BindInput
+// - input_idx: the index of that tensor in the node's input list
+void Conv2d::OnBindInputPostProc(const std::shared_ptr<Tensor>& tensor,
+                                 int32_t input_idx) {
+  // NOTE(review): slot 2 is presumably the bias input - confirm.
+  constexpr uint32_t kBiasSlot = 2;
+  const bool is_const_fp16 = tensor->GetDataType() == vx::DataType::FLOAT16 &&
+                             tensor->IsConstTensor();
+  if (!is_const_fp16 || impl_->inputs_tensor_.size() != kBiasSlot + 1) {
+    return;
+  }
+
+  uint32_t bias_size = 1;
+  for (auto dim : tensor->GetShape()) {
+    bias_size *= dim;
+  }
+  std::vector<_Float16> in(bias_size);
+  if (!tensor->CopyDataFromTensor(in.data())) {
+    // The read-back failed, so `in` still holds zeros: keep the FLOAT16 bias
+    // bound rather than silently substituting an all-zero FLOAT32 one.
+    return;
+  }
+
+  std::vector<float> out(bias_size);
+  // uint32_t index: `uint` is a non-standard typedef.
+  for (uint32_t i = 0; i < bias_size; i++) {
+    out[i] = static_cast<float>(in[i]);
+  }
+  TensorSpec fp32bias_spec(tim::vx::DataType::FLOAT32, tensor->GetShape(),
+                           tim::vx::TensorAttribute::CONSTANT);
+  auto out_tensor = impl_->graph_->CreateTensor(fp32bias_spec, out.data());
+
+  impl_->inputs_tensor_[kBiasSlot] = out_tensor;
+  impl_->node()->input.tensors[input_idx] = out_tensor->GetId();
+  impl_->graph_->RenewTensorConsumersMap(tensor, out_tensor, this);
+}
+#endif
+
 }  // namespace ops
 }  // namespace vx
 }  // namespace tim
\ No newline at end of file
diff --git a/src/tim/vx/ops/conv2d_test.cc b/src/tim/vx/ops/conv2d_test.cc
index ca00178..77db127 100644
--- a/src/tim/vx/ops/conv2d_test.cc
+++ b/src/tim/vx/ops/conv2d_test.cc
@@ -29,6 +29,79 @@
 #include "tim/vx/graph.h"
 #include "tim/vx/types.h"
 
+#ifdef TIM_VX_OPS_CONV2D_WITH_F16BIAS
+// Conv2d on FLOAT16 input, weight, bias and output tensors with SAME padding.
+// Built only when TIM_VX_OPS_CONV2D_WITH_F16BIAS is defined.
+TEST(Conv2d, shape_4_2_1_1_float16_PaddingTest) {
+  namespace vx = tim::vx;
+
+  auto context = vx::Context::Create();
+  auto graph = context->CreateGraph();
+
+  vx::ShapeType in_shape({4, 2, 1, 1});      // whcn
+  vx::ShapeType kernel_shape({2, 2, 1, 3});  // whio
+  vx::ShapeType bias_shape({kernel_shape[3]});
+  vx::ShapeType out_shape({4, 2, kernel_shape[3], in_shape[3]});  // whcn
+
+  const auto fp16 = vx::DataType::FLOAT16;
+  vx::TensorSpec in_spec(fp16, in_shape, vx::TensorAttribute::INPUT);
+  vx::TensorSpec kernel_spec(fp16, kernel_shape, vx::TensorAttribute::CONSTANT);
+  vx::TensorSpec bias_spec(fp16, bias_shape, vx::TensorAttribute::CONSTANT);
+  vx::TensorSpec out_spec(fp16, out_shape, vx::TensorAttribute::OUTPUT);
+
+  // Input data: a single 4x2 (w x h) plane, listed row by row.
+  std::vector<_Float16> in_data = {
+      1, 1, 1, 1,  // row = 1
+      2, 2, 3, 2,  // row = 2
+  };
+
+  // Weight data: three 2x2 filters, one per output channel.
+  std::vector<_Float16> kernel_data = {
+      1,  2,  3,  4,  // first 2x2 filter
+      -1, 1,  -1, 1,  // second 2x2 filter
+      -1, -1, 1,  1,  // third 2x2 filter
+  };
+
+  // Bias data: one value per output channel.
+  std::vector<_Float16> bias_data = {1, 2, 3};
+
+  // Expected output: 24 values, i.e. one 4x2 plane per output channel.
+  std::vector<_Float16> expected = {
+      18, 22, 21, 8,  7,  9,  8,  3,  // first channel
+      2,  3,  1,  -1, 2,  3,  1,  0,  // second channel
+      5,  6,  6,  4,  -1, -2, -2, 1,  // third channel
+  };
+
+  auto in_tensor = graph->CreateTensor(in_spec);
+  auto kernel_tensor = graph->CreateTensor(kernel_spec, kernel_data.data());
+  auto bias_tensor = graph->CreateTensor(bias_spec, bias_data.data());
+  auto out_tensor = graph->CreateTensor(out_spec);
+
+  auto padding = vx::PadType::SAME;
+  std::array<uint32_t, 2> stride({1, 1});
+  std::array<uint32_t, 2> dilation({0, 0});
+  auto conv2d =
+      graph->CreateOperation<vx::ops::Conv2d>(padding, stride, dilation);
+  conv2d->BindInput(in_tensor)
+      .BindInput(kernel_tensor)
+      .BindInput(bias_tensor)
+      .BindOutput(out_tensor);
+
+  EXPECT_TRUE(graph->Compile());
+  in_tensor->CopyDataToTensor(in_data.data());
+  EXPECT_TRUE(graph->Run());
+
+  // Element count of the output is the product of its dimensions.
+  uint32_t out_count = 1;
+  for (auto dim : out_tensor->GetShape()) {
+    out_count *= dim;
+  }
+  std::vector<_Float16> actual(out_count);
+  EXPECT_TRUE(out_tensor->CopyDataFromTensor(actual.data()));
+  EXPECT_TRUE(ArraysMatch(expected, actual, static_cast<_Float16>(0.1)));
+}
+#endif
+
 TEST(Conv2d, shape_4_2_1_1_float32_PaddingTest) {
   auto ctx = tim::vx::Context::Create();
   auto graph = ctx->CreateGraph();