diff --git a/prebuilt-sdk/x86_64_linux/VERSION b/prebuilt-sdk/x86_64_linux/VERSION
index d8b37f0..79d5c17 100644
--- a/prebuilt-sdk/x86_64_linux/VERSION
+++ b/prebuilt-sdk/x86_64_linux/VERSION
@@ -1 +1 @@
-6.4.12_CL562241A_D561555_A558512_R558399_T558462_Oeb44e5c
+6.4.14_CL650117A_D650117_A648302_R647402_T648811_O646970
\ No newline at end of file
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h
index 8b93beb..d353960 100644
--- a/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h
+++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h
@@ -501,6 +501,8 @@ enum vx_kernel_e {
VX_KERNEL_STREAM_PROCESSOR = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x32,
+ VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x33,
+
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
};
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h
index 6cf283c..f3f0191 100644
--- a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h
+++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h
@@ -173,7 +173,7 @@ VX_DECONV_3D_API_SUPPORT is used to declare that vsi openvx driver can support d
1: support
*/
#ifndef VX_DECONV_3D_API_SUPPORT
-#define VX_DECONV_3D_API_SUPPORT 0
+#define VX_DECONV_3D_API_SUPPORT 1
#endif
/*
@@ -237,4 +237,26 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
#define VX_SCALE_EXTRA_PARAMETER_SUPPORT 1
#endif
+/*
+ VX_INVALIDATE_HANDLE_SUPPORT is used to declare that the vxSwapTensorHandle API has been refined to follow the KHR OpenVX 1.3 spec: the tensor does not maintain a handle internally if new_ptr is NULL.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_INVALIDATE_HANDLE_SUPPORT
+#define VX_INVALIDATE_HANDLE_SUPPORT 1
+#endif
+
+/*
+ VX_ACTIVATION_EXT2_SUPPORT is used to declare that ACTIVATION can support sign, hard_sigmoid, neg, clip, exp, sin, cos,
+ log, mish, gelu, hgelu, elu, selu, celu, rcp, softsign, atan, atanh, acosh, inverse sigmoid, round and erf.
+ [value]
+ 0: not support
+ 1: support
+*/
+#ifndef VX_ACTIVATION_EXT2_SUPPORT
+#define VX_ACTIVATION_EXT2_SUPPORT 1
+#endif
+
+
#endif /* __VX_KHR_COMPATIBLE_H__ */
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn.h
index c329f8c..a43a37e 100644
--- a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn.h
+++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn.h
@@ -219,6 +219,28 @@ enum vx_nn_activation_function_e
VX_NN_ACTIVATION_HSWISH = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x6,
VX_NN_ACTIVATION_CUSTOM = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x7,
VX_NN_ACTIVATION_NONE = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x8,
+ VX_NN_ACTIVATION_SIGN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x9,
+ VX_NN_ACTIVATION_HSIGMOID_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xa,
+ VX_NN_ACTIVATION_NEG_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xb,
+ VX_NN_ACTIVATION_CLIP_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xc,
+ VX_NN_ACTIVATION_EXP_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xd,
+ VX_NN_ACTIVATION_SIN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xe,
+ VX_NN_ACTIVATION_COS_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xf,
+ VX_NN_ACTIVATION_LOG_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x10,
+ VX_NN_ACTIVATION_MISH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x11,
+ VX_NN_ACTIVATION_GELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x12,
+ VX_NN_ACTIVATION_HGELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x13,
+ VX_NN_ACTIVATION_ELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x14,
+ VX_NN_ACTIVATION_SELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x15,
+ VX_NN_ACTIVATION_CELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x16,
+ VX_NN_ACTIVATION_RECIPROCAL_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x17,
+ VX_NN_ACTIVATION_SOFTSIGN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x18,
+ VX_NN_ACTIVATION_ATAN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x19,
+ VX_NN_ACTIVATION_ATANH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1a,
+ VX_NN_ACTIVATION_ACOSH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1b,
+ VX_NN_ACTIVATION_INVERSE_SIGMOID_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1c,
+ VX_NN_ACTIVATION_ROUND_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1d,
+ VX_NN_ACTIVATION_ERF_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1e,
};
/*! \brief The Convolutional network type
@@ -623,6 +645,19 @@ VX_API_ENTRY vx_tensor VX_API_CALL vxCreateTensorFromHandle2(
* \retval VX_ERROR_INVALID_REFERENCE tensor is not a valid \ref vx_tensor \ref vx_imagereference created from Handle.
*/
VX_API_ENTRY vx_status VX_API_CALL vxFlushHandle(vx_reference ref);
+/*! \brief Same as vxFlushHandle(); also added by Verisilicon as an extension API.
+ */
+VX_API_ENTRY vx_status VX_API_CALL vxFlushHandleVSI(vx_reference ref);
+
+#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
+/*! \brief Invalidate the memory referenced by the reference's handle when it is ready.
+* Added by Verisilicon as an extension API.
+* \param [in] ref The reference (image or tensor) which was created from a handle.
+* \return A \ref vx_status_e enumeration.
+* \retval VX_ERROR_INVALID_REFERENCE ref is not a valid \ref vx_tensor or \ref vx_image reference created from a handle.
+*/
+VX_API_ENTRY vx_status VX_API_CALL vxInvalidateHandleVSI(vx_reference ref);
+#endif
#if VX_VA40_EXT_SUPPORT
/*! \brief Return a new tensor referencing the same memory location but with different shape.
@@ -776,6 +811,14 @@ typedef struct _vx_nn_convolution_params_ext2_t
vx_int32 depth_multiplier; /*!< \brief depthwise multiplier value, if 0, means convolution, elsewise(>=1), the convolution is depthwiseconvolution. */
} vx_nn_convolution_params_ext2_t;
+
+typedef struct _vx_nn_convolution_params_ext3_t
+{
+ vx_nn_convolution_params_ext2_t ext2; /*!< \brief Convolution extension structure head */
+
+ vx_bool isPPU; /*!< \brief merge convolution and relu for PPU. */
+} vx_nn_convolution_params_ext3_t;
+
/*==============================================================================
NN Nodes
=============================================================================*/
@@ -2142,7 +2185,8 @@ typedef struct _vx_hardware_caps_params_ext_t
typedef struct _vx_hardware_caps_params_ext2_t
{
vx_hardware_caps_params_ext_t base;
- vx_uint32 streamProcessorExecCount; /*!< \brief streamprocess execution count. */
+ vx_uint32 streamProcessorExecCount; /*!< \brief stream processor execution count. */
+ vx_uint32 streamProcessorVectorSize; /*!< \brief stream processor vector size. */
} vx_hardware_caps_params_ext2_t;
/*! \brief Queries hardware caps information.
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h
index df2c517..6570e1d 100644
--- a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h
+++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h
@@ -236,6 +236,12 @@ typedef struct _vx_nn_convolution_relu_pooling_params_ext6_t
} vx_nn_convolution_relu_pooling_params_ext6_t, * vx_nn_convolution_relu_pooling_params_ext6;;
+typedef struct _vx_nn_convolution_relu_pooling_params_ext7_t
+{
+ vx_nn_convolution_relu_pooling_params_ext6_t ext6; /*!< \brief convolution relu pooling params \ref vx_nn_convolution_relu_pooling_params_ext6_t */
+ vx_bool isSub; /* NOTE(review): purpose of isSub is not documented in this header - confirm against the driver. */
+} vx_nn_convolution_relu_pooling_params_ext7_t, * vx_nn_convolution_relu_pooling_params_ext7;
+
/*! \brief [Graph] Creates a Convolutional Network Convolution and Activation(Relu) and Pooling Layer Node, this fucntion match kronos NN Extension 1.2 verion.
* \details This function implement Convolutional Network Convolution and Activation(Relu) and Pooling layer.
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of the accumulator bits are implementation defined,
@@ -1081,6 +1087,48 @@ VX_API_ENTRY vx_node VX_API_CALL vxTensorTableLookupLayer(
vx_lut OutLut,
vx_tensor output);
+typedef struct _vx_nn_gemm_relu_pooling_params_t
+{
+ vx_bool enable_relu; /*!< \brief Enable Relu layer function or not. */
+ vx_bool enable_leaky_relu; /*!< \brief Enable LeakyRelu layer function or not. */
+ vx_float32 alpha; /*!< \brief Alpha value for Activation */
+ vx_float32 beta; /*!< \brief Beta value for Activation */
+ vx_uint32 node_count; /*!< \brief node count to merge */
+ vx_float32 merged_scale[MERGED_NODE_COUNT_MAX]; /*!< \brief scale of merged node output */
+ vx_int32 merged_zero_point[MERGED_NODE_COUNT_MAX]; /*!< \brief zero point of merged node output */
+ vx_enum merged_data_type[MERGED_NODE_COUNT_MAX]; /*!< \brief data type of merged node output */
+ vx_enum act_func; /*!< \brief nn activation function */
+ vx_lut lut_in; /*!< \brief LUT in */
+ vx_lut lut_out; /*!< \brief LUT out */
+ vx_bool enbale_const_multiplier; /*!< \brief tensor mul with one of inputs as a single pixel const tensor */
+ vx_float32 const_multiplier; /*!< \brief const multiplier */
+} vx_nn_gemm_relu_pooling_params_t, * vx_nn_gemm_relu_pooling_params;
+
+/*! \brief Create a batch gemm node, the calculation formula is output = matrix_a * matrix_b + matrix_c.
+ * \param [in] graph The reference to the graph.
+ * \param [in] matrix_a The first input tensor.
+ * \param [in] matrix_b The second input tensor. Must be in the same data type and batch count as first input tensor.
+ * \param [in] matrix_c The third input tensor. Must be in the same data type and batch count as first input tensor. [optional]
+ * \param [in] trans_a If true, the matrix_a has been transposed before calculation.
+ * \param [in] trans_b If true, the matrix_b has been transposed before calculation.
+ * \param [in] trans_c If true, the matrix_c has been transposed before calculation. [optional]
+ * \param [in] merge_param the parameters for gemm + op merging
+ * \param [out] output The output tensor. Output dimension must agree with the formula in the description.
+ * \return \ref vx_node.
+ * \retval vx_node A node reference. Any possible errors preventing a successful creation
+ * should be checked using \ref vxGetStatus
+ * \ingroup group_vision_function_gemm
+ */
+VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer(vx_graph graph,
+ vx_tensor matrix_a,
+ vx_tensor matrix_b,
+ vx_tensor matrix_c,
+ vx_scalar trans_a,
+ vx_scalar trans_b,
+ vx_scalar trans_c,
+ const vx_nn_gemm_relu_pooling_params merge_param,
+ vx_tensor output);
+
#ifdef __cplusplus
}
#endif
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_spinst.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_spinst.h
index 867b8ce..36df374 100644
--- a/prebuilt-sdk/x86_64_linux/include/VX/vx_spinst.h
+++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_spinst.h
@@ -165,6 +165,7 @@ typedef enum _vx_sp_attribute_e
VX_SP_ATTRIBUTE_SUM_ENGINE_CONTROL,
VX_SP_ATTRIBUTE_SUM_ENGINE_NUM_CH_MINUS_ONE,
VX_SP_ATTRIBUTE_SUM_ENGINE_2D_ACCUM_STORAGE,
+ VX_SP_ATTRIBUTE_SUM_ENGINE_OP_SELECT,
VX_SP_ATTRIBUTE_NUM_OF_ELEMENTS_PER_LOOP_PER_INPUT,
@@ -181,6 +182,18 @@ typedef enum _vx_sp_attribute_e
VX_SP_ATTRIBUTE_CONST3, /* NN clamp max */
VX_SP_ATTRIBUTE_CONST4, /* NN clmap min */
+ VX_SP_ATTRIBUTE_CONST_COUNT,
+
+ VX_SP_ATTRIBUTE_SPLIT_AXIS,
+ VX_SP_ATTRIBUTE_SPLIT_MAX_SIZE,
+ VX_SP_ATTRIBUTE_SPLIT_TILEX_EQUAL_INIMAGEX,
+
+ VX_SP_ATTRIBUTE_NOT_MERGE_CONVSP,
+ VX_SP_ATTRIBUTE_UPDATE_CONST0_TO_PCQ_COEF_TENSOR,
+ VX_SP_ATTRIBUTE_RESHAPE_ARRAY, /* bit layout | output:24-29 | input3:18-23 | input2:12-17 | input1:6-11 | input0:0-5 | */
+ VX_SP_ATTRIBUTE_ALIGN_SP_CORE_AXIS,
+ VX_SP_ATTRIBUTE_KEEP_TILE_SIZE,
+
VX_SP_ATTRIBUTE_TOTAL_COUNT,
}
vx_sp_attribute_e;
@@ -274,9 +287,55 @@ typedef enum _vx_sp_attribute_sum_engine_2d_accum_storage_e
}
vx_sp_attribute_sum_engine_2d_accum_storage_e;
+typedef enum _vx_sp_attribute_sum_engine_op_select_e
+{
+ VX_SP_ATTRIBUTE_SUM_ENGINE_SUM_OP,
+ VX_SP_ATTRIBUTE_SUM_ENGINE_MAX_OP
+} vx_sp_attribute_sum_engine_op_select_e;
+
+typedef enum _vx_sp_attribute_reshape_e
+{
+ VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW = 0x00,
+ VX_SP_ATTRIBUTE_RESHAPE_CHW2WHC = 0x06,
+ VX_SP_ATTRIBUTE_RESHAPE_CHW2WCH = 0x09,
+ VX_SP_ATTRIBUTE_RESHAPE_CHW2HWC = 0x12,
+ VX_SP_ATTRIBUTE_RESHAPE_CHW2HCW = 0x18,
+ VX_SP_ATTRIBUTE_RESHAPE_CHW2CWH = 0x21,
+}
+vx_sp_attribute_reshape_e;
+
+typedef enum _vx_sp_attribute_split_axis_e
+{
+ VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_X,
+ VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_Y,
+ VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_Z,
+ VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_XY,
+ VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_YZ,
+ VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_XYZ,
+}
+vx_sp_attribute_split_axis_e;
+
+typedef enum _vx_sp_attribute_tile_align_sp_core_e
+{
+ VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_NONE = 0,
+ VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_X,
+ VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_Y,
+ VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_XY,
+}
+vx_sp_attribute_tile_align_sp_core_e;
+
+typedef enum _vx_sp_attribute_keep_tile_size_e
+{
+ VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_NONE = 0,
+ VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_X,
+ VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_Y,
+ VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_XY,
+}
+vx_sp_attribute_keep_tile_size_e;
+
/**********************************************************************************************/
-/*! \brief Creates an opaque reference to a spinst data.
+/*! \brief Creates an external reference to a spinst data.
* \param [in] context The reference to the implementation context.
* \return A spinst data reference.
* \Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
@@ -286,7 +345,17 @@ VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINST(
vx_context context
);
-/*! \brief Releases a reference to a spinst object.
+/*! \brief Creates an internal reference to a spinst data.
+ * \param [in] context The reference to the implementation context.
+ * \return A spinst data reference.
+ * Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+ * \ingroup group_object_spinst
+ */
+VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINSTInternal(
+ vx_context context
+ );
+
+/*! \brief Releases a reference to an external spinst object.
* The object may not be garbage collected until its total reference count is zero.
* \param [in] spinst_obj The pointer to the spinst data to release.
* \post After returning from this function the reference is zeroed.
@@ -299,6 +368,19 @@ VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINST(
vx_spinst *spinst_obj
);
+/*! \brief Releases a reference to an internal spinst object.
+ * The object may not be garbage collected until its total reference count is zero.
+ * \param [in] spinst_obj The pointer to the spinst data to release.
+ * \post After returning from this function the reference is zeroed.
+ * \return A \ref vx_status_e enumeration.
+ * \retval VX_SUCCESS No errors; all other values indicate failure
+ * \retval * An error occurred. See \ref vx_status_e.
+ * \ingroup group_object_spinst
+ */
+VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINSTInternal(
+ vx_spinst *spinst_obj
+ );
+
/*! \brief Add a instruction to spinst object.
* \param [in] spinst_obj The reference to the spinst object.
* \param [in] inst_unit_array The units of one instruction. Use a \ref vx_spinst_unit_param.
@@ -332,6 +414,12 @@ VX_API_ENTRY vx_status VX_API_CALL vxSetAttributeToSPINST(
vx_uint32 value
);
+VX_API_ENTRY vx_status VX_API_CALL vxGetAttributeToSPINST(
+ vx_spinst spinst_obj,
+ vx_enum attribute,
+ vx_uint32* value
+);
+
#ifdef __cplusplus
}
#endif
diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h
index 6ce6f8a..6f75ea9 100644
--- a/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h
+++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h
@@ -539,6 +539,15 @@ typedef vx_enum vx_action;
*/
typedef vx_action (VX_CALLBACK *vx_nodecomplete_f)(vx_node node);
+/*! \brief A callback to the client for querying information of a node.
+ * \see vx_action
+ * \see vxAssignNodeCallback
+ * \param [in] node The node to which the callback was attached.
+ * \return A \ref vx_status_e enumeration.
+ * \ingroup group_node_callback
+ */
+typedef vx_status (VX_CALLBACK *vx_nodequery_f)(vx_node node);
+
/*! \brief Vendor IDs are 2 nibbles in size and are located in the upper byte of
* the 4 bytes of an enumeration.
* \ingroup group_basic_features
@@ -1028,6 +1037,11 @@ enum vx_node_attribute_e {
VX_NODE_ATTRIBUTE_FOR_HW_QUALITY = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0xA,
+ VX_NODE_SWTILING_TILE_XY = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x10,
+ VX_NODE_SPINST_INDEX = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x11,
+ VX_NODE_SPCONV_PCQ_REPLACE_SPINST = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x12,
+ VX_NODE_SP_NAME = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x13,
+ VX_NODE_SPINST = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x14,
};
/*! \brief The parameter attributes list
diff --git a/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so b/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so
index adc7bf9..0e20368 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so and b/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libCLC.so b/prebuilt-sdk/x86_64_linux/lib/libCLC.so
index 526ed39..9c88390 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libCLC.so and b/prebuilt-sdk/x86_64_linux/lib/libCLC.so differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libEmulator.so b/prebuilt-sdk/x86_64_linux/lib/libEmulator.so
index 7f7cd1e..96a5ab4 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libEmulator.so and b/prebuilt-sdk/x86_64_linux/lib/libEmulator.so differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libGAL.so b/prebuilt-sdk/x86_64_linux/lib/libGAL.so
index f699122..06525da 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libGAL.so and b/prebuilt-sdk/x86_64_linux/lib/libGAL.so differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so b/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so
index c03c624..1566bab 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so and b/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 b/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0
index f709ed4..71f3384 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 and b/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libOpenVXU.so b/prebuilt-sdk/x86_64_linux/lib/libOpenVXU.so
index 2339562..9b7e0ca 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libOpenVXU.so and b/prebuilt-sdk/x86_64_linux/lib/libOpenVXU.so differ
diff --git a/prebuilt-sdk/x86_64_linux/lib/libVSC.so b/prebuilt-sdk/x86_64_linux/lib/libVSC.so
index ebab842..1bafe16 100755
Binary files a/prebuilt-sdk/x86_64_linux/lib/libVSC.so and b/prebuilt-sdk/x86_64_linux/lib/libVSC.so differ
diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt
index 1172297..d0a7eb9 100644
--- a/src/tim/CMakeLists.txt
+++ b/src/tim/CMakeLists.txt
@@ -31,6 +31,7 @@ if(${TIM_VX_USE_EXTERNAL_OVXLIB})
set(OVXLIB_INCLUDE_DIR ${OVXLIB_INC})
else()
set(OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/include")
+ list(APPEND OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/src")
endif()
message(STATUS "OVXLIB include directory: ${OVXLIB_INCLUDE_DIR}")
diff --git a/src/tim/vx/internal/BUILD b/src/tim/vx/internal/BUILD
index 392f1ec..f41b1cd 100644
--- a/src/tim/vx/internal/BUILD
+++ b/src/tim/vx/internal/BUILD
@@ -69,7 +69,6 @@ filegroup(
"src/custom/ops/*.c",
"src/custom/ops/kernel/evis/*.c",
"src/custom/ops/kernel/cl/*.c",
- "src/custom/ops/kernel/cpu/*.c",
])
)
@@ -84,6 +83,7 @@ cc_library(
linkstatic = True,
includes = [
"include",
+ "src",
],
hdrs = [
"include/vsi_nn_pub.h",
@@ -104,6 +104,7 @@ cc_library(
"include/vsi_nn_compatibility.h",
"include/vsi_nn_assert.h",
"include/vsi_nn_feature.h",
+ "include/vsi_nn_post.h",
"include/vsi_nn_rnn.h",
"include/vsi_nn_rnn_helper.h",
"include/vsi_nn_rnn_prv.h",
@@ -121,13 +122,15 @@ cc_library(
"include/utils/vsi_nn_limits.h",
"include/utils/vsi_nn_dtype_util.h",
"include/utils/vsi_nn_dtype_util_prv.h",
- "include/utils/vsi_nn_vdata.h",
"include/utils/vsi_nn_tensor_op.h",
+ "include/utils/vsi_nn_dlfcn.h",
"include/utils/vsi_nn_shape_util.h",
"include/utils/vsi_nn_constraint_check.h",
"include/quantization/vsi_nn_asymmetric_affine.h",
"include/quantization/vsi_nn_dynamic_fixed_point.h",
"include/quantization/vsi_nn_perchannel_symmetric_affine.h",
+ "include/post/vsi_nn_post_fasterrcnn.h",
+ "include/post/vsi_nn_post_cmupose.h",
"include/interface/ops.def",
"include/kernel/vsi_nn_kernel.h",
"include/kernel/vsi_nn_gpu.h",
@@ -168,6 +171,9 @@ cc_library(
"src/vsi_nn_daemon.c",
"src/vsi_nn_graph_optimization.c",
"src/vsi_nn_pre_post_process.c",
+ "src/vsi_nn_tensor_util_prv.h",
+ "src/vsi_nn_types_prv.h",
+ "src/vsi_nn_kernel_prv.h",
"src/utils/vsi_nn_link_list.c",
"src/utils/vsi_nn_util.c",
"src/utils/vsi_nn_math.c",
@@ -177,14 +183,16 @@ cc_library(
"src/utils/vsi_nn_hashmap.c",
"src/utils/vsi_nn_limits.c",
"src/utils/vsi_nn_dtype_util.c",
- "src/utils/vsi_nn_vdata.c",
"src/utils/vsi_nn_tensor_op.c",
+ "src/utils/vsi_nn_dlfcn.c",
"src/utils/vsi_nn_shape_util.c",
"src/utils/vsi_nn_dtype.c",
"src/utils/vsi_nn_constraint_check.c",
"src/quantization/vsi_nn_asymmetric_affine.c",
"src/quantization/vsi_nn_dynamic_fixed_point.c",
"src/quantization/vsi_nn_perchannel_symmetric_affine.c",
+ "src/post/vsi_nn_post_fasterrcnn.c",
+ "src/post/vsi_nn_post_cmupose.c",
"src/kernel/vsi_nn_kernel.c",
"src/kernel/vsi_nn_kernel_util.c",
"src/kernel/vsi_nn_kernel_backend.c",
@@ -202,4 +210,3 @@ cc_library(
+ [":custom_srcs"],
deps = ["//prebuilt-sdk:VIV_SDK_LIB"]
)
-
diff --git a/src/tim/vx/internal/include/custom/custom_node_type.def b/src/tim/vx/internal/include/custom/custom_node_type.def
index 0283c71..90d7727 100644
--- a/src/tim/vx/internal/include/custom/custom_node_type.def
+++ b/src/tim/vx/internal/include/custom/custom_node_type.def
@@ -5,3 +5,4 @@ DEF_NODE_TYPE(custom_softmax)
DEF_NODE_TYPE(custom_ainr_denoise_postprocess)
DEF_NODE_TYPE(custom_warp_affine)
DEF_NODE_TYPE(custom_warp_perspective)
+DEF_NODE_TYPE(custom_sample)
diff --git a/src/tim/vx/internal/include/custom/custom_ops.def b/src/tim/vx/internal/include/custom/custom_ops.def
index 690b057..0050439 100644
--- a/src/tim/vx/internal/include/custom/custom_ops.def
+++ b/src/tim/vx/internal/include/custom/custom_ops.def
@@ -5,3 +5,4 @@ DEF_OP(CUSTOM_SOFTMAX)
DEF_OP(CUSTOM_AINR_DENOISE_POSTPROCESS)
DEF_OP(CUSTOM_WARP_AFFINE)
DEF_OP(CUSTOM_WARP_PERSPECTIVE)
+DEF_OP(CUSTOM_SAMPLE)
diff --git a/src/tim/vx/internal/include/custom/ops/vsi_nn_op_custom_sample.h b/src/tim/vx/internal/include/custom/ops/vsi_nn_op_custom_sample.h
new file mode 100644
index 0000000..d15fa0b
--- /dev/null
+++ b/src/tim/vx/internal/include/custom/ops/vsi_nn_op_custom_sample.h
@@ -0,0 +1,35 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_OP_CUSTOM_SAMPLE_H
+#define _VSI_NN_OP_CUSTOM_SAMPLE_H
+
+#include "vsi_nn_platform.h"
+#include "vsi_nn_types.h"
+
+typedef struct _vsi_nn_custom_sample_param
+{
+ int32_t axis;
+} vsi_nn_custom_sample_param;
+
+#endif
diff --git a/src/tim/vx/internal/include/custom/vsi_nn_custom_node_type.h b/src/tim/vx/internal/include/custom/vsi_nn_custom_node_type.h
index 1a05c8a..8976be3 100644
--- a/src/tim/vx/internal/include/custom/vsi_nn_custom_node_type.h
+++ b/src/tim/vx/internal/include/custom/vsi_nn_custom_node_type.h
@@ -30,5 +30,6 @@
#include "custom/ops/vsi_nn_op_custom_ainr_denoise_postprocess.h"
#include "custom/ops/vsi_nn_op_custom_warp_affine.h"
#include "custom/ops/vsi_nn_op_custom_warp_perspective.h"
+#include "custom/ops/vsi_nn_op_custom_sample.h"
#endif
diff --git a/src/tim/vx/internal/include/interface/ops.def b/src/tim/vx/internal/include/interface/ops.def
old mode 100644
new mode 100755
index 045eb95..82d843f
--- a/src/tim/vx/internal/include/interface/ops.def
+++ b/src/tim/vx/internal/include/interface/ops.def
@@ -183,3 +183,13 @@ DEF_OP(LPPOOL)
DEF_OP(SCATTER_ELEMENTS)
DEF_OP(PRE_PROCESS_YUV422)
DEF_OP(BUCKETIZE)
+DEF_OP(GLOBALLPPOOL)
+DEF_OP(AVG_POOL3D)
+DEF_OP(ATAN)
+DEF_OP(ATANH)
+DEF_OP(ACOSH)
+DEF_OP(MAXUNPOOL)
+DEF_OP(REVERSESEQUENCE)
+DEF_OP(INVERSE_SIGMOID)
+DEF_OP(GRID_SAMPLE)
+DEF_OP(LPNORM)
diff --git a/src/tim/vx/internal/include/internal/internal_ops.def b/src/tim/vx/internal/include/internal/internal_ops.def
old mode 100644
new mode 100755
index a47559a..de33327
--- a/src/tim/vx/internal/include/internal/internal_ops.def
+++ b/src/tim/vx/internal/include/internal/internal_ops.def
@@ -20,3 +20,4 @@ DEF_OP(SPACE2DEPTH_INTERNAL)
DEF_OP(GRUCELL_H_TIMES_ACTIVATION_R)
DEF_OP(GRUCELL_ACTIVATION_Z_H)
DEF_OP(REDUCE_MEAN_INTERNAL)
+DEF_OP(BILINEAR_GRID_SAMPLE)
diff --git a/src/tim/vx/internal/include/kernel/vsi_nn_gpu.h b/src/tim/vx/internal/include/kernel/vsi_nn_gpu.h
index 3dc44d5..c943343 100644
--- a/src/tim/vx/internal/include/kernel/vsi_nn_gpu.h
+++ b/src/tim/vx/internal/include/kernel/vsi_nn_gpu.h
@@ -59,7 +59,7 @@ typedef struct
gpu_dp_type_e type;
} gpu_dp_inst_t;
-typedef struct
+typedef struct VSI_PUBLIC_TYPE
{
uint32_t dim;
size_t global_offset[GPU_MAX_DIMENSION_SIZE];
diff --git a/src/tim/vx/internal/include/kernel/vsi_nn_kernel.h b/src/tim/vx/internal/include/kernel/vsi_nn_kernel.h
index d2c4e58..c118e13 100644
--- a/src/tim/vx/internal/include/kernel/vsi_nn_kernel.h
+++ b/src/tim/vx/internal/include/kernel/vsi_nn_kernel.h
@@ -51,7 +51,7 @@ typedef enum
VSI_NN_KERNEL_TYPE_SP,
VSI_NN_KERNEL_TYPE_NUM,
VSI_NN_KERNEL_TYPE_NONE = VSI_NN_KERNEL_TYPE_NUM
-} vsi_nn_kernel_type_e;
+} VSI_PUBLIC_TYPE vsi_nn_kernel_type_e;
/** Kernel pirority */
enum
@@ -79,7 +79,7 @@ typedef enum
BOOL8,
I4,
U4,
-} vsi_nn_kernel_dtype_e;
+} VSI_PUBLIC_TYPE vsi_nn_kernel_dtype_e;
typedef enum
{
@@ -98,7 +98,7 @@ typedef enum
VSI_NN_GPU_SOURCE_FMT_CODE = 0,
VSI_NN_GPU_SOURCE_FMT_EXECUTABLE = 1,
VSI_NN_GPU_SOURCE_FMT_NUM
-} vsi_nn_gpu_source_fmt_e;
+} VSI_PUBLIC_TYPE vsi_nn_gpu_source_fmt_e;
typedef char * vsi_nn_kernel_source_t;
typedef uint32_t vsi_nn_kernel_unique_id_t;
@@ -125,7 +125,7 @@ typedef struct
vsi_nn_kernel_source_info_t sources[VSI_NN_GPU_SOURCE_FMT_NUM];
vsi_nn_gpu_source_fmt_e active_source_fmt;
} gpu;
-} vsi_nn_kernel_t;
+} VSI_PUBLIC_TYPE vsi_nn_kernel_t;
typedef struct
{
@@ -172,15 +172,15 @@ typedef struct
int32_t allow_kernel_num;
} vsi_nn_kernel_selector_t;
-typedef void * vsi_nn_kernel_node_param_t;
+typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_node_param_t;
typedef void * vsi_nn_kernel_tensor_t;
-typedef void * vsi_nn_kernel_node_t;
+typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_node_t;
typedef void * vsi_nn_kernel_graph_t;
-typedef void * vsi_nn_kernel_scalar_t;
+typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_scalar_t;
typedef vsi_nn_hashmap_t vsi_nn_kernel_param_t;
diff --git a/src/tim/vx/internal/include/kernel/vsi_nn_kernel_lut.h b/src/tim/vx/internal/include/kernel/vsi_nn_kernel_lut.h
index f413b81..8b8c055 100644
--- a/src/tim/vx/internal/include/kernel/vsi_nn_kernel_lut.h
+++ b/src/tim/vx/internal/include/kernel/vsi_nn_kernel_lut.h
@@ -51,6 +51,10 @@ typedef int32_t vsi_nn_kernel_lut_act_e; enum
VSI_NN_KERNEL_LUT_LINEAR_EXP = 17,
VSI_NN_KERNEL_LUT_LINEAR_RSQRT = 18,
VSI_NN_KERNEL_LUT_LINEAR_SIGMOID = 19,
+ VSI_NN_KERNEL_LUT_ATAN = 20,
+ VSI_NN_KERNEL_LUT_ATANH = 21,
+ VSI_NN_KERNEL_LUT_ACOSH = 22,
+ VSI_NN_KERNEL_LUT_INVERSE_SIGMOID = 23,
};
@@ -67,6 +71,8 @@ typedef struct _vsi_nn_kernel_lut_
typedef struct _vsi_nn_kernel_lut_params
{
vsi_enum act_type;
+ vsi_bool pwl_sign_remove_support;
+ float clamp_min;
float params[16];
} vsi_nn_kernel_lut_params;
diff --git a/src/tim/vx/internal/include/libnnext/vsi_nn_vxkernel.h b/src/tim/vx/internal/include/libnnext/vsi_nn_vxkernel.h
index e486949..9413ede 100644
--- a/src/tim/vx/internal/include/libnnext/vsi_nn_vxkernel.h
+++ b/src/tim/vx/internal/include/libnnext/vsi_nn_vxkernel.h
@@ -47,7 +47,7 @@ typedef struct vsi_nn_kernel_info
vx_kernel_description_t ** kernel;
uint8_t kernel_index;
uint8_t init_index;
-} vsi_nn_kernel_info_t;
+} VSI_PUBLIC_TYPE vsi_nn_kernel_info_t;
uint8_t * vsi_nn_LoadBinarySource
(
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_argmax.h b/src/tim/vx/internal/include/ops/vsi_nn_op_argmax.h
index 2ec4172..cc9f42e 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_argmax.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_argmax.h
@@ -112,6 +112,7 @@ typedef struct _vsi_nn_argmax_param
/* argmax layer local data structure */
vsi_nn_argmax_lcl_data local;
int32_t axis;
+ vsi_bool keep_dims;
} vsi_nn_argmax_param;
#ifdef __cplusplus
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_argmin.h b/src/tim/vx/internal/include/ops/vsi_nn_op_argmin.h
index e9d1b70..87ec5ec 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_argmin.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_argmin.h
@@ -111,6 +111,7 @@ typedef struct _vsi_nn_argmin_param
/* argmin layer local data structure */
vsi_nn_argmin_lcl_data local;
int32_t axis;
+ vsi_bool keep_dims;
} vsi_nn_argmin_param;
#ifdef __cplusplus
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_avg_pool3d.h b/src/tim/vx/internal/include/ops/vsi_nn_op_avg_pool3d.h
new file mode 100644
index 0000000..c224ef1
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_avg_pool3d.h
@@ -0,0 +1,53 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_AVG_POOL3D_H
+#define _VSI_NN_OP_AVG_POOL3D_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_avg_pool3d_param /* settings carried by an avg_pool3d node */
+{
+ /* round_type is used to calculate the output shape */
+ vsi_nn_round_type_e round_type;
+ uint32_t ksize[3]; /* presumably the pooling window size per pooled axis; axis order not visible here, TODO confirm */
+ uint32_t stride[3]; /* presumably the pooling step per pooled axis, same order as ksize; TODO confirm */
+ /* Padding amounts, in the order: left, right, top, bottom, front, end */
+ uint32_t pad[6];
+ /* Padding type; the default value shall be AUTO */
+ vsi_nn_pad_e pad_type;
+ /* Whether to include pad pixels when calculating values at the edges */
+ int32_t count_include_pad;
+} vsi_nn_avg_pool3d_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/utils/vsi_nn_vdata.h b/src/tim/vx/internal/include/ops/vsi_nn_op_bilinear_grid_sample.h
similarity index 74%
rename from src/tim/vx/internal/include/utils/vsi_nn_vdata.h
rename to src/tim/vx/internal/include/ops/vsi_nn_op_bilinear_grid_sample.h
index a0f295f..d04c589 100644
--- a/src/tim/vx/internal/include/utils/vsi_nn_vdata.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_bilinear_grid_sample.h
@@ -21,36 +21,31 @@
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
-#ifndef _VSI_NN_VDATA_H
-#define _VSI_NN_VDATA_H
-#include
-#include
+#ifndef _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
+#define _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
-#include "vsi_nn_graph.h"
-#include "vsi_nn_node.h"
-#include "vsi_nn_tensor.h"
+#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
-OVXLIB_API uint8_t * vsi_nn_VdataCreate
- (
- vsi_nn_graph_t * graph,
- vsi_nn_node_t * node,
- uint32_t * p_stream_size
- );
-OVXLIB_API vsi_nn_tensor_t * vsi_nn_CreateVDataTensor
- (
- vsi_nn_graph_t * graph,
- uint8_t * stream,
- vsi_nn_tensor_attr_t * attr
- );
+typedef struct _vsi_nn_bilinear_grid_sample_param /* settings carried by a bilinear_grid_sample node */
+{
+ struct _bilinear_grid_sample_local_data_t* local; /* must remain the first member: the _compiler_assert after this struct requires offset 0 */
+ vsi_bool align_corners;
+ vsi_nn_pad_mode_e padding_mode; /* presumably selects how out-of-range samples are filled; TODO confirm */
+ int32_t const_val; /* NOTE(review): looks like the fill value for a constant padding mode; confirm */
+} vsi_nn_bilinear_grid_sample_param;
+/* Compile-time check that 'local' sits at offset 0 of the parameter struct. */
+_compiler_assert(offsetof(vsi_nn_bilinear_grid_sample_param, local) == 0, \
+ vsi_nn_bilinear_grid_sample_h );
#ifdef __cplusplus
}
#endif
#endif
+
diff --git a/src/tim/vx/internal/src/utils/vsi_nn_vdata.c b/src/tim/vx/internal/include/ops/vsi_nn_op_gather_nd.h
similarity index 68%
rename from src/tim/vx/internal/src/utils/vsi_nn_vdata.c
rename to src/tim/vx/internal/include/ops/vsi_nn_op_gather_nd.h
index c3171b6..06e6599 100644
--- a/src/tim/vx/internal/src/utils/vsi_nn_vdata.c
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_gather_nd.h
@@ -21,34 +21,23 @@
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
-#include
-#include
-#include
+#ifndef _VSI_NN_OP_GATHER_ND_H
+#define _VSI_NN_OP_GATHER_ND_H
-#include "vsi_nn_graph.h"
-#include "vsi_nn_node.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_ops.h"
-#include "vsi_nn_log.h"
-#include "utils/vsi_nn_util.h"
+#include "vsi_nn_types.h"
-uint8_t * vsi_nn_VdataCreate
- (
- vsi_nn_graph_t * graph,
- vsi_nn_node_t * node,
- uint32_t * p_stream_size
- )
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_gather_nd_param /* settings carried by a gather_nd node */
{
- return NULL;
-} /* vsi_nn_VdataCreate() */
+ int32_t batch_dims; /* presumably the number of leading batch dimensions; TODO confirm */
+} vsi_nn_gather_nd_param;
-vsi_nn_tensor_t * vsi_nn_CreateVDataTensor
- (
- vsi_nn_graph_t * graph,
- uint8_t * stream,
- vsi_nn_tensor_attr_t * attr
- )
-{
- return NULL;
-} /* vsi_nn_CreateVDataTensor() */
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_globallppool.h b/src/tim/vx/internal/include/ops/vsi_nn_op_globallppool.h
new file mode 100644
index 0000000..66e0f5b
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_globallppool.h
@@ -0,0 +1,44 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_GLOBALLPPOOL_H
+#define _VSI_NN_OP_GLOBALLPPOOL_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_globallppool_param /* settings carried by a globallppool node */
+{
+ int32_t p; /* presumably the exponent p of the Lp norm used for pooling; TODO confirm */
+} vsi_nn_globallppool_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_grid_sample.h b/src/tim/vx/internal/include/ops/vsi_nn_op_grid_sample.h
new file mode 100644
index 0000000..0a67e2b
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_grid_sample.h
@@ -0,0 +1,58 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_GRID_SAMPLE_H
+#define _VSI_NN_OP_GRID_SAMPLE_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//typedef uint32_t vsi_nn_grid_sample_mode_t;
+//enum { bilinear = 0, nearest };
+//
+//typedef uint32_t vsi_nn_grid_sample_padding_mode_t;
+//enum { zeros = 0, CONST };
+
+typedef struct _grid_sample_local_data_t { /* node-local data for grid_sample; no real fields are declared yet */
+ int32_t placeholder; /* only member; presumably reserved so the struct is not empty */
+} grid_sample_local_data_t;
+
+typedef struct _vsi_nn_grid_sample_param /* settings carried by a grid_sample node */
+{
+ grid_sample_local_data_t* local; /* pointer to node-local data; first member of the struct */
+ vsi_enum mode; /* sampling mode; the commented-out enum in this header suggests bilinear = 0 and nearest, TODO confirm */
+ vsi_bool align_corners;
+ vsi_nn_pad_mode_e padding_mode; /* presumably selects how out-of-range samples are filled; TODO confirm */
+ int32_t const_val; /* NOTE(review): looks like the fill value for a constant padding mode; confirm */
+} vsi_nn_grid_sample_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_imageprocess.h b/src/tim/vx/internal/include/ops/vsi_nn_op_imageprocess.h
index 7b68724..43c219d 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_imageprocess.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_imageprocess.h
@@ -67,7 +67,7 @@ typedef struct _vsi_nn_imageprocess_param
int32_t mean_value_size;
float* mean_value;
} mean;
-} vsi_nn_imageprocess_param;
+} VSI_PUBLIC_TYPE vsi_nn_imageprocess_param;
/**
* Insert imageprocess op for image pre process
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_inverse_sigmoid.h b/src/tim/vx/internal/include/ops/vsi_nn_op_inverse_sigmoid.h
new file mode 100644
index 0000000..fa7093e
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_inverse_sigmoid.h
@@ -0,0 +1,45 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_INVERSE_SIGMOID_H
+#define _VSI_NN_OP_INVERSE_SIGMOID_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_inverse_sigmoid_param
+{
+ /* Settings carried by an inverse_sigmoid node. */
+ float eps; /* presumably a small epsilon keeping the input away from 0 and 1; TODO confirm */
+} vsi_nn_inverse_sigmoid_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_lpnorm.h b/src/tim/vx/internal/include/ops/vsi_nn_op_lpnorm.h
new file mode 100644
index 0000000..47d1792
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_lpnorm.h
@@ -0,0 +1,45 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_LPNORM_H
+#define _VSI_NN_OP_LPNORM_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_lpnorm_param /* settings carried by an lpnorm node */
+{
+ int axis; /* presumably the dimension along which the norm is taken; TODO confirm */
+ int p; /* presumably the norm order (e.g. 1 or 2); TODO confirm the accepted values */
+} vsi_nn_lpnorm_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_maxunpool.h b/src/tim/vx/internal/include/ops/vsi_nn_op_maxunpool.h
new file mode 100644
index 0000000..4943840
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_maxunpool.h
@@ -0,0 +1,48 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_MAXUNPOOL_H
+#define _VSI_NN_OP_MAXUNPOOL_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_maxunpool_param
+{
+ /* Settings carried by a maxunpool node. */
+ uint32_t ksize[2]; /* two entries; axis order not visible here, TODO confirm */
+ uint32_t pad[4]; /* four entries; ordering not visible here, TODO confirm */
+ uint32_t stride[2]; /* two entries; presumably same axis order as ksize, TODO confirm */
+ const uint32_t *output_size; /* read-only through this pointer; element count, ownership and NULL handling are not visible here, TODO confirm */
+} vsi_nn_maxunpool_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_pre_process_nv12.h b/src/tim/vx/internal/include/ops/vsi_nn_op_pre_process_nv12.h
index dddee8d..aa8fc82 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_pre_process_nv12.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_pre_process_nv12.h
@@ -68,6 +68,8 @@ typedef struct _vsi_nn_pre_process_nv12_param
vsi_bool reverse_channel;
vsi_nn_pre_process_nv12_lcl_data* local;
+
+ vsi_nn_nv_type nv_type;
} vsi_nn_pre_process_nv12_param;
#ifdef __cplusplus
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_reduce_mean_internal.h b/src/tim/vx/internal/include/ops/vsi_nn_op_reduce_mean_internal.h
index 20eb56c..8c2fea6 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_reduce_mean_internal.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_reduce_mean_internal.h
@@ -38,6 +38,7 @@ typedef struct _vsi_nn_reduce_mean_internal_param
vx_int32 *axis;
vx_uint32 axis_num;
float scale;
+ vsi_enum type;
} vsi_nn_reduce_mean_internal_param;
_compiler_assert(offsetof(vsi_nn_reduce_mean_internal_param, local) == 0, \
vsi_nn_reduce_mean_internal_h );
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_reversesequence.h b/src/tim/vx/internal/include/ops/vsi_nn_op_reversesequence.h
new file mode 100644
index 0000000..0b5a496
--- /dev/null
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_reversesequence.h
@@ -0,0 +1,45 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+#ifndef _VSI_NN_OP_REVERSESEQUENCE_H
+#define _VSI_NN_OP_REVERSESEQUENCE_H
+
+#include "vsi_nn_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_reversesequence_param /* settings carried by a reversesequence node */
+{
+ int32_t batch_axis; /* presumably the index of the batch dimension; TODO confirm */
+ int32_t time_axis; /* presumably the index of the dimension that gets reversed; TODO confirm */
+} vsi_nn_reversesequence_param;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_roi_align.h b/src/tim/vx/internal/include/ops/vsi_nn_op_roi_align.h
index e24f043..20add49 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_roi_align.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_roi_align.h
@@ -38,6 +38,7 @@ typedef struct _vsi_nn_roi_align_param
float width_ratio;
int32_t height_sample_num;
int32_t width_sample_num;
+ vsi_nn_roi_align_type_e platform_type;
} vsi_nn_roi_align_param;
#ifdef __cplusplus
diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_strided_slice.h b/src/tim/vx/internal/include/ops/vsi_nn_op_strided_slice.h
index d7bb3c7..583777d 100644
--- a/src/tim/vx/internal/include/ops/vsi_nn_op_strided_slice.h
+++ b/src/tim/vx/internal/include/ops/vsi_nn_op_strided_slice.h
@@ -71,6 +71,7 @@ typedef struct _vsi_nn_strided_slice_lcl_data2
vsi_bool is_dataconvert_op;
vsi_bool is_optimized;
+ vsi_bool is_same_shape;
strided_slice_param params;
} vsi_nn_strided_slice_lcl_data2;
diff --git a/src/tim/vx/internal/include/post/vsi_nn_post_cmupose.h b/src/tim/vx/internal/include/post/vsi_nn_post_cmupose.h
new file mode 100644
index 0000000..eb74f09
--- /dev/null
+++ b/src/tim/vx/internal/include/post/vsi_nn_post_cmupose.h
@@ -0,0 +1,163 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_POST_CMUPOSE_H_
+#define _VSI_NN_POST_CMUPOSE_H_
+
+#include "utils/vsi_nn_link_list.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_subset_data_t /* payload type embedded in vsi_nn_subset_t */
+{
+ float idx[20]; /* 20 float slots; their meaning is not visible in this header, TODO document */
+}vsi_nn_subset_data_t;
+
+typedef struct _vsi_nn_subset_t /* list node: a link header followed by one subset payload */
+{
+ vsi_nn_link_list_t link_list; /* first member, so a node pointer and its link header share an address */
+ vsi_nn_subset_data_t data;
+} VSI_PUBLIC_TYPE vsi_nn_subset_t;
+
+typedef struct _vsi_nn_peaks_data_t /* payload type embedded in vsi_nn_peaks_t */
+{
+ uint32_t location[2]; /* presumably (x, y); order not visible here, TODO confirm */
+ float score;
+ uint32_t id;
+} VSI_PUBLIC_TYPE vsi_nn_peaks_data_t;
+
+typedef struct _vsi_nn_peaks_t /* list node: a link header followed by one peak payload */
+{
+ vsi_nn_link_list_t link_list; /* first member, so a node pointer and its link header share an address */
+ vsi_nn_peaks_data_t peak;
+} VSI_PUBLIC_TYPE vsi_nn_peaks_t;
+
+typedef struct _vsi_nn_conncection_data_t /* NOTE(review): the struct tag is spelled "conncection"; the typedef name below is spelled correctly */
+{
+ uint32_t x;
+ uint32_t y;
+ float score;
+ uint32_t i; /* meaning of i/j is not visible in this header; presumably indices of the two connected items, TODO confirm */
+ uint32_t j;
+}vsi_nn_connection_data_t;
+
+typedef struct _vsi_nn_connection_t /* list node: a link header followed by one connection payload */
+{
+ vsi_nn_link_list_t link_list; /* first member, so a node pointer and its link header share an address */
+ vsi_nn_connection_data_t data;
+}vsi_nn_connection_t;
+
+typedef struct _vsi_nn_con_candidate_data_t /* payload type embedded in vsi_nn_con_candidate_t */
+{
+ uint32_t i; /* meaning of i/j is not visible in this header; presumably indices of the candidate pair, TODO confirm */
+ uint32_t j;
+ float score;
+ float candAB; /* meaning not visible in this header, TODO document */
+}vsi_nn_con_candidate_data_t;
+
+typedef struct _vsi_nn_con_candidate_t /* list node: a link header followed by one candidate payload */
+{
+ vsi_nn_link_list_t link_list; /* first member, so a node pointer and its link header share an address */
+ vsi_nn_con_candidate_data_t data;
+}vsi_nn_con_candidate_t;
+
+typedef struct _vsi_nn_cmupose_multiplier_t /* pointer-plus-count pair of float values */
+{
+ float *size; /* ownership of the pointed-to floats is not visible here, TODO confirm */
+ uint32_t num; /* presumably the number of floats behind 'size'; TODO confirm */
+}vsi_nn_cmupose_multiplier_t;
+
+typedef struct _vsi_nn_cmupose_image_t /* image dimensions used by the CMU-pose post-process API */
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t channel; /* presumably the channel count; TODO confirm */
+} VSI_PUBLIC_TYPE vsi_nn_cmupose_image_t;
+
+typedef struct _vsi_nn_cmupose_scale_search_t /* pointer-plus-count pair of float values; embedded in vsi_nn_cmupose_param_t */
+{
+ float *size; /* ownership of the pointed-to floats is not visible here, TODO confirm */
+ uint32_t num; /* presumably the number of floats behind 'size'; TODO confirm */
+}vsi_nn_cmupose_scale_search_t;
+
+typedef struct _vsi_nn_cmupose_model_t /* model-related constants used by the CMU-pose post-process API */
+{
+ uint32_t boxsize; /* units and meaning not visible in this header, TODO document */
+ uint32_t stride;
+ uint32_t padValue;
+} VSI_PUBLIC_TYPE vsi_nn_cmupose_model_t;
+
+typedef struct _vsi_nn_cmupose_param_t /* tuning values used by the CMU-pose post-process API */
+{
+ float thre1; /* thre1..thre3: presumably thresholds; what each one gates is not visible here, TODO document */
+ float thre2;
+ float thre3;
+ uint32_t mid_num;
+ vsi_nn_cmupose_scale_search_t scale_search; /* stored by value; its 'size' pointer still refers to external memory */
+} VSI_PUBLIC_TYPE vsi_nn_cmupose_param_t;
+
+typedef struct _vsi_nn_cmupose_inputs_t /* tensor inputs of the CMU-pose post-process API */
+{
+ vsi_nn_tensor_t *net_out; /* presumably the network output tensor to post-process; TODO confirm */
+} VSI_PUBLIC_TYPE vsi_nn_cmupose_inputs_t;
+
+typedef struct _vsi_nn_cmupose_config_t /* bundles the inputs, param, model and image structs by value */
+{
+ vsi_nn_cmupose_inputs_t inputs;
+ vsi_nn_cmupose_param_t param;
+ vsi_nn_cmupose_model_t model;
+ vsi_nn_cmupose_image_t image;
+} VSI_PUBLIC_TYPE vsi_nn_cmupose_config_t;
+
+OVXLIB_API vsi_status vsi_nn_CMUPose_Post_Process /* variant taking a raw float buffer and one bundled config; not to be confused with vsi_nn_CMUPose_PostProcess */
+ (
+ float *net_out, /* raw float buffer; presumably the network output, TODO confirm layout */
+ vsi_nn_cmupose_config_t *config,
+ vsi_nn_peaks_t ***all_peaks_out, /* this and the following pointers look like out-parameters; who allocates and frees them is not visible here, TODO confirm */
+ uint32_t *all_peaks_num_out,
+ vsi_nn_subset_t **subset_list_out,
+ vsi_nn_peaks_data_t **peak_candidate_out,
+ uint32_t *peak_candidate_num_out
+ );
+
+OVXLIB_API vsi_status vsi_nn_CMUPose_PostProcess /* variant taking the graph plus separate inputs/image/param/model structs; not to be confused with vsi_nn_CMUPose_Post_Process */
+ (
+ vsi_nn_graph_t *graph,
+ vsi_nn_cmupose_inputs_t *inputs,
+ vsi_nn_cmupose_image_t *image,
+ vsi_nn_cmupose_param_t *param,
+ vsi_nn_cmupose_model_t *model,
+ vsi_nn_peaks_t ***all_peaks, /* this and the following pointers look like out-parameters; who allocates and frees them is not visible here, TODO confirm */
+ uint32_t *all_peaks_num,
+ vsi_nn_peaks_data_t **candidate,
+ uint32_t *candidate_num,
+ vsi_nn_subset_t **subset
+ );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/tim/vx/internal/include/post/vsi_nn_post_fasterrcnn.h b/src/tim/vx/internal/include/post/vsi_nn_post_fasterrcnn.h
new file mode 100644
index 0000000..8a7680d
--- /dev/null
+++ b/src/tim/vx/internal/include/post/vsi_nn_post_fasterrcnn.h
@@ -0,0 +1,79 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_POST_FASTERRCNN_H_
+#define _VSI_NN_POST_FASTERRCNN_H_
+
+#include "vsi_nn_types.h"
+#include "vsi_nn_node_type.h"
+#include "vsi_nn_tensor.h"
+#include "utils/vsi_nn_link_list.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _vsi_nn_fasterrcnn_box_t /* list node describing one detection box */
+{
+ vsi_nn_link_list_t link_list; /* first member, so a node pointer and its link header share an address */
+
+ /* upper-left corner (x1, y1) */
+ float x1;
+ float y1;
+ /* lower-right corner (x2, y2) */
+ float x2;
+ float y2;
+ float score;
+ uint32_t class_id; /* presumably an index into vsi_nn_fasterrcnn_param_t.classes; TODO confirm */
+} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_box_t;
+
+typedef struct _vsi_nn_fasterrcnn_param_t /* tuning values for vsi_nn_FasterRCNN_PostProcess */
+{
+ float conf_thresh; /* presumably the minimum confidence for a box to be kept; TODO confirm */
+ float nms_thresh; /* presumably the overlap threshold used by non-maximum suppression; TODO confirm */
+ const char **classes; /* array of read-only strings; presumably class names, TODO confirm */
+ uint32_t classes_num; /* presumably the number of entries in 'classes'; TODO confirm */
+ uint32_t rois_num;
+ vsi_nn_proposal_im_info iminfo;
+} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_param_t;
+
+typedef struct _vsi_nn_fasterrcnn_inputs_t /* tensor inputs for vsi_nn_FasterRCNN_PostProcess */
+{
+ vsi_nn_tensor_t *rois; /* the three tensors are held by pointer; ownership is not visible here, TODO confirm */
+ vsi_nn_tensor_t *cls;
+ vsi_nn_tensor_t *bbox;
+} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_inputs_t;
+
+OVXLIB_API vsi_status vsi_nn_FasterRCNN_PostProcess /* exported Faster R-CNN post-process entry point; returns a vsi_status */
+ (
+ vsi_nn_graph_t *graph,
+ vsi_nn_fasterrcnn_inputs_t *inputs,
+ vsi_nn_fasterrcnn_param_t *param,
+ vsi_nn_fasterrcnn_box_t **dets_box /* looks like an out-parameter receiving a box list; who frees it is not visible here, TODO confirm */
+ );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/tim/vx/internal/include/utils/vsi_nn_binary_tree.h b/src/tim/vx/internal/include/utils/vsi_nn_binary_tree.h
index 186f381..41ba068 100644
--- a/src/tim/vx/internal/include/utils/vsi_nn_binary_tree.h
+++ b/src/tim/vx/internal/include/utils/vsi_nn_binary_tree.h
@@ -29,8 +29,9 @@ extern "C"{
#endif
#include
+#include "vsi_nn_feature_config.h"
-typedef int64_t vsi_nn_binary_tree_key_t;
+typedef int64_t VSI_PUBLIC_TYPE vsi_nn_binary_tree_key_t;
#define vsi_nn_BinaryTreeInitRoot(n) do{n = NULL;} while (0);
@@ -40,7 +41,7 @@ typedef struct _vsi_nn_binary_tree
struct _vsi_nn_binary_tree * right;
vsi_nn_binary_tree_key_t key;
void * data_ptr;
-} vsi_nn_binary_tree_t;
+} VSI_PUBLIC_TYPE vsi_nn_binary_tree_t;
OVXLIB_API void vsi_nn_BinaryTreeRemoveNode
(
diff --git a/src/tim/vx/internal/include/utils/vsi_nn_dlfcn.h b/src/tim/vx/internal/include/utils/vsi_nn_dlfcn.h
new file mode 100644
index 0000000..82baf12
--- /dev/null
+++ b/src/tim/vx/internal/include/utils/vsi_nn_dlfcn.h
@@ -0,0 +1,65 @@
+#ifndef __VSI_NN_DLFCN_H
+#define __VSI_NN_DLFCN_H
+
+#if (defined(_MSC_VER) || defined(_WIN32) || defined(__MINGW32__)) /* Windows toolchains: supply the RTLD_* constants locally instead of including <dlfcn.h> */
+#define RTLD_LAZY 0 /* both binding modes map to 0 in this fallback */
+#define RTLD_NOW 0
+
+#define RTLD_GLOBAL (1 << 1)
+#define RTLD_LOCAL (1 << 2)
+
+#define RTLD_DEFAULT ((void *)0) /* pseudo-handles, expressed as pointer constants */
+#define RTLD_NEXT ((void *)-1)
+
+#else
+#include <dlfcn.h> /* every other platform takes RTLD_* from the system header */
+#endif
+
+/**
+ * Open a shared library
+ *
+ * @param[in] file Library path
+ * @param[in] mode Open mode.
+ *
+ * @return Library handle on success, or NULL otherwise.
+ */
+void* vsi_nn_dlopen
+ (
+ const char *file,
+ int mode
+ );
+
+/**
+ * Close the opened library
+ *
+ * @param[in] handle Library handle
+ *
+ * @return TRUE on success (NOTE(review): POSIX dlclose() reports success as 0; confirm which convention this wrapper follows)
+ */
+int vsi_nn_dlclose
+ (
+ void *handle
+ );
+
+/**
+ * Find a symbol in an opened library
+ *
+ * @param[in] handle Library handle
+ * @param[in] name Symbol name to find.
+ *
+ * @return Symbol address (NOTE(review): the failure value is not stated here, presumably NULL; confirm)
+ */
+void* vsi_nn_dlsym
+ (
+ void *handle,
+ const char *name
+ );
+
+/**
+ * Get error info.
+ *
+ * @return Error message. NOTE(review): lifetime and ownership of the returned string are not documented; confirm callers must not free it.
+ */
+char * vsi_nn_dlerror(void);
+#endif
+
diff --git a/src/tim/vx/internal/include/utils/vsi_nn_dtype_util_prv.h b/src/tim/vx/internal/include/utils/vsi_nn_dtype_util_prv.h
index 7eaec28..ab63a3c 100644
--- a/src/tim/vx/internal/include/utils/vsi_nn_dtype_util_prv.h
+++ b/src/tim/vx/internal/include/utils/vsi_nn_dtype_util_prv.h
@@ -464,6 +464,7 @@ static VSI_INLINE_API vsi_status dtype_to_float32
case VSI_NN_TYPE_BOOL8:
case VSI_NN_TYPE_UINT8:
case VSI_NN_TYPE_INT16:
+ case VSI_NN_TYPE_UINT16:
case VSI_NN_TYPE_INT32:
{
int32_t src_value = 0;
@@ -516,6 +517,7 @@ static VSI_INLINE_API vsi_status float32_to_dtype
case VSI_NN_TYPE_BOOL8:
case VSI_NN_TYPE_UINT8:
case VSI_NN_TYPE_INT16:
+ case VSI_NN_TYPE_UINT16:
case VSI_NN_TYPE_INT32:
case VSI_NN_TYPE_UINT32:
{
diff --git a/src/tim/vx/internal/include/utils/vsi_nn_link_list.h b/src/tim/vx/internal/include/utils/vsi_nn_link_list.h
index e16d9e8..7e6afb2 100644
--- a/src/tim/vx/internal/include/utils/vsi_nn_link_list.h
+++ b/src/tim/vx/internal/include/utils/vsi_nn_link_list.h
@@ -36,7 +36,7 @@ typedef struct _vsi_nn_link_list
{
struct _vsi_nn_link_list * prev;
struct _vsi_nn_link_list * next;
-} vsi_nn_link_list_t;
+} VSI_PUBLIC_TYPE vsi_nn_link_list_t;
typedef void ( * vsi_nn_link_list_init_t )
(
diff --git a/src/tim/vx/internal/include/utils/vsi_nn_map.h b/src/tim/vx/internal/include/utils/vsi_nn_map.h
index 33ac22a..37754c9 100644
--- a/src/tim/vx/internal/include/utils/vsi_nn_map.h
+++ b/src/tim/vx/internal/include/utils/vsi_nn_map.h
@@ -32,7 +32,7 @@
extern "C"{
#endif
-typedef vsi_nn_binary_tree_key_t vsi_nn_map_key_t;
+typedef vsi_nn_binary_tree_key_t VSI_PUBLIC_TYPE vsi_nn_map_key_t;
typedef struct _vsi_nn_map_key_list
{
@@ -45,7 +45,7 @@ typedef struct _vsi_nn_map
int size;
vsi_nn_map_key_list_t * keys;
vsi_nn_binary_tree_t * values;
-} vsi_nn_map_t;
+} VSI_PUBLIC_TYPE vsi_nn_map_t;
OVXLIB_API void vsi_nn_MapInit
(
diff --git a/src/tim/vx/internal/include/vsi_nn_compatibility.h b/src/tim/vx/internal/include/vsi_nn_compatibility.h
index bcf2f25..4c28b94 100644
--- a/src/tim/vx/internal/include/vsi_nn_compatibility.h
+++ b/src/tim/vx/internal/include/vsi_nn_compatibility.h
@@ -99,6 +99,30 @@ typedef enum vx_nn_activation_function_e vx_co
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_NONE VX_NN_ACTIVATION_NONE
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SWISH VX_NN_ACTIVATION_SWISH
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HWISH VX_NN_ACTIVATION_HSWISH
+#if (VX_ACTIVATION_EXT2_SUPPORT) /* aliases below exist only when the driver headers report EXT2 activation support */
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SIGN VX_NN_ACTIVATION_SIGN_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HSIGMOID VX_NN_ACTIVATION_HSIGMOID_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_NEG VX_NN_ACTIVATION_NEG_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_CLIP VX_NN_ACTIVATION_CLIP_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_EXP VX_NN_ACTIVATION_EXP_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SIN VX_NN_ACTIVATION_SIN_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_COS VX_NN_ACTIVATION_COS_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_LOG VX_NN_ACTIVATION_LOG_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_MISH VX_NN_ACTIVATION_MISH_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_GELU VX_NN_ACTIVATION_GELU_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HGELU VX_NN_ACTIVATION_HGELU_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ELU VX_NN_ACTIVATION_ELU_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SELU VX_NN_ACTIVATION_SELU_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_CELU VX_NN_ACTIVATION_CELU_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_RECIPROCAL VX_NN_ACTIVATION_RECIPROCAL_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SOFTSIGN VX_NN_ACTIVATION_SOFTSIGN_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ATAN VX_NN_ACTIVATION_ATAN_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ATANH VX_NN_ACTIVATION_ATANH_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ACOSH VX_NN_ACTIVATION_ACOSH_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_INVERSE_SIGMOID VX_NN_ACTIVATION_INVERSE_SIGMOID_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ROUND VX_NN_ACTIVATION_ROUND_VSI
+#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ERF VX_NN_ACTIVATION_ERF_VSI
+#endif /* VX_ACTIVATION_EXT2_SUPPORT */
/*
keep the backward compatibility with spec 1.1 for vxCopyTensorPatch_11
diff --git a/src/tim/vx/internal/include/vsi_nn_context.h b/src/tim/vx/internal/include/vsi_nn_context.h
index f5ace92..75e5ab7 100644
--- a/src/tim/vx/internal/include/vsi_nn_context.h
+++ b/src/tim/vx/internal/include/vsi_nn_context.h
@@ -77,6 +77,7 @@ typedef struct _vsi_nn_runtime_option_t
int32_t enable_concat_optimize;
int32_t enable_asymi8_to_u8;
int32_t enable_dataconvert_optimize;
+ int32_t enable_stream_processor;
} vsi_nn_runtime_option_t;
/**
@@ -87,7 +88,7 @@ typedef struct _vsi_nn_context_t
vx_context c;
vsi_nn_hw_config_t config;
vsi_nn_runtime_option_t options;
-} *vsi_nn_context_t;
+} VSI_PUBLIC_TYPE *vsi_nn_context_t;
/**
* Create context
diff --git a/src/tim/vx/internal/include/vsi_nn_feature_config.h b/src/tim/vx/internal/include/vsi_nn_feature_config.h
index 8906a96..01ec04c 100644
--- a/src/tim/vx/internal/include/vsi_nn_feature_config.h
+++ b/src/tim/vx/internal/include/vsi_nn_feature_config.h
@@ -1,7 +1,46 @@
+/****************************************************************************
+*
+* Copyright (c) 2019 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the Software),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
/*****Auto generated header file, Please DO NOT modify manually!*****/
#ifndef _VSI_NN_FEATURE_CONFIG_H
#define _VSI_NN_FEATURE_CONFIG_H
+#define VSI_PUBLIC_TYPE
+#include
+#if defined(VX_KHR_COMPATIBILITY) && (0x1==VX_KHR_COMPATIBILITY)
+#include
+#endif
+#ifndef VSI_PERCHANNEL_QUANTIZATION_SUPPORT
#define VSI_PERCHANNEL_QUANTIZATION_SUPPORT
+#endif
+#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
+#define VSI_INVALIDATE_HANDLE_SUPPORT
+#endif
+#ifndef VSI_0_D_TENSOR_SUPPORT
+#define VSI_0_D_TENSOR_SUPPORT
+#endif
+#if defined(VX_TENSORVIEW_ON_ANY_DIM) && VX_TENSORVIEW_ON_ANY_DIM
+#define VSI_CONCAT_ENHANCE_SUPPORT
+#endif
#endif
diff --git a/src/tim/vx/internal/include/vsi_nn_graph.h b/src/tim/vx/internal/include/vsi_nn_graph.h
index c9c0687..1756870 100644
--- a/src/tim/vx/internal/include/vsi_nn_graph.h
+++ b/src/tim/vx/internal/include/vsi_nn_graph.h
@@ -74,7 +74,7 @@ extern "C" {
/**
* Graph structure
*/
-struct _vsi_nn_graph
+struct VSI_PUBLIC_TYPE _vsi_nn_graph
{
/** Context */
vsi_nn_context_t ctx;
@@ -167,6 +167,8 @@ struct _vsi_nn_graph
} complete_signal;
vsi_bool isAllowFastMode;
+
+ //DO NOT modify this struct.
};
/**
diff --git a/src/tim/vx/internal/include/vsi_nn_log.h b/src/tim/vx/internal/include/vsi_nn_log.h
index d8b5bad..307f06d 100644
--- a/src/tim/vx/internal/include/vsi_nn_log.h
+++ b/src/tim/vx/internal/include/vsi_nn_log.h
@@ -46,7 +46,7 @@ typedef enum _vsi_nn_log_level_e
VSI_NN_LOG_WARN,
VSI_NN_LOG_INFO,
VSI_NN_LOG_DEBUG
-}vsi_nn_log_level_e;
+} VSI_PUBLIC_TYPE vsi_nn_log_level_e;
#define VSI_NN_MAX_DEBUG_BUFFER_LEN 1024
#define VSILOGE( fmt, ... ) \
diff --git a/src/tim/vx/internal/include/vsi_nn_node.h b/src/tim/vx/internal/include/vsi_nn_node.h
index b922204..0a69dbd 100644
--- a/src/tim/vx/internal/include/vsi_nn_node.h
+++ b/src/tim/vx/internal/include/vsi_nn_node.h
@@ -58,7 +58,7 @@ typedef struct _vsi_nn_node_attr_t
} vsi_nn_node_attr_t;
/** Node structure */
-struct _vsi_nn_node
+struct VSI_PUBLIC_TYPE _vsi_nn_node
{
/**
* Graph handle
diff --git a/src/tim/vx/internal/include/vsi_nn_node_type.h b/src/tim/vx/internal/include/vsi_nn_node_type.h
index d41e0f0..37032f4 100644
--- a/src/tim/vx/internal/include/vsi_nn_node_type.h
+++ b/src/tim/vx/internal/include/vsi_nn_node_type.h
@@ -200,8 +200,17 @@
#include "ops/vsi_nn_op_scatter_elements.h"
#include "ops/vsi_nn_op_pre_process_yuv422.h"
#include "ops/vsi_nn_op_bucketize.h"
+#include "ops/vsi_nn_op_globallppool.h"
+#include "ops/vsi_nn_op_gather_nd.h"
+#include "ops/vsi_nn_op_avg_pool3d.h"
+#include "ops/vsi_nn_op_maxunpool.h"
+#include "ops/vsi_nn_op_reversesequence.h"
+#include "ops/vsi_nn_op_grid_sample.h"
+#include "ops/vsi_nn_op_bilinear_grid_sample.h"
+#include "ops/vsi_nn_op_lpnorm.h"
/* custom node head define define */
#include "custom/vsi_nn_custom_node_type.h"
+#include "ops/vsi_nn_op_inverse_sigmoid.h"
#if defined(__cplusplus)
extern "C"{
@@ -386,6 +395,15 @@ typedef union _vsi_nn_nn_param
vsi_nn_scatter_elements_param scatter_elements;
vsi_nn_pre_process_yuv422_param pre_process_yuv422;
vsi_nn_bucketize_param bucketize;
+ vsi_nn_globallppool_param globallppool;
+ vsi_nn_gather_nd_param gather_nd;
+ vsi_nn_avg_pool3d_param avg_pool3d;
+ vsi_nn_maxunpool_param maxunpool;
+ vsi_nn_reversesequence_param reversesequence;
+ vsi_nn_inverse_sigmoid_param inverse_sigmoid;
+ vsi_nn_grid_sample_param gridsample;
+ vsi_nn_bilinear_grid_sample_param bilinear_grid_sample;
+ vsi_nn_lpnorm_param lpnorm;
void* client_param;
/* custom node data struct define */
diff --git a/src/tim/vx/internal/include/vsi_nn_ops.h b/src/tim/vx/internal/include/vsi_nn_ops.h
index 40671e7..de26f0d 100644
--- a/src/tim/vx/internal/include/vsi_nn_ops.h
+++ b/src/tim/vx/internal/include/vsi_nn_ops.h
@@ -48,7 +48,7 @@ extern "C"{
* @see include/custom/custom_ops.def
* @see include/internal/internal_ops.def
*/
-typedef int32_t vsi_nn_op_t; enum
+typedef int32_t VSI_PUBLIC_TYPE vsi_nn_op_t; enum
{
#define DEF_OP( NAME, ... ) VSI_NN_OP_##NAME,
#include "interface/ops.def"
@@ -126,7 +126,7 @@ typedef struct _vsi_nn_op_proc
vsi_nn_op_optimize_t optimize;
uint32_t input_num;
uint32_t output_num;
-} vsi_nn_op_proc_t;
+} VSI_PUBLIC_TYPE vsi_nn_op_proc_t;
/*------------------------------------
Functions
diff --git a/src/tim/vx/internal/include/vsi_nn_platform.h b/src/tim/vx/internal/include/vsi_nn_platform.h
index fc41e9f..f5548c8 100644
--- a/src/tim/vx/internal/include/vsi_nn_platform.h
+++ b/src/tim/vx/internal/include/vsi_nn_platform.h
@@ -26,13 +26,6 @@
#include "vsi_nn_feature_config.h"
-#ifdef VSI_40BIT_VA_SUPPORT
-#ifdef VX_VA40_EXT_SUPPORT
-#undef VX_VA40_EXT_SUPPORT
-#endif
-#define VX_VA40_EXT_SUPPORT 1
-#endif
-
#include
#include
#include
@@ -48,12 +41,4 @@
*/
#include "vsi_nn_compatibility.h"
-#if defined(__cplusplus)
-extern "C"{
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
#endif
diff --git a/src/tim/vx/internal/include/vsi_nn_pre_post_process.h b/src/tim/vx/internal/include/vsi_nn_pre_post_process.h
index 5da4b82..227b17f 100644
--- a/src/tim/vx/internal/include/vsi_nn_pre_post_process.h
+++ b/src/tim/vx/internal/include/vsi_nn_pre_post_process.h
@@ -87,6 +87,7 @@ typedef enum
VSI_NN_SOURCE_FORMAT_IMAGE_RGB888_PLANAR_SEP,
VSI_NN_SOURCE_FORMAT_IMAGE_YUYV422,
VSI_NN_SOURCE_FORMAT_IMAGE_UYVY422,
+ VSI_NN_SOURCE_FORMAT_IMAGE_NV21,
} vsi_nn_preprocess_source_format_e;
/**
@@ -98,7 +99,7 @@ typedef struct
vsi_nn_preprocess_type_e type;
/** Preprocess paramters */
void* param;
-} vsi_nn_preprocess_base_t;
+} VSI_PUBLIC_TYPE vsi_nn_preprocess_base_t;
/**
* Postprocess base structure
@@ -109,7 +110,7 @@ typedef struct
vsi_nn_postprocess_type_e type;
/** Postrocess paramters */
void* param;
-} vsi_nn_postprocess_base_t;
+} VSI_PUBLIC_TYPE vsi_nn_postprocess_base_t;
/**
* Process dtype convert parameter structure
diff --git a/src/tim/vx/internal/include/vsi_nn_pub.h b/src/tim/vx/internal/include/vsi_nn_pub.h
index d36f570..48525a4 100644
--- a/src/tim/vx/internal/include/vsi_nn_pub.h
+++ b/src/tim/vx/internal/include/vsi_nn_pub.h
@@ -44,6 +44,7 @@
#include "vsi_nn_types.h"
#include "vsi_nn_version.h"
#include "vsi_nn_assert.h"
+#include "vsi_nn_post.h"
#include "vsi_nn_rnn.h"
#include "vsi_nn_test.h"
#include "vsi_nn_pre_post_process.h"
diff --git a/src/tim/vx/internal/include/vsi_nn_rnn.h b/src/tim/vx/internal/include/vsi_nn_rnn.h
index 519d783..21d4009 100644
--- a/src/tim/vx/internal/include/vsi_nn_rnn.h
+++ b/src/tim/vx/internal/include/vsi_nn_rnn.h
@@ -44,7 +44,7 @@ typedef struct
{
vsi_nn_tensor_id_t output;
vsi_nn_tensor_id_t inputs[VSI_NN_MAX_RNN_CONNECTION_INPUTS];
-} vsi_nn_rnn_external_connection_t;
+} VSI_PUBLIC_TYPE vsi_nn_rnn_external_connection_t;
/*-------------------------------------------
Procedure to prepare input data, return FALSE
diff --git a/src/tim/vx/internal/include/vsi_nn_tensor.h b/src/tim/vx/internal/include/vsi_nn_tensor.h
index 7a33586..5b7bdb9 100644
--- a/src/tim/vx/internal/include/vsi_nn_tensor.h
+++ b/src/tim/vx/internal/include/vsi_nn_tensor.h
@@ -63,7 +63,7 @@ typedef enum
VSI_NN_DIM_FMT_NHWC = 0x01,
VSI_NN_DIM_FMT_NA = 0xFF,
VSI_NN_DIM_FMT_AUTO = VSI_NN_DIM_FMT_NA - 1,
-} vsi_nn_dim_fmt_e;
+} VSI_PUBLIC_TYPE vsi_nn_dim_fmt_e;
/**
* Quantization type.
@@ -125,7 +125,7 @@ typedef struct vsi_nn_dtype
#endif
};
};
-} vsi_nn_dtype_t;
+} VSI_PUBLIC_TYPE vsi_nn_dtype_t;
/**
* Tensor Attribute
@@ -150,15 +150,13 @@ typedef struct vsi_nn_tensor_attr
#ifdef VX_CREATE_TENSOR_SUPPORT_PHYSICAL
vsi_memory_type_e vsi_memory_type;
#endif
-#if VX_STREAM_PROCESSOR_SUPPORT
- vsi_bool is_dummy;
-#endif
-} vsi_nn_tensor_attr_t;
+ // DO NOT modify this struct.
+} VSI_PUBLIC_TYPE vsi_nn_tensor_attr_t;
/**
* Tensor structure
*/
-struct _vsi_nn_tensor
+struct VSI_PUBLIC_TYPE _vsi_nn_tensor
{
/** Tensor attributes */
vsi_nn_tensor_attr_t attr;
@@ -168,6 +166,7 @@ struct _vsi_nn_tensor
vx_weights_biases_parameter wb;
/** Mark tensor swapped by vxSwapTensor */
int8_t is_swapped;
+ // DO NOT modify this struct.
};
/**
diff --git a/src/tim/vx/internal/include/vsi_nn_tensor_util.h b/src/tim/vx/internal/include/vsi_nn_tensor_util.h
index 1083d21..4b997f3 100644
--- a/src/tim/vx/internal/include/vsi_nn_tensor_util.h
+++ b/src/tim/vx/internal/include/vsi_nn_tensor_util.h
@@ -321,10 +321,38 @@ OVXLIB_API vsi_status vsi_nn_CopyDataToTensor
);
/**
- * Flush Handle
- * If you swap the handle of the tensor, you should flush it.
+ * Swap a tensor's handle
+ * The tensor's current handle is returned through old_ptr (for read/write) and new_ptr becomes its new handle.
*
- * @param[in] tensor Tensor handle.
+ * The APP SHOULD keep track of the handles it created itself in order to manage memory correctly,
+ * and must never free or write data through a handle allocated by OVXLIB.
+ *
+ * OVXLIB no longer maintains the original handle if new_ptr == NULL.
+ *
+ * Before freeing the data of a handle allocated by the APP,
+ * vsi_nn_SwapHandle(tensor, NULL, FALSE, &prev_ptr) should be called to take control of the handle.
+ *
+ * @param[in] tensor Tensor.
+ * @param[in] new_ptr New handle of tensor.
+ * @param[in] is_new_ptr_malloc_by_ovxlib TRUE if new_ptr was allocated by ovxlib; only relevant when new_ptr is not NULL.
+ * @param[out] old_ptr Old handle of tensor.
+ *
+ * @return VSI_SUCCESS on success, or error code otherwise.
+ */
+OVXLIB_API vsi_status vsi_nn_SwapHandle
+(
+ vsi_nn_tensor_t* tensor,
+ void* new_ptr,
+ vsi_bool is_new_ptr_malloc_by_ovxlib,
+ void** old_ptr
+);
+
+/**
+ * Flush Handle
+ * Call this function to flush new data to the handle in hand.
+ * vsi_nn_FlushHandle() should be called last, to complete the data writing operation.
+ *
+ * @param[in] tensor Tensor.
*
* @return VSI_SUCCESS on success, or error core otherwise.
*/
@@ -333,6 +361,20 @@ OVXLIB_API vsi_status vsi_nn_FlushHandle
const vsi_nn_tensor_t * tensor
);
+/**
+ * Invalidate Handle
+ * Invalidate the handle before copying data out of the tensor handle.
+ * Before reading data from the handle, the APP should call vsi_nn_InvalidateHandle() to invalidate the cache.
+ *
+ * @param[in] tensor Tensor.
+ *
+ * @return VSI_SUCCESS on success, or error code otherwise.
+ */
+OVXLIB_API vsi_status vsi_nn_InvalidateHandle
+(
+ const vsi_nn_tensor_t* tensor
+);
+
/**
* Get Tensor Handle
* Get the handle of the tensor
@@ -348,6 +390,34 @@ OVXLIB_API vsi_status vsi_nn_GetTensorHandle
void** ptr
);
+/**
+ * Get Tensor is_scalar
+ * Get the is_scalar of the tensor
+ *
+ * @param[in] tensor Tensor.
+ *
+ * @return is_scalar flag of the tensor.
+ */
+OVXLIB_API int8_t vsi_nn_GetTensorIsScalar
+(
+ vsi_nn_tensor_t* tensor
+);
+
+/**
+ * Set Tensor is_scalar
+ * Set the is_scalar for the tensor
+ *
+ * @param[in] tensor Tensor.
+ * @param[in] is_scalar New is_scalar value of the tensor.
+ *
+ * @return VSI_SUCCESS on success, or error code otherwise.
+ */
+OVXLIB_API vsi_status vsi_nn_SetTensorIsScalar
+(
+ vsi_nn_tensor_t* tensor,
+ int8_t is_scalar
+);
+
OVXLIB_API vsi_status vsi_nn_CopyRawDataToTensor
(
vsi_nn_graph_t* graph,
@@ -722,13 +792,6 @@ vsi_nn_tensor_t* vsi_nn_ConstTensorAdd_impl
#define vsi_nn_ConstTensorAdd(_graph, _output_attr, ...) \
vsi_nn_ConstTensorAdd_impl(_graph, _output_attr, __VA_ARGS__, END_OF_VARIADIC_ARGUMENTS)
-vsi_status vsi_nn_SwapHandle
- (
- vsi_nn_tensor_t * tensor,
- void * new_ptr,
- void ** old_ptr
- );
-
vsi_bool vsi_nn_ConvertTensor
(
vsi_nn_graph_t* graph,
diff --git a/src/tim/vx/internal/include/vsi_nn_types.h b/src/tim/vx/internal/include/vsi_nn_types.h
index 0a655c1..6238e4f 100644
--- a/src/tim/vx/internal/include/vsi_nn_types.h
+++ b/src/tim/vx/internal/include/vsi_nn_types.h
@@ -27,7 +27,6 @@
#include
#include "vsi_nn_platform.h"
-#include "vsi_nn_feature_config.h"
#if defined(__cplusplus)
extern "C"{
@@ -109,7 +108,7 @@ typedef enum
VSI_NN_PAD_AUTO,
VSI_NN_PAD_VALID,
VSI_NN_PAD_SAME
-} vsi_nn_pad_e;
+} VSI_PUBLIC_TYPE vsi_nn_pad_e;
/** reduce type enum */
typedef enum
@@ -142,14 +141,14 @@ typedef enum
{
VSI_NN_ROUND_CEIL,
VSI_NN_ROUND_FLOOR
-} vsi_nn_round_type_e;
+} VSI_PUBLIC_TYPE vsi_nn_round_type_e;
/** Optimize driction */
typedef enum
{
VSI_NN_OPTIMIZE_FORWARD,
VSI_NN_OPTIMIZE_BACKWARD
-} vsi_nn_opt_direction_e;
+} VSI_PUBLIC_TYPE vsi_nn_opt_direction_e;
#ifdef VX_CREATE_TENSOR_SUPPORT_PHYSICAL
typedef enum
{
@@ -195,7 +194,7 @@ typedef enum
#endif
VSI_NN_TYPE_VDATA = VX_TYPE_USER_STRUCT_START + 0x1,
-}vsi_nn_type_e;
+} VSI_PUBLIC_TYPE vsi_nn_type_e;
typedef int32_t vsi_nn_activation_e; enum
{
@@ -236,7 +235,7 @@ typedef enum
{
VSI_NN_GRAPH_PRELOAD_VIPSRAM,
VSI_NN_GRAPH_PRELOAD_AXISRAM
-} vsi_nn_graph_attr_preload_type_e;
+} VSI_PUBLIC_TYPE vsi_nn_graph_attr_preload_type_e;
typedef enum _vsi_nn_node_attr_preload_type_e
{
@@ -257,23 +256,35 @@ typedef enum _vsi_nn_yuv_type
VSI_NN_YUV_TYPE_UYUV422
}vsi_nn_yuv_type;
+typedef enum _vsi_nn_nv_type
+{
+ VSI_NN_YUV_TYPE_NV12,
+ VSI_NN_YUV_TYPE_NV21
+}vsi_nn_nv_type;
+
+typedef enum _vsi_nn_roi_align_type_e
+{
+ VSI_NN_ROI_ALIGN_ANDROID,
+ VSI_NN_ROI_ALIGN
+} vsi_nn_roi_align_type_e;
+
/** Deprecated */
typedef uint32_t vsi_nn_size_t;
/** Tensor id type */
-typedef uint32_t vsi_nn_tensor_id_t;
+typedef uint32_t VSI_PUBLIC_TYPE vsi_nn_tensor_id_t;
/** Node id type */
typedef uint32_t vsi_nn_node_id_t;
/** @see _vsi_nn_graph */
-typedef struct _vsi_nn_graph vsi_nn_graph_t;
+typedef struct _vsi_nn_graph VSI_PUBLIC_TYPE vsi_nn_graph_t;
/** @see _vsi_nn_node */
-typedef struct _vsi_nn_node vsi_nn_node_t;
+typedef struct _vsi_nn_node VSI_PUBLIC_TYPE vsi_nn_node_t;
/** @see _vsi_nn_tensor */
-typedef struct _vsi_nn_tensor vsi_nn_tensor_t;
+typedef struct _vsi_nn_tensor VSI_PUBLIC_TYPE vsi_nn_tensor_t;
#if defined(__cplusplus)
}
diff --git a/src/tim/vx/internal/include/vsi_nn_version.h b/src/tim/vx/internal/include/vsi_nn_version.h
index 5079bfe..280f0cc 100644
--- a/src/tim/vx/internal/include/vsi_nn_version.h
+++ b/src/tim/vx/internal/include/vsi_nn_version.h
@@ -33,7 +33,7 @@ extern "C"{
#define VSI_NN_VERSION_MAJOR 1
#define VSI_NN_VERSION_MINOR 1
-#define VSI_NN_VERSION_PATCH 57
+#define VSI_NN_VERSION_PATCH 74
#define VSI_NN_VERSION \
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
diff --git a/src/tim/vx/internal/src/Android.mk b/src/tim/vx/internal/src/Android.mk
new file mode 100644
index 0000000..a1b3683
--- /dev/null
+++ b/src/tim/vx/internal/src/Android.mk
@@ -0,0 +1,144 @@
+#
+# Build the Vivante ovxlib shared library (libovxlib) for Android.
+#
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+ifeq ($(AQROOT),)
+$(error Please set AQROOT env first)
+endif
+
+include $(AQROOT)/Android.mk.def
+
+ifeq ($(PLATFORM_VENDOR),1)
+LOCAL_VENDOR_MODULE := true
+endif
+
+LOCAL_SRC_FILES := \
+ vsi_nn_context.c \
+ vsi_nn_client_op.c \
+ vsi_nn_graph.c \
+ vsi_nn_node_attr_template.c \
+ vsi_nn_node.c \
+ vsi_nn_ops.c \
+ vsi_nn_daemon.c \
+ vsi_nn_tensor.c \
+ vsi_nn_version.c \
+ vsi_nn_rnn.c \
+ vsi_nn_rnn_helper.c \
+ vsi_nn_internal_node.c \
+ vsi_nn_log.c \
+ vsi_nn_graph_optimization.c \
+ vsi_nn_pre_post_process.c
+
+
+LOCAL_SRC_FILES += \
+ utils/vsi_nn_code_generator.c \
+ utils/vsi_nn_binary_tree.c \
+ utils/vsi_nn_map.c \
+ utils/vsi_nn_hashmap.c \
+ utils/vsi_nn_link_list.c \
+ utils/vsi_nn_math.c \
+ utils/vsi_nn_dtype.c \
+ utils/vsi_nn_dtype_util.c \
+ utils/vsi_nn_shape_util.c \
+ utils/vsi_nn_limits.c \
+ utils/vsi_nn_tensor_op.c \
+ utils/vsi_nn_util.c \
+ utils/vsi_nn_dlfcn.c \
+ utils/vsi_nn_constraint_check.c
+
+
+LOCAL_SRC_FILES += \
+ quantization/vsi_nn_dynamic_fixed_point.c \
+ quantization/vsi_nn_asymmetric_affine.c \
+ quantization/vsi_nn_perchannel_symmetric_affine.c \
+
+
+LOCAL_SRC_FILES += \
+ post/vsi_nn_post_fasterrcnn.c \
+ post/vsi_nn_post_cmupose.c
+
+LOCAL_SRC_FILES += \
+ cpu_backend/vsi_nn_cpu_backend.c \
+ cpu_backend/vsi_nn_cpu_backend_conv2d.c \
+ cpu_backend/vsi_nn_cpu_backend_deconv2d.c \
+ cpu_backend/npuref_interface.c
+
+
+LOCAL_SRC_FILES += libnnext/vsi_nn_libnnext_resource.c \
+ libnnext/vsi_nn_vxkernel.c
+
+LOCAL_SRC_FILES += kernel/vsi_nn_kernel.c \
+ kernel/vsi_nn_kernel_util.c \
+ kernel/vsi_nn_kernel_backend.c \
+ kernel/vsi_nn_kernel_eltwise.c \
+ kernel/vsi_nn_kernel_selector.c \
+ kernel/vsi_nn_kernel_node.c \
+ kernel/vsi_nn_kernel_param.c \
+ kernel/vsi_nn_kernel_gpu_shape_optimize.c \
+ kernel/vsi_nn_kernel_lut.c \
+ kernel/vsi_nn_spinst.c \
+ kernel/vsi_nn_sp_unit_operation.c \
+ kernel/vsi_nn_sp_lut.c \
+ kernel/vsi_nn_gpu.c
+
+LIBNNEXT_KERNEL_SOURCES := $(wildcard $(LOCAL_PATH)/libnnext/ops/kernel/*.c)
+LOCAL_SRC_FILES += $(LIBNNEXT_KERNEL_SOURCES:$(LOCAL_PATH)/%=%)
+
+KERNEL_SOURCES := $(wildcard $(LOCAL_PATH)/kernel/cl/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/cpu/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/evis/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/vx/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/sp/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/evis/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/cl/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/cpu/*.c)
+KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/sp/*.c)
+LOCAL_SRC_FILES += $(KERNEL_SOURCES:$(LOCAL_PATH)/%=%)
+
+OPERATION_SOURCES := $(wildcard $(LOCAL_PATH)/ops/*.c)
+LOCAL_SRC_FILES += $(OPERATION_SOURCES:$(LOCAL_PATH)/%=%)
+
+
+LOCAL_SHARED_LIBRARIES := \
+ liblog \
+ libjpeg \
+ libGAL \
+ libOpenVX \
+ libVSC \
+ libdl
+
+LOCAL_C_INCLUDES += \
+ external/libjpeg-turbo \
+ $(AQROOT)/sdk/inc/CL \
+ $(AQROOT)/sdk/inc/VX \
+ $(AQROOT)/sdk/inc/ \
+ $(AQROOT)/sdk/inc/HAL \
+ $(LOCAL_PATH)/../include \
+ $(LOCAL_PATH)/../include/ops \
+ $(LOCAL_PATH)/../include/utils \
+ $(LOCAL_PATH)/../include/infernce \
+ $(LOCAL_PATH)/../include/client \
+ $(LOCAL_PATH)/../include/cpu_backend \
+ $(LOCAL_PATH)/../include/libnnext \
+ $(LOCAL_PATH)/../src
+
+LOCAL_CFLAGS := \
+ -DLINUX \
+ -D'OVXLIB_API=__attribute__((visibility("default")))' \
+ -DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION)\
+ -Wno-sign-compare \
+ -Wno-implicit-function-declaration \
+ -Wno-sometimes-uninitialized \
+ -Wno-unused-parameter \
+ -Wno-enum-conversion \
+ -Wno-missing-field-initializers \
+ -Wno-tautological-compare \
+ -Wno-missing-braces
+
+LOCAL_MODULE:= libovxlib
+LOCAL_MODULE_TAGS := optional
+LOCAL_PRELINK_MODULE := false
+include $(BUILD_SHARED_LIBRARY)
diff --git a/src/tim/vx/internal/src/custom/ops/kernel/cpu/custom_sample_cpu.c b/src/tim/vx/internal/src/custom/ops/kernel/cpu/custom_sample_cpu.c
new file mode 100644
index 0000000..a1e50a4
--- /dev/null
+++ b/src/tim/vx/internal/src/custom/ops/kernel/cpu/custom_sample_cpu.c
@@ -0,0 +1,184 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#include
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_platform.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_node.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_test.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "utils/vsi_nn_dtype_util.h"
+#include "kernel/vsi_nn_kernel.h"
+#include "libnnext/vsi_nn_vxkernel.h"
+
+#define _CPU_ARG_NUM (1) /* scalar arguments: axis */
+#define _CPU_INPUT_NUM (2) /* input tensors */
+#define _CPU_OUTPUT_NUM (1) /* output tensors */
+#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM) /* all tensor parameters */
+#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM) /* total kernel parameters */
+#define _KERNEL_NAME ("com.vivantecorp.extension.CustomSampleVXC") /* name stored in _kernel_info */
+
+#define SCALAR_INPUT_AXIS (3) /* parameter slot of the axis scalar */
+
+__BEGIN_DECLS
+
+/* CPU reference executor for the custom sample op: out[i] = in0[i] + in1[0].
+ * param holds { input0, input1, output, axis scalar }; node and param_size are unused.
+ * Returns the status of the output write, or a failure status if any step before it fails. */
+DEF_KERNEL_EXECUTOR(_softmax_compute)
+    (
+    vsi_nn_kernel_node_t node,
+    const vsi_nn_kernel_node_param_t* param,
+    size_t param_size
+    )
+{
+    vsi_status status = VSI_FAILURE;
+    float *buffer[_CPU_IO_NUM] = {NULL};
+    vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = {NULL};
+    vsi_nn_kernel_tensor_attr_t *attr[_CPU_IO_NUM] = {NULL};
+    uint32_t i = 0, out_elements = 0;
+    int32_t axis = 0;
+
+    for(i = 0; i < _CPU_IO_NUM; i++)
+    {
+        tensors[i] = (vsi_nn_kernel_tensor_t)param[i]; /* input0, input1, output */
+        attr[i] = vsi_nn_kernel_tensor_attr_create(tensors[i]);
+        CHECK_PTR_FAIL_GOTO(attr[i], "Create tensor attr fail.", final);
+    }
+
+    /* axis belongs to the kernel signature; this sample does not use its value. */
+    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
+    CHECK_STATUS_FAIL_GOTO(status, final );
+    /* NOTE(review): CHECK_PTR_FAIL_GOTO is assumed to jump without setting status,
+     * so keep status failed until the output has actually been written. */
+    status = VSI_FAILURE;
+    /* Fetch both inputs as float32 buffers. */
+    buffer[0] = (float *)vsi_nn_kernel_tensor_create_buffer(tensors[0], attr[0], TRUE);
+    CHECK_PTR_FAIL_GOTO(buffer[0], "Create input0 buffer fail.", final);
+    buffer[1] = (float *)vsi_nn_kernel_tensor_create_buffer(tensors[1], attr[1], TRUE);
+    CHECK_PTR_FAIL_GOTO(buffer[1], "Create input1 buffer fail.", final);
+
+    out_elements = (uint32_t)vsi_nn_kernel_tensor_attr_get_size(attr[2]);
+    buffer[2] = (float *)calloc(out_elements, sizeof(float)); /* zero-filled output */
+    CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
+
+    /* Add the first element of input1 to every element of input0. */
+    for(i = 0; i < out_elements; i++)
+    {
+        buffer[2][i] = buffer[0][i] + buffer[1][0];
+    }
+
+    status = vsi_nn_kernel_tensor_write_from_float(
+        tensors[2], attr[2], buffer[2], out_elements );
+final:
+    for(i = 0; i < _CPU_IO_NUM; i ++)
+    {
+        free(buffer[i]); /* free(NULL) is a no-op */
+        vsi_nn_kernel_tensor_attr_release(&attr[i]);
+    }
+    return status;
+}
+
+static vx_param_description_t kernel_param_def[] =
+{ /* one entry per kernel parameter, in the slot order the executor reads param[] */
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 0: input0 */
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 1: input1 */
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 2: output */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED} /* 3: axis scalar (SCALAR_INPUT_AXIS) */
+};
+
+static const vx_kernel_description_t _kernel_info =
+{ /* static description copied into kernel->info by _query_kernel */
+ KERNEL_ID_PLACEHOLDER,
+ _KERNEL_NAME, /* kernel name string */
+ _softmax_compute, /* CPU executor defined in this file */
+ kernel_param_def, /* parameter table */
+ _cnt_of_array( kernel_param_def ), /* presumably the number of entries in the table */
+ vsi_nn_KernelValidator,
+ NULL,
+ NULL,
+ vsi_nn_KernelInitializer,
+ vsi_nn_KernelDeinitializer
+};
+
+static vsi_status _query_kernel(
+    vsi_nn_tensor_t* const* const inputs,   /* unused */
+    vsi_nn_tensor_t* const* const outputs,  /* unused */
+    vsi_nn_kernel_t* kernel                 /* out: info receives the description */
+    )
+{
+    /* Only one CPU kernel exists for this op, so selection is a plain copy. */
+    memcpy( &kernel->info, &_kernel_info, sizeof(_kernel_info) );
+    return VSI_SUCCESS;
+}
+
+/* Build the CPU node for the custom sample op.
+ * Reads "axis" from params, creates the kernel node on graph and binds
+ * { input0, input1, output, axis scalar } to it. Returns NULL when the kernel
+ * query or the node creation fails; input_num and output_num are not consulted. */
+static vsi_nn_kernel_node_t _setup
+    (
+    vsi_nn_graph_t * graph,
+    vsi_nn_tensor_t ** inputs,
+    size_t input_num,
+    vsi_nn_tensor_t ** outputs,
+    size_t output_num,
+    const vsi_nn_kernel_param_t * params,
+    vsi_nn_kernel_t * kernel
+    )
+{
+    vsi_nn_kernel_node_param_t node_params[_CPU_PARAM_NUM] = {NULL};
+    vsi_nn_kernel_node_t cpu_node = NULL;
+    int32_t axis = vsi_nn_kernel_param_get_int32(params, "axis");
+
+    if(_query_kernel(inputs, outputs, kernel) != VSI_SUCCESS)
+    {
+        return NULL;
+    }
+
+    cpu_node = vsi_nn_kernel_create_node(graph, kernel);
+    if(cpu_node != NULL)
+    {
+        /* Tensors go through pack_io; slot SCALAR_INPUT_AXIS carries the axis scalar. */
+        vsi_nn_kernel_node_pack_io(node_params, _CPU_PARAM_NUM,
+            inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM);
+        node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
+            graph, I32, &axis);
+
+        /* NOTE(review): the pass_param status is not checked; the node is returned regardless. */
+        (void)vsi_nn_kernel_node_pass_param(cpu_node, node_params, _CPU_PARAM_NUM);
+        /* The temporary scalar is released right after the parameters are passed. */
+        vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT_AXIS]);
+    }
+
+    return cpu_node;
+}
+
+__END_DECLS
+
+REGISTER_BACKEND_CPU( custom_sample, _setup ) /* presumably registers _setup as the CPU backend of kernel "custom_sample" */
diff --git a/src/tim/vx/internal/src/custom/ops/op_custom_sample.c b/src/tim/vx/internal/src/custom/ops/op_custom_sample.c
new file mode 100644
index 0000000..1459539
--- /dev/null
+++ b/src/tim/vx/internal/src/custom/ops/op_custom_sample.c
@@ -0,0 +1,103 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_platform.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_node.h"
+#include "vsi_nn_ops.h"
+#include "vsi_nn_log.h"
+#include "kernel/vsi_nn_kernel.h"
+
+/* Compute: pack "axis" into a kernel param list and select the custom_sample kernel.
+ * Returns VSI_SUCCESS only if a node was created (self->n non-NULL), else VSI_FAILURE. */
+static vsi_status op_compute
+    (
+    vsi_nn_node_t * self,
+    vsi_nn_tensor_t ** inputs,
+    vsi_nn_tensor_t ** outputs
+    )
+{
+    vsi_status status = VSI_FAILURE;
+    vsi_nn_custom_sample_param *p = &self->nn_param.custom_sample;
+    vsi_nn_kernel_param_t *param = vsi_nn_kernel_param_create();
+
+    if (param == NULL)
+    {
+        return status;
+    }
+    vsi_nn_kernel_param_add_int32(param, "axis", p->axis);
+    self->n = (vx_node)vsi_nn_kernel_selector(self->graph, "custom_sample",
+        inputs, 2, outputs, 1, param);
+    status = (self->n != NULL) ? VSI_SUCCESS : VSI_FAILURE;
+    vsi_nn_kernel_param_release(&param);
+    return status;
+}
+
+static vsi_bool op_check
+ (
+ vsi_nn_node_t * self,
+ vsi_nn_tensor_t ** inputs,
+ vsi_nn_tensor_t ** outputs
+ )
+{
+ /* TODO: Check params. Nothing is validated yet, so this always returns TRUE. */
+ return TRUE;
+} /* op_check() */
+
+/* Infer the output shape from input 0 when the output rank is VSI_NN_DIM_AUTO. */
+static vsi_bool op_setup
+    (
+    vsi_nn_node_t * node,
+    vsi_nn_tensor_t ** inputs,
+    vsi_nn_tensor_t ** outputs
+    )
+{
+    vsi_nn_tensor_attr_t * out_attr = &outputs[0]->attr;
+
+    if (VSI_NN_DIM_AUTO != out_attr->dim_num) { return TRUE; } /* shape already given */
+    out_attr->dim_num = inputs[0]->attr.dim_num;
+    memmove(out_attr->size, inputs[0]->attr.size, inputs[0]->attr.dim_num * sizeof(vsi_size_t));
+    return TRUE;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Registrar: registers the CUSTOM_SAMPLE op with its callbacks (2 inputs, 1 output). */
+DEF_OP_REG
+ (
+ /* op_name */ CUSTOM_SAMPLE,
+ /* init */ NULL,
+ /* compute */ op_compute,
+ /* deinit */ vsi_nn_op_common_deinit,
+ /* check */ op_check,
+ /* setup */ op_setup,
+ /* optimize */ NULL,
+ /* input_num */ 2,
+ /* output_num */ 1
+ );
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/tim/vx/internal/src/kernel/cl/avg_pool3d_cl.c b/src/tim/vx/internal/src/kernel/cl/avg_pool3d_cl.c
new file mode 100644
index 0000000..c0ed53e
--- /dev/null
+++ b/src/tim/vx/internal/src/kernel/cl/avg_pool3d_cl.c
@@ -0,0 +1,354 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+
+#include
+#include
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_tensor.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_prv.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "kernel/vsi_nn_kernel.h"
+
+__BEGIN_DECLS
+
+/*
+ * Define kernel meta.
+ * Internal identifiers for the kernels implemented in this file; avg_pool3d
+ * is the only entry.
+ */
+typedef enum
+{
+ INTERNAL_KERNEL_AVG_POOL3D,
+} _internal_kernel_e;
+
+/* Source name registered for every avg_pool3d kernel variant. */
+#define _AVG_POOL3D_KERNEL_SOURCE_NAME "avg_pool3d"
+
+// Add kernel hashtable here
+/* AVG_POOL3D_HASH_KEY packs the input dtype shifted left by 8 bits together
+ * with the output dtype in the low bits.
+ * AVG_POOL3D_KERNELS expands to one _kernel_map_type initializer
+ * { key, "cl.avg_pool3d_<IN>to<OUT>" function name, source name } followed by
+ * a trailing comma, so table rows are written without separators. */
+#define AVG_POOL3D_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
+ (( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
+#define AVG_POOL3D_KERNELS( IN_DTYPE, OUT_DTYPE ) \
+ { AVG_POOL3D_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
+ CVIVANTE_NAMESPACE("cl.avg_pool3d_"#IN_DTYPE"to"#OUT_DTYPE), \
+ _AVG_POOL3D_KERNEL_SOURCE_NAME }, \
+
+/* One row of the kernel lookup table searched by _query_kernel(). */
+typedef struct
+{
+ uint32_t key; /* packed dtype key built with AVG_POOL3D_HASH_KEY */
+ char * function_name; /* kernel name copied into kernel->info.name */
+ const char * source_name; /* name passed to vsi_nn_kernel_add_source() */
+} _kernel_map_type;
+
+/* Available kernel variants, keyed by (input dtype, output dtype).
+ * _query_kernel() folds narrower tensor dtypes (F16, U8, I8, I16) onto the
+ * F32/U32/I32 rows below; BF16 is matched directly. */
+static const _kernel_map_type _avg_pool3d_kernel_map[] =
+{
+ // Register kernel here
+ AVG_POOL3D_KERNELS( F32, F32 )
+ AVG_POOL3D_KERNELS( F32, U32 )
+ AVG_POOL3D_KERNELS( F32, I32 )
+ AVG_POOL3D_KERNELS( U32, U32 )
+ AVG_POOL3D_KERNELS( U32, F32 )
+ AVG_POOL3D_KERNELS( I32, I32 )
+ AVG_POOL3D_KERNELS( I32, F32 )
+ AVG_POOL3D_KERNELS( BF16, BF16 )
+};
+
+
+/*
+ * Kernel params
+ *
+ * Index 0 is the input tensor and index 1 the output tensor. Indices 2..19
+ * are scalars, filled by _setup() in this order:
+ *   ksize_x, ksize_y, ksize_z, stride_x, stride_y, stride_z,
+ *   pad_left, pad_top, pad_front, width, height, depth_in, depth_out,
+ *   inputScale, inputTail, outputScale, outputTail, count_include_pad.
+ * The initializer reads depth_out back from index 14, so the order here and
+ * in _setup() must stay in sync.
+ */
+static vx_param_description_t _avg_pool3d_kernel_param_def[] =
+{
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+};
+/* Total number of kernel parameters (tensors + scalars). */
+#define _AVG_POOL3D_PARAM_NUM _cnt_of_array( _avg_pool3d_kernel_param_def )
+
+/*
+ * Kernel initializer
+ *
+ * Derives the GPU work size from the output tensor shape and applies it with
+ * vsi_nn_kernel_gpu_config().
+ *
+ * node       - kernel node being configured.
+ * param      - node parameters; param[1] is the output tensor and param[14]
+ *              the int32 depth_out scalar.
+ * param_size - number of entries in param (not used here).
+ *
+ * Returns the status of vsi_nn_kernel_gpu_config(), or VSI_FAILURE when the
+ * depth_out scalar cannot be read, is not positive, or the output tensor
+ * attributes cannot be created.
+ */
+DEF_KERNEL_INITIALIZER(_avg_pool3d_initializer)
+    (
+    vsi_nn_kernel_node_t node,
+    const vsi_nn_kernel_node_param_t * param,
+    size_t param_size
+    )
+{
+    gpu_param_t gpu_param = {
+        3,
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0}
+        };
+    vsi_status status = VSI_FAILURE;
+    vx_tensor output = (vx_tensor)param[1];
+    vx_scalar depth_out = (vx_scalar)param[14];
+    int32_t depth_out_value = 0;
+    vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
+    vsi_size_array_t *output_shape = NULL;
+
+    /* depth_out becomes the divisor of the z work size below, so a failed
+     * read or a non-positive value must not reach the division. */
+    if ( VSI_SUCCESS != vxReadScalarValue(depth_out, &depth_out_value)
+        || depth_out_value <= 0 )
+    {
+        goto final;
+    }
+
+    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
+    CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
+
+    output_shape = output_attr->shape;
+
+    /* One work item per output x/y position; along z each work item covers
+     * depth_out_value elements. */
+    gpu_param.global_scale[0] = 1;
+    gpu_param.global_scale[1] = 1;
+    gpu_param.global_scale[2] = depth_out_value;
+    gpu_param.global_size[0] = (output_shape->data[0] + gpu_param.global_scale[0] - 1)
+        / gpu_param.global_scale[0];
+    gpu_param.global_size[1] = (output_shape->data[1] + gpu_param.global_scale[1] - 1)
+        / gpu_param.global_scale[1];
+    gpu_param.global_size[2] = (output_shape->data[2] + gpu_param.global_scale[2] - 1)
+        / gpu_param.global_scale[2];
+    status = vsi_nn_kernel_gpu_config( node, &gpu_param );
+
+final:
+    if (output_attr)
+    {
+        vsi_nn_kernel_tensor_attr_release(&output_attr);
+    }
+
+    return status;
+} /* _avg_pool3d_initializer() */
+
+
+
+/*
+ * Query kernel
+ *
+ * Selects the CL kernel variant for the dtypes of inputs[0] and outputs[0]
+ * and fills kernel->info (name, parameter table, initializer) plus the code
+ * and binary source names.
+ *
+ * Returns VSI_SUCCESS when _avg_pool3d_kernel_map has a matching row,
+ * VSI_FAILURE otherwise (kernel is not modified in that case).
+ */
+static vsi_status _query_kernel
+    (
+    vsi_nn_kernel_t * kernel,
+    vsi_nn_tensor_t * const * const inputs,
+    vsi_nn_tensor_t * const * const outputs
+    /* Add extra params */
+    )
+{
+    const vsi_nn_kernel_dtype_e src_dtype =
+        vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+    const vsi_nn_kernel_dtype_e dst_dtype =
+        vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+    const uint32_t row_cnt = (uint32_t)_cnt_of_array( _avg_pool3d_kernel_map );
+    const _kernel_map_type * row = NULL;
+    uint32_t hash = 0;
+    uint32_t idx = 0;
+
+    /* Fold the tensor dtypes onto the dtype pair the kernels are built for. */
+#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
+    (( in_dtype ) | (out_dtype << 8 ))
+    switch (_PACK_SELECT_KEY(src_dtype, dst_dtype))
+    {
+    case _PACK_SELECT_KEY(F32, F32):
+    case _PACK_SELECT_KEY(F16, F16):
+    case _PACK_SELECT_KEY(F32, F16):
+    case _PACK_SELECT_KEY(F16, F32):
+        hash = AVG_POOL3D_HASH_KEY( F32, F32);
+        break;
+    case _PACK_SELECT_KEY(F32, U8):
+    case _PACK_SELECT_KEY(F16, U8):
+        hash = AVG_POOL3D_HASH_KEY( F32, U32);
+        break;
+    case _PACK_SELECT_KEY(F32, I8):
+    case _PACK_SELECT_KEY(F32, I16):
+    case _PACK_SELECT_KEY(F16, I8):
+    case _PACK_SELECT_KEY(F16, I16):
+        hash = AVG_POOL3D_HASH_KEY( F32, I32);
+        break;
+    case _PACK_SELECT_KEY(U8, U8):
+        hash = AVG_POOL3D_HASH_KEY( U32, U32);
+        break;
+    case _PACK_SELECT_KEY(U8, F16):
+    case _PACK_SELECT_KEY(U8, F32):
+        hash = AVG_POOL3D_HASH_KEY( U32, F32);
+        break;
+    case _PACK_SELECT_KEY(I8, I8):
+    case _PACK_SELECT_KEY(I8, I16):
+    case _PACK_SELECT_KEY(I16, I8):
+    case _PACK_SELECT_KEY(I16, I16):
+        hash = AVG_POOL3D_HASH_KEY( I32, I32);
+        break;
+    case _PACK_SELECT_KEY(I8, F16):
+    case _PACK_SELECT_KEY(I8, F32):
+    case _PACK_SELECT_KEY(I16, F16):
+    case _PACK_SELECT_KEY(I16, F32):
+        hash = AVG_POOL3D_HASH_KEY( I32, F32);
+        break;
+    default:
+        /* Any other combination is looked up as-is. */
+        hash = AVG_POOL3D_HASH_KEY( src_dtype, dst_dtype);
+        break;
+    }
+#undef _PACK_SELECT_KEY
+
+    for ( idx = 0; idx < row_cnt; idx++ )
+    {
+        if ( _avg_pool3d_kernel_map[idx].key == hash )
+        {
+            row = &_avg_pool3d_kernel_map[idx];
+            break;
+        }
+    }
+    if ( NULL == row )
+    {
+        return VSI_FAILURE;
+    }
+
+    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", row->function_name );
+    kernel->info.parameters = _avg_pool3d_kernel_param_def;
+    kernel->info.numParams = _cnt_of_array( _avg_pool3d_kernel_param_def );
+    kernel->info.initialize = _avg_pool3d_initializer;
+    // Register code source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
+        row->source_name );
+    // Register binary source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
+        row->source_name );
+
+    return VSI_SUCCESS;
+} /* _query_kernel() */
+
+
+/*
+ * Create the avg_pool3d CL node.
+ *
+ * Reads the pooling geometry from params, derives the quantization terms
+ * from the first input and output tensor, selects a kernel and passes the
+ * tensors plus 18 scalars to it. The scalars are released again once they
+ * have been handed to the node.
+ *
+ * Returns the created node, or NULL when a shape is not supported by the
+ * GPU, no kernel matches, or node creation fails.
+ */
+static vsi_nn_kernel_node_t _setup
+    (
+    vsi_nn_graph_t * graph,
+    vsi_nn_tensor_t ** inputs,
+    size_t input_num,
+    vsi_nn_tensor_t ** outputs,
+    size_t output_num,
+    const vsi_nn_kernel_param_t * params,
+    vsi_nn_kernel_t * kernel
+    )
+{
+    vsi_nn_kernel_node_param_t node_params[_AVG_POOL3D_PARAM_NUM];
+    vsi_nn_kernel_node_t node = NULL;
+    vsi_status status = VSI_FAILURE;
+    uint32_t slot = 0;
+    int32_t width = (int32_t)inputs[0]->attr.size[0];
+    int32_t height = (int32_t)inputs[0]->attr.size[1];
+    int32_t ksize_x = vsi_nn_kernel_param_get_int32(params, "ksize_x");
+    int32_t ksize_y = vsi_nn_kernel_param_get_int32(params, "ksize_y");
+    int32_t ksize_z = vsi_nn_kernel_param_get_int32(params, "ksize_z");
+    int32_t stride_x = vsi_nn_kernel_param_get_int32(params, "stride_x");
+    int32_t stride_y = vsi_nn_kernel_param_get_int32(params, "stride_y");
+    int32_t stride_z = vsi_nn_kernel_param_get_int32(params, "stride_z");
+    int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
+    int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
+    int32_t pad_front = vsi_nn_kernel_param_get_int32(params, "pad_front");
+    int32_t depth_in = vsi_nn_kernel_param_get_int32(params, "depth_in");
+    int32_t depth_out = vsi_nn_kernel_param_get_int32(params, "depth_out");
+    int32_t count_include_pad = vsi_nn_kernel_param_get_int32(params, "count_include_pad");
+    float out_scale = vsi_nn_get_tensor_scale(outputs[0]);
+    float out_tail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+    float in_scale = vsi_nn_get_tensor_scale(inputs[0]);
+    float in_tail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+
+    if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size, inputs[0]->attr.dim_num ) )
+    {
+        return NULL;
+    }
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size, outputs[0]->attr.dim_num ) )
+    {
+        return NULL;
+    }
+
+    /* The kernel receives the reciprocal output scale and the negated,
+     * scaled input zero point. */
+    out_scale = 1.0f / out_scale;
+    in_tail = -(in_tail * in_scale);
+
+    status = _query_kernel( kernel, inputs, outputs );
+    if ( VSI_SUCCESS != status )
+    {
+        return NULL;
+    }
+
+    node = vsi_nn_kernel_create_node( graph, kernel );
+    if ( !node )
+    {
+        return NULL;
+    }
+
+    /* Set inputs and outputs; scalars follow from slot 2 on, in the order of
+     * the kernel parameter table. */
+    vsi_nn_kernel_node_pack_io( node_params, _AVG_POOL3D_PARAM_NUM,
+        inputs, input_num, outputs, output_num );
+    slot = 2;
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_x );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_y );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_z );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_x );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_y );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_z );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_front );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &width );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &height );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &depth_in );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &depth_out );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &in_scale );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &in_tail );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &out_scale );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &out_tail );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &count_include_pad );
+
+    /* Pass parameters to node. */
+    status = vsi_nn_kernel_node_pass_param( node, node_params, _AVG_POOL3D_PARAM_NUM );
+
+    /* Release every scalar (slots 2..19). */
+    for ( slot = 2; slot < _AVG_POOL3D_PARAM_NUM; slot++ )
+    {
+        vsi_nn_kernel_scalar_release( &node_params[slot] );
+    }
+
+    return node;
+} /* _setup() */
+
+__END_DECLS
+
+/* Register _setup as the CL backend entry point for the avg_pool3d kernel. */
+REGISTER_BACKEND_CL( avg_pool3d, _setup )
+
diff --git a/src/tim/vx/internal/src/kernel/cl/bilinear_grid_sample_cl.c b/src/tim/vx/internal/src/kernel/cl/bilinear_grid_sample_cl.c
new file mode 100644
index 0000000..bda96ff
--- /dev/null
+++ b/src/tim/vx/internal/src/kernel/cl/bilinear_grid_sample_cl.c
@@ -0,0 +1,381 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+
+#include
+#include
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_tensor.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_prv.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "kernel/vsi_nn_kernel.h"
+
+__BEGIN_DECLS
+
+/*
+ * Define kernel meta.
+ * Internal identifiers for the kernels implemented in this file;
+ * bilinear_grid_sample is the only entry.
+ */
+typedef enum
+{
+ INTERNAL_KERNEL_BILINEAR_GRID_SAMPLE,
+} _internal_kernel_e;
+
+/* Source name registered for every bilinear_grid_sample kernel variant. */
+#define _BILINEAR_GRID_SAMPLE_KERNEL_SOURCE() "bilinear_grid_sample"
+
+/* Stringify a macro argument. */
+#define STR(a) #a
+
+// Add kernel hashtable here
+/* Packs the three dtypes into one key: second input shifted left by 20 bits,
+ * first input shifted left by 8 bits, output in the low bits. */
+#define BILINEAR_GRID_SAMPLE_HASH_KEY(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE) \
+ ((IN1_DTYPE << 20) | (IN0_DTYPE << 8) | (OUT_DTYPE))
+
+/* Expands to one _kernel_map_type initializer: key,
+ * "cl.bilinear_grid_sample_<IN0>_<IN1>to<OUT>" function name, source name. */
+#define PACK_KERNEL_MAP(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE) \
+ { \
+ BILINEAR_GRID_SAMPLE_HASH_KEY(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE), \
+ CVIVANTE_NAMESPACE("cl.bilinear_grid_sample_" STR(IN0_DTYPE) "_" STR(IN1_DTYPE) "to" STR(OUT_DTYPE)), \
+ _BILINEAR_GRID_SAMPLE_KERNEL_SOURCE() \
+ }
+
+/* One row of the kernel lookup table searched by _query_kernel(). */
+typedef struct
+{
+ uint32_t key; /* packed dtype key built with BILINEAR_GRID_SAMPLE_HASH_KEY */
+ char * function_name; /* kernel name copied into kernel->info.name */
+ const char * source_name; /* name passed to vsi_nn_kernel_add_source() */
+} _kernel_map_type;
+
+/* Available kernel variants. _query_kernel() maps F16 tensors onto the F32
+ * row before the lookup, so only an all-F32 and an all-U8 kernel are listed. */
+static const _kernel_map_type _bilinear_grid_sample_kernel_map[] =
+{
+ // Register kernel here
+ PACK_KERNEL_MAP(F32, F32, F32 ),
+ PACK_KERNEL_MAP(U8, U8, U8),
+};
+
+
+/*
+ * Kernel params
+ *
+ * Indices 0 and 1 are the two input tensors, index 2 is the output tensor.
+ * Indices 3..13 are the scalars named by the SCALAR_* index macros in this
+ * file. Only the first _BILINEAR_GRID_SAMPLE_PARAM_NUM entries are used
+ * unless the first input or the output is U8.
+ */
+static vx_param_description_t _bilinear_grid_sample_kernel_param_def[] =
+{
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+};
+
+/* Parameter count without quantization scalars: 3 tensors plus the scalars
+ * at indices 3..7. */
+#define _BILINEAR_GRID_SAMPLE_PARAM_NUM 8
+/* Parameter count when the scale/tail scalars are passed too (full table). */
+#define _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM \
+ _cnt_of_array(_bilinear_grid_sample_kernel_param_def)
+
+/* Indices of the scalar parameters inside the node parameter array. */
+#define SCALAR_HALF_INPUT0_W (3)
+#define SCALAR_HALF_INPUT0_H (4)
+#define SCALAR_ADD_VALUE_W (5)
+#define SCALAR_ADD_VALUE_H (6)
+#define SCALAR_DEPTH (7)
+/* The remaining scalars are only created when the U8 kernel is used. */
+#define SCALAR_INPUT0_SCALE (8)
+#define SCALAR_INPUT0_TAIL (9)
+#define SCALAR_INPUT1_SCALE (10)
+#define SCALAR_INPUT1_TAIL (11)
+#define SCALAR_OUTPUT_SCALE (12)
+#define SCALAR_OUTPUT_TAIL (13)
+
+/*
+ * Kernel initializer
+ *
+ * Configures a 2D GPU work size from the output tensor (param[2]): the x size
+ * is the output width passed through gpu_align_p2(..., 4), the y size is the
+ * output height.
+ *
+ * Returns the status of vsi_nn_kernel_gpu_config(), or VSI_FAILURE when the
+ * output tensor attributes cannot be created.
+ */
+DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
+    (
+    vsi_nn_kernel_node_t node,
+    const vsi_nn_kernel_node_param_t * param,
+    size_t param_size
+    )
+{
+    gpu_param_t gpu_param = {3, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
+    vsi_nn_kernel_tensor_attr_t* out_attr = NULL;
+    vsi_size_array_t* dims = NULL;
+    vsi_status status = VSI_FAILURE;
+
+    out_attr = vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]);
+    CHECK_PTR_FAIL_GOTO(out_attr, "Create tensor attr buffer fail.", final);
+
+    dims = out_attr->shape;
+
+    gpu_param.dim = 2;
+
+    gpu_param.global_scale[0] = 1;
+    gpu_param.global_scale[1] = 1;
+    gpu_param.global_scale[2] = 1;
+
+    gpu_param.global_size[0] = gpu_align_p2(
+        (dims->data[0] + gpu_param.global_scale[0] - 1) / gpu_param.global_scale[0],
+        4);
+    gpu_param.global_size[1] =
+        (dims->data[1] + gpu_param.global_scale[1] - 1) / gpu_param.global_scale[1];
+    gpu_param.global_size[2] = 1;
+
+    status = vsi_nn_kernel_gpu_config(node, &gpu_param);
+
+final:
+    if (out_attr)
+    {
+        vsi_nn_kernel_tensor_attr_release(&out_attr);
+        out_attr = NULL;
+    }
+    return status;
+} /* _bilinear_grid_sample_initializer() */
+
+
+
+/*
+ * Query kernel
+ *
+ * Selects the CL kernel variant for the dtypes of inputs[0], inputs[1] and
+ * outputs[0] (F16 is treated as F32) and fills kernel->info plus the code and
+ * binary source names.
+ *
+ * is_use_u8_kernel - set to TRUE when the first input or the output is U8,
+ *                    FALSE otherwise; written even when no kernel matches.
+ *
+ * Returns VSI_SUCCESS when _bilinear_grid_sample_kernel_map has a matching
+ * row, VSI_FAILURE otherwise.
+ */
+static vsi_status _query_kernel
+    (
+    vsi_nn_kernel_t * kernel,
+    vsi_nn_tensor_t * const * const inputs,
+    vsi_nn_tensor_t * const * const outputs,
+    vsi_bool* is_use_u8_kernel
+    )
+{
+    const uint32_t row_cnt = (uint32_t)_cnt_of_array( _bilinear_grid_sample_kernel_map );
+    const _kernel_map_type * row = NULL;
+    vsi_nn_kernel_dtype_e src0_dtype;
+    vsi_nn_kernel_dtype_e src1_dtype;
+    vsi_nn_kernel_dtype_e dst_dtype;
+    size_t param_cnt = 0;
+    uint32_t hash = 0;
+    uint32_t idx = 0;
+
+    src0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+    src1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
+    dst_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+
+    /* Half-precision tensors are served by the F32 kernel. */
+    src0_dtype = (F16 == src0_dtype) ? F32 : src0_dtype;
+    src1_dtype = (F16 == src1_dtype) ? F32 : src1_dtype;
+    dst_dtype = (F16 == dst_dtype) ? F32 : dst_dtype;
+
+    /* The U8 kernel takes the extra scale/tail scalars. */
+    if ((U8 == src0_dtype) || (U8 == dst_dtype))
+    {
+        *is_use_u8_kernel = TRUE;
+        param_cnt = _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM;
+    }
+    else
+    {
+        *is_use_u8_kernel = FALSE;
+        param_cnt = _BILINEAR_GRID_SAMPLE_PARAM_NUM;
+    }
+
+    hash = BILINEAR_GRID_SAMPLE_HASH_KEY(src0_dtype, src1_dtype, dst_dtype);
+
+    for ( idx = 0; idx < row_cnt; idx++ )
+    {
+        if ( _bilinear_grid_sample_kernel_map[idx].key == hash )
+        {
+            row = &_bilinear_grid_sample_kernel_map[idx];
+            break;
+        }
+    }
+    if ( NULL == row )
+    {
+        return VSI_FAILURE;
+    }
+
+    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", row->function_name );
+    kernel->info.parameters = _bilinear_grid_sample_kernel_param_def;
+    kernel->info.numParams = (uint32_t)param_cnt;
+    kernel->info.initialize = _bilinear_grid_sample_initializer;
+    // Register code source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
+        row->source_name );
+    // Register binary source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
+        row->source_name );
+
+    return VSI_SUCCESS;
+} /* _query_kernel() */
+
+/*
+ * Create the bilinear_grid_sample CL node.
+ *
+ * inputs[0] is the tensor being sampled and inputs[1] the second (grid)
+ * input; a grid of rank >= 3 is reshaped so that its first two dimensions
+ * are merged before it is handed to the kernel. The "align_corners" int32
+ * parameter selects how the half-size and offset scalars are derived from
+ * the width and height of inputs[0]. The node border is set to constant mode,
+ * using the zero point of inputs[0] when it is U8 and 0 otherwise.
+ *
+ * Returns the created node, or NULL when a shape is not supported by the
+ * GPU, the grid cannot be reshaped, no kernel matches, or node creation
+ * fails.
+ */
+static vsi_nn_kernel_node_t _setup
+    (
+    vsi_nn_graph_t * graph,
+    vsi_nn_tensor_t ** inputs,
+    size_t input_num,
+    vsi_nn_tensor_t ** outputs,
+    size_t output_num,
+    const vsi_nn_kernel_param_t * params,
+    vsi_nn_kernel_t * kernel
+    )
+{
+    vsi_nn_kernel_node_t node = NULL;
+    vsi_status status = VSI_FAILURE;
+    vsi_nn_kernel_node_param_t node_params[_BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM];
+    vsi_size_t final_shape[VSI_NN_MAX_DIM_NUM] = {1, 1, 1, 1};
+    uint32_t final_in1_rank = 0;
+    vsi_nn_tensor_t* rs_tensors = NULL;
+    vsi_nn_tensor_t* final_tensors[3] = {NULL};
+    vsi_size_t in0_width = inputs[0]->attr.size[0];
+    vsi_size_t in0_height = inputs[0]->attr.size[1];
+    float input0_zp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+    float input0_scale = vsi_nn_get_tensor_scale(inputs[0]);
+    float input0_tail = -(input0_zp * input0_scale);
+    float input1_zp = (float)vsi_nn_get_tensor_zero_point(inputs[1]);
+    float input1_scale = vsi_nn_get_tensor_scale(inputs[1]);
+    float input1_tail = -(input1_zp * input1_scale);
+    float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+    float output_scale = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
+    vsi_bool is_use_u8_kernel = FALSE;
+    int32_t align_corners =
+        vsi_nn_kernel_param_get_int32(params, "align_corners");
+    uint32_t pad_val = 0;
+    int32_t depth = 0;
+    vsi_nn_kernel_dtype_e in0_dtype;
+
+    float half_input0_w, half_input0_h, add_float_value_w, add_float_value_h;
+
+    // Check if gpu can support the size
+    if (!vsi_nn_kernel_gpu_check_shape(inputs[0]->attr.size,
+                                       inputs[0]->attr.dim_num)) {
+        return NULL;
+    }
+
+    if (!vsi_nn_kernel_gpu_check_shape(inputs[1]->attr.size,
+                                       inputs[1]->attr.dim_num)) {
+        return NULL;
+    }
+
+    final_tensors[0] = inputs[0];
+
+    if (inputs[1]->attr.dim_num >= 3) {
+        /* Merge the first two grid dimensions into one. */
+        final_shape[0] = inputs[1]->attr.size[1] * inputs[1]->attr.size[0];
+        final_shape[1] = inputs[1]->attr.size[2];
+        final_shape[2] = 1;
+        final_shape[3] = inputs[1]->attr.dim_num > 3 ? inputs[1]->attr.size[3] : 1;
+        final_in1_rank =
+            inputs[1]->attr.dim_num == 3 ? 2 : inputs[1]->attr.dim_num;
+        if (!vsi_nn_kernel_gpu_check_shape(final_shape, final_in1_rank)) {
+            return NULL;
+        }
+
+        rs_tensors = vsi_nn_reshape_tensor(graph, inputs[1], final_shape, final_in1_rank);
+        if (NULL == rs_tensors) {
+            /* Without the reshaped grid there is nothing valid to bind. */
+            return NULL;
+        }
+        final_tensors[1] = rs_tensors;
+    } else {
+        final_tensors[1] = inputs[1];
+    }
+    final_tensors[2] = outputs[0];
+
+    if (align_corners) {
+        half_input0_w = ((float)in0_width - 1.0f) * 0.5f;
+        half_input0_h = ((float)in0_height - 1.0f) * 0.5f;
+        add_float_value_w = half_input0_w;
+        add_float_value_h = half_input0_h;
+    } else {
+        half_input0_w = (float)in0_width * 0.5f;
+        half_input0_h = (float)in0_height * 0.5f;
+        add_float_value_w = half_input0_w - 0.5f;
+        add_float_value_h = half_input0_h - 0.5f;
+    }
+
+    depth = (int32_t)inputs[0]->attr.size[2];
+    in0_dtype = vsi_nn_kernel_map_dtype(inputs[0]->attr.dtype.vx_type);
+    if (U8 == in0_dtype) {
+        pad_val = inputs[0]->attr.dtype.zero_point;
+    }
+    status = _query_kernel(kernel, inputs, outputs, &is_use_u8_kernel);
+    if ( VSI_SUCCESS == status)
+    {
+        node = vsi_nn_kernel_create_node( graph, kernel );
+        if ( node )
+        {
+            size_t node_params_num = _BILINEAR_GRID_SAMPLE_PARAM_NUM;
+            /* Set inputs and outputs */
+            vsi_nn_kernel_node_pack_io( node_params, _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM,
+                final_tensors, input_num, &final_tensors[2], output_num );
+            node_params[SCALAR_HALF_INPUT0_W] = vsi_nn_kernel_scalar_create( graph, F32, &half_input0_w );
+            node_params[SCALAR_HALF_INPUT0_H] = vsi_nn_kernel_scalar_create( graph, F32, &half_input0_h );
+            node_params[SCALAR_ADD_VALUE_W] = vsi_nn_kernel_scalar_create( graph, F32, &add_float_value_w );
+            node_params[SCALAR_ADD_VALUE_H] = vsi_nn_kernel_scalar_create( graph, F32, &add_float_value_h );
+            node_params[SCALAR_DEPTH] = vsi_nn_kernel_scalar_create( graph, I32, &depth );
+            if (is_use_u8_kernel)
+            {
+                /* The U8 kernel additionally takes the quantization terms. */
+                node_params[SCALAR_INPUT0_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input0_scale );
+                node_params[SCALAR_INPUT0_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &input0_tail );
+                node_params[SCALAR_INPUT1_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input1_scale );
+                node_params[SCALAR_INPUT1_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &input1_tail );
+                node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
+                node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &output_zp );
+                node_params_num = _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM;
+            }
+            /* Pass parameters to node. */
+            status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
+            VSI_ASSERT(status == VSI_SUCCESS);
+            vsi_nn_kernel_scalar_release(&node_params[SCALAR_HALF_INPUT0_W]);
+            vsi_nn_kernel_scalar_release(&node_params[SCALAR_HALF_INPUT0_H]);
+            vsi_nn_kernel_scalar_release(&node_params[SCALAR_ADD_VALUE_W]);
+            vsi_nn_kernel_scalar_release(&node_params[SCALAR_ADD_VALUE_H]);
+            vsi_nn_kernel_scalar_release(&node_params[SCALAR_DEPTH]);
+            if (is_use_u8_kernel) {
+                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT0_SCALE]);
+                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT0_TAIL]);
+                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT1_SCALE]);
+                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT1_TAIL]);
+                vsi_nn_kernel_scalar_release(&node_params[SCALAR_OUTPUT_SCALE]);
+                vsi_nn_kernel_scalar_release(&node_params[SCALAR_OUTPUT_TAIL]);
+            }
+            {
+                // Set default border mode.
+                vx_border_t border;
+                border.mode = VX_BORDER_CONSTANT;
+                border.constant_value.U32 = pad_val;
+                status = vxSetNodeAttribute(
+                    (vx_node)node, VX_NODE_BORDER, &border, sizeof(border));
+                CHECK_STATUS(status);
+            }
+        }
+    }
+
+    vsi_safe_release_tensor(rs_tensors);
+
+    return node;
+} /* _setup() */
+
+__END_DECLS
+
+/* Register _setup as the CL backend entry point for the bilinear_grid_sample kernel. */
+REGISTER_BACKEND_CL( bilinear_grid_sample, _setup )
+
diff --git a/src/tim/vx/internal/src/kernel/cl/clip_cl.c b/src/tim/vx/internal/src/kernel/cl/clip_cl.c
index 38defcc..4b518b2 100644
--- a/src/tim/vx/internal/src/kernel/cl/clip_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/clip_cl.c
@@ -35,6 +35,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
+#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
__BEGIN_DECLS
@@ -258,19 +259,36 @@ static vsi_nn_kernel_node_t _setup
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
+ vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
+ vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
+ vsi_size_t new_rank = 0;
+ vsi_bool ret = TRUE;
- outputScale = 1.0f / outputScale;
- inputTail = -(inputTail * inputScale);
+ ret = vsi_nn_kernel_optimize_element_shape(
+ inputs[0]->attr.size, inputs[0]->attr.dim_num, shape, &new_rank);
- if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
- inputs[0]->attr.dim_num ) )
+    /* Abort only when the shape could not be optimized; on success `shape`
+     * and `new_rank` feed the reshape that follows.
+     * NOTE(review): assumes a TRUE result means success - confirm against
+     * vsi_nn_kernel_optimize_element_shape(). */
+    if ( !ret )
{
return NULL;
}
- image_2d = (inputs[0]->attr.dim_num == 2 || inputs[0]->attr.size[2] == 1);
+ reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
+ inputs[0], shape, new_rank );
+ reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
+ outputs[0], shape, new_rank );
- status = _query_kernel( kernel, inputs, outputs, image_2d);
+ outputScale = 1.0f / outputScale;
+ inputTail = -(inputTail * inputScale);
+
+    /* A failed reshape or an unsupported shape ends the setup; release the
+     * reshaped tensors first so they are not leaked by the early return. */
+    if ( NULL == reshape_tensors[0] || NULL == reshape_tensors[1] ||
+        !vsi_nn_kernel_gpu_check_shape( reshape_tensors[0]->attr.size,
+            reshape_tensors[0]->attr.dim_num ) )
+    {
+        vsi_safe_release_tensor( reshape_tensors[0] );
+        vsi_safe_release_tensor( reshape_tensors[1] );
+        return NULL;
+    }
+
+ image_2d = (reshape_tensors[0]->attr.dim_num == 2 || reshape_tensors[0]->attr.size[2] == 1);
+
+ status = _query_kernel( kernel, reshape_tensors, &reshape_tensors[1], image_2d);
if ( VSI_SUCCESS == status )
{
@@ -279,7 +297,7 @@ static vsi_nn_kernel_node_t _setup
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _CLIP_PARAM_NUM,
- inputs, input_num, outputs, output_num );
+ reshape_tensors, input_num, &reshape_tensors[1], output_num );
node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
@@ -297,6 +315,10 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
}
}
+
+ vsi_safe_release_tensor( reshape_tensors[0] );
+ vsi_safe_release_tensor( reshape_tensors[1] );
+
return node;
} /* _setup() */
diff --git a/src/tim/vx/internal/src/kernel/cl/comparisons_cl.c b/src/tim/vx/internal/src/kernel/cl/comparisons_cl.c
index 4be70d9..8fec39b 100644
--- a/src/tim/vx/internal/src/kernel/cl/comparisons_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/comparisons_cl.c
@@ -34,6 +34,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
+#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
@@ -287,7 +288,7 @@ static vsi_status _query_kernel
int i;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
- input1_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+ input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (outputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_NONE && output_dtype == I8)
@@ -335,31 +336,85 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
- int32_t operation = 0;
+ int32_t operation = vsi_nn_kernel_param_get_int32( params, "operation" );
+ vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
+ vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
+ vsi_size_t new_rank = 0;
+ vsi_bool ret = FALSE;
float input0Scale = vsi_nn_get_tensor_scale(inputs[0]);
float input0Tail = (float)vsi_nn_get_tensor_zero_point(inputs[0]) * input0Scale;
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale;
- if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
- outputs[0]->attr.dim_num ) )
+ ret = vsi_nn_kernel_optimize_eltwise_shape(
+ inputs[0]->attr.size, inputs[0]->attr.dim_num,
+ inputs[1]->attr.size, inputs[1]->attr.dim_num,
+ outputs[0]->attr.size, outputs[0]->attr.dim_num,
+ shapes[0], shapes[1], shapes[2], &new_rank );
+
+ if ( ret )
{
- return NULL;
+ reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
+ inputs[0], shapes[0], new_rank );
+ reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
+ inputs[1], shapes[1], new_rank );
+ reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
+ outputs[0], shapes[2], new_rank );
+
+#define _swap_tensor(a, b, tmp) \
+ do { \
+ tmp = a; \
+ a = b; \
+ b = tmp; \
+ } while(0)
+
+        if (shapes[1][3] > shapes[0][3] && new_rank == 4)
+        {
+            vsi_nn_tensor_t* reshape_tmp;
+            float quant_tmp;
+
+            /* Put the operand with the larger 4th dimension first. */
+            _swap_tensor(reshape_tensors[0], reshape_tensors[1], reshape_tmp);
+
+            /* The scale/tail pairs are passed to the kernel by operand
+             * position, so they have to follow the swapped tensors. */
+            quant_tmp = input0Scale;
+            input0Scale = input1Scale;
+            input1Scale = quant_tmp;
+            quant_tmp = input0Tail;
+            input0Tail = input1Tail;
+            input1Tail = quant_tmp;
+
+            /* Swapping the operands mirrors the ordering comparisons. */
+            if (VSI_NN_RELATIONAL_OPS_GREAT == operation)
+            {
+                operation = VSI_NN_RELATIONAL_OPS_LESS;
+            }
+            else if (VSI_NN_RELATIONAL_OPS_LESS == operation)
+            {
+                operation = VSI_NN_RELATIONAL_OPS_GREAT;
+            }
+            else if (VSI_NN_RELATIONAL_OPS_GREAT_EQUAL == operation)
+            {
+                operation = VSI_NN_RELATIONAL_OPS_LESS_EQUAL;
+            }
+            else if (VSI_NN_RELATIONAL_OPS_LESS_EQUAL == operation)
+            {
+                operation = VSI_NN_RELATIONAL_OPS_GREAT_EQUAL;
+            }
+        }
+
+#undef _swap_tensor
+ }
+ else
+ {
+ goto final;
}
- operation = vsi_nn_kernel_param_get_int32( params, "operation" );
+ if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
+ reshape_tensors[2]->attr.dim_num ) )
+ {
+ goto final;
+ }
- image_2d = (outputs[0]->attr.dim_num == 2);
- status = _query_kernel( inputs, outputs, operation, image_2d, kernel );
- if( VSI_SUCCESS == status)
+ image_2d = (reshape_tensors[2]->attr.dim_num == 2 || reshape_tensors[2]->attr.size[2] == 1);
+ status = _query_kernel( reshape_tensors, &reshape_tensors[2], operation, image_2d, kernel );
+ if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
+ if ( node )
{
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
- inputs, 2, outputs, 1 );
+ reshape_tensors, 2, &reshape_tensors[2], 1 );
node_params[SCALAR_INPUT0_SCALE] = vsi_nn_kernel_scalar_create(
graph, F32, &input0Scale );
node_params[SCALAR_INPUT0_TAIL] = vsi_nn_kernel_scalar_create(
@@ -379,6 +434,12 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT1_TAIL] );
}
}
+
+final:
+ vsi_safe_release_tensor( reshape_tensors[0] );
+ vsi_safe_release_tensor( reshape_tensors[1] );
+ vsi_safe_release_tensor( reshape_tensors[2] );
+
return node;
} /* _setup() */
diff --git a/src/tim/vx/internal/src/kernel/cl/cumsum_cl.c b/src/tim/vx/internal/src/kernel/cl/cumsum_cl.c
index 91746ab..0aac099 100644
--- a/src/tim/vx/internal/src/kernel/cl/cumsum_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/cumsum_cl.c
@@ -69,14 +69,19 @@ static const struct {
{
HASH_CUMSUM_KERNELS(0, U8, U8)
HASH_CUMSUM_KERNELS(0, F32, F32)
+ HASH_CUMSUM_KERNELS(0, F32, U8)
HASH_CUMSUM_KERNELS(1, U8, U8)
HASH_CUMSUM_KERNELS(1, F32, F32)
+ HASH_CUMSUM_KERNELS(1, F32, U8)
HASH_CUMSUM_KERNELS(2, U8, U8)
HASH_CUMSUM_KERNELS(2, F32, F32)
+ HASH_CUMSUM_KERNELS(2, F32, U8)
HASH_CUMSUM_KERNELS_2D(0, U8, U8)
HASH_CUMSUM_KERNELS_2D(0, F32, F32)
+ HASH_CUMSUM_KERNELS_2D(0, F32, U8)
HASH_CUMSUM_KERNELS_2D(1, U8, U8)
HASH_CUMSUM_KERNELS_2D(1, F32, F32)
+ HASH_CUMSUM_KERNELS_2D(1, F32, U8)
};
/*
diff --git a/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c b/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c
index 7e1d681..5d29c67 100644
--- a/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c
@@ -56,6 +56,10 @@ typedef enum
UNARY_RCP,
UNARY_SIGN,
UNARY_SOFTSIGN,
+ UNARY_ATAN,
+ UNARY_ATANH,
+ UNARY_ACOSH,
+ UNARY_INVERSE_SIGMOID,
} unary_type_e;
/*
@@ -100,10 +104,18 @@ typedef enum
#define RCP_OPERATION rcp
#define SIGN_OPERATION sign
#define SOFTSIGN_OPERATION softsign
+#define ATAN_OPERATION atan
+#define ATANH_OPERATION atanh
+#define ACOSH_OPERATION acosh
+#define INVERSE_SIGMOID_OPERATION inverse_sigmoid
-#define ADD_UNARY_SH_KERNELS(name, src_type, dst_type) \
- TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, src_type, dst_type) \
- TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, src_type, dst_type)
+#define ADD_UNARY_SH_KERNELS(name) \
+ TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, F32, F32) \
+ TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, F32, F32) \
+ TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, U8, U8) \
+ TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, U8, U8) \
+ TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, U8, F32) \
+ TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, U8, F32)
static const struct {
uint32_t key;
@@ -111,39 +123,28 @@ static const struct {
const char* source_name;
} kernel_map[] =
{
- ADD_UNARY_SH_KERNELS(SIN, F32, F32)
- ADD_UNARY_SH_KERNELS(COS, F32, F32)
- ADD_UNARY_SH_KERNELS(EXP, F32, F32)
- ADD_UNARY_SH_KERNELS(LOG, F32, F32)
- ADD_UNARY_SH_KERNELS(NEG, F32, F32)
- ADD_UNARY_SH_KERNELS(HSIGMOID, F32, F32)
- ADD_UNARY_SH_KERNELS(MISH, F32, F32)
- ADD_UNARY_SH_KERNELS(ROUND, F32, F32)
- ADD_UNARY_SH_KERNELS(GELU, F32, F32)
- ADD_UNARY_SH_KERNELS(HGELU, F32, F32)
- ADD_UNARY_SH_KERNELS(SELU, F32, F32)
- ADD_UNARY_SH_KERNELS(CELU, F32, F32)
- ADD_UNARY_SH_KERNELS(RCP, F32, F32)
- ADD_UNARY_SH_KERNELS(SIGN, F32, F32)
- ADD_UNARY_SH_KERNELS(SOFTSIGN, F32, F32)
+ ADD_UNARY_SH_KERNELS(SIN)
+ ADD_UNARY_SH_KERNELS(COS)
+ ADD_UNARY_SH_KERNELS(EXP)
+ ADD_UNARY_SH_KERNELS(LOG)
+ ADD_UNARY_SH_KERNELS(NEG)
+ ADD_UNARY_SH_KERNELS(HSIGMOID)
+ ADD_UNARY_SH_KERNELS(MISH)
+ ADD_UNARY_SH_KERNELS(ROUND)
+ ADD_UNARY_SH_KERNELS(GELU)
+ ADD_UNARY_SH_KERNELS(HGELU)
+ ADD_UNARY_SH_KERNELS(SELU)
+ ADD_UNARY_SH_KERNELS(CELU)
+ ADD_UNARY_SH_KERNELS(RCP)
+ ADD_UNARY_SH_KERNELS(SIGN)
+ ADD_UNARY_SH_KERNELS(SOFTSIGN)
+ ADD_UNARY_SH_KERNELS(ATAN)
+ ADD_UNARY_SH_KERNELS(ATANH)
+ ADD_UNARY_SH_KERNELS(ACOSH)
+ ADD_UNARY_SH_KERNELS(INVERSE_SIGMOID)
- ADD_UNARY_SH_KERNELS(SIN, U8, U8)
- ADD_UNARY_SH_KERNELS(COS, U8, U8)
- ADD_UNARY_SH_KERNELS(EXP, U8, U8)
- ADD_UNARY_SH_KERNELS(LOG, U8, U8)
- ADD_UNARY_SH_KERNELS(NEG, U8, U8)
- ADD_UNARY_SH_KERNELS(HSIGMOID, U8, U8)
- ADD_UNARY_SH_KERNELS(MISH, U8, U8)
- ADD_UNARY_SH_KERNELS(ROUND, U8, U8)
- ADD_UNARY_SH_KERNELS(GELU, U8, U8)
- ADD_UNARY_SH_KERNELS(HGELU, U8, U8)
- ADD_UNARY_SH_KERNELS(SELU, U8, U8)
- ADD_UNARY_SH_KERNELS(CELU, U8, U8)
- ADD_UNARY_SH_KERNELS(RCP, U8, U8)
- ADD_UNARY_SH_KERNELS(SIGN, U8, U8)
- ADD_UNARY_SH_KERNELS(SOFTSIGN, U8, U8)
-
- ADD_UNARY_SH_KERNELS(NEG, I32, I32)
+ TENSOR_UNARY_KERNELS_3D(NEG_OPERATION, UNARY_NEG, I32, I32)
+ TENSOR_UNARY_KERNELS_2D(NEG_OPERATION, UNARY_NEG, I32, I32)
};
#undef SIN_OPERATION
@@ -161,6 +162,10 @@ static const struct {
#undef RCP_OPERATION
#undef SIGN_OPERATION
#undef SOFTSIGN_OPERATION
+#undef ATAN_OPERATION
+#undef ATANH_OPERATION
+#undef ACOSH_OPERATION
+#undef INVERSE_SIGMOID_OPERATION
/*
* Kernel params
*/
@@ -262,6 +267,10 @@ static vsi_status _query_kernel
case _PACK_SELECT_KEY(F16, F16):
key = HASH_UNARY_KEY( type, F32, F32, image_2d );
break;
+ case _PACK_SELECT_KEY(U8, F32):
+ case _PACK_SELECT_KEY(U8, F16):
+ key = HASH_UNARY_KEY( type, U8, F32, image_2d );
+ break;
default:
key = HASH_UNARY_KEY( type, input_dtype, output_dtype, image_2d );
break;
@@ -330,7 +339,7 @@ static vsi_nn_kernel_node_t _setup
ret = vsi_nn_kernel_optimize_element_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num,
shape, &new_rank );
- if( ret )
+ if ( ret )
{
rs_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape, new_rank );
@@ -338,7 +347,7 @@ static vsi_nn_kernel_node_t _setup
outputs[0], shape, new_rank );
}
- if( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
+ if ( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
rs_tensors[0]->attr.dim_num ) )
{
return NULL;
@@ -348,11 +357,11 @@ static vsi_nn_kernel_node_t _setup
image_2d = (rs_tensors[0]->attr.dim_num == 2 || rs_tensors[0]->attr.size[2] == 1);
status = _query_kernel( rs_tensors, &rs_tensors[1], unary_type, image_2d, kernel );
- if( VSI_SUCCESS == status)
+ if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
+ if ( node )
{
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
rs_tensors, 1, &rs_tensors[1], 1 );
@@ -452,5 +461,9 @@ REGISTER_ELTWISE_UNARY_BACKEND_CL( celu, UNARY_CELU )
REGISTER_ELTWISE_UNARY_BACKEND_CL( rcp, UNARY_RCP )
REGISTER_ELTWISE_UNARY_BACKEND_CL( sign, UNARY_SIGN )
REGISTER_ELTWISE_UNARY_BACKEND_CL( softsign, UNARY_SOFTSIGN )
+REGISTER_ELTWISE_UNARY_BACKEND_CL( atan, UNARY_ATAN )
+REGISTER_ELTWISE_UNARY_BACKEND_CL( atanh, UNARY_ATANH )
+REGISTER_ELTWISE_UNARY_BACKEND_CL( acosh, UNARY_ACOSH )
+REGISTER_ELTWISE_UNARY_BACKEND_CL( inverse_sigmoid, UNARY_INVERSE_SIGMOID )
__END_DECLS
diff --git a/src/tim/vx/internal/src/kernel/cl/gather_cl.c b/src/tim/vx/internal/src/kernel/cl/gather_cl.c
index 66eb842..bafe86c 100644
--- a/src/tim/vx/internal/src/kernel/cl/gather_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/gather_cl.c
@@ -49,6 +49,7 @@ typedef enum
#define _GATHER_KERNEL_SOURCE "gather"
#define _GATHER_BATCH_KERNEL_SOURCE "gather_batch"
+#define _GATHER_ARRAY_KERNEL_SOURCE "gather_array"
// Add kernel hashtable here
#define VX_KERNEL_NAME_GATHER_U8TOU8 CVIVANTE_NAMESPACE("cl.gather_U8toU8")
@@ -61,9 +62,14 @@ typedef enum
#define VX_KERNEL_NAME_GATHER_BATCH_I32TOI32 CVIVANTE_NAMESPACE("cl.gather_batch_I32toI32")
#define VX_KERNEL_NAME_GATHER_BATCH_F32TOF32 CVIVANTE_NAMESPACE("cl.gather_batch_F32toF32")
+#define VX_KERNEL_NAME_GATHER_ARRAY_U8TOU8 CVIVANTE_NAMESPACE("cl.gather_array_U8toU8")
+#define VX_KERNEL_NAME_GATHER_ARRAY_F16TOF16 CVIVANTE_NAMESPACE("cl.gather_array_F16toF16")
+#define VX_KERNEL_NAME_GATHER_ARRAY_I32TOI32 CVIVANTE_NAMESPACE("cl.gather_array_I32toI32")
+#define VX_KERNEL_NAME_GATHER_ARRAY_F32TOF32 CVIVANTE_NAMESPACE("cl.gather_array_F32toF32")
+
// Add kernel hashtable here
-#define HASH_GATHER_KEY(_input0_type, _input1_type, _output_type, _image_2d, _batch) \
- ((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_image_2d << 4) | (_batch))
+#define HASH_GATHER_KEY(_input0_type, _input1_type, _output_type, _is_array, _batch) \
+ ((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_is_array << 4) | (_batch))
#define TENSOR_GATHER_KERNELS(IN0_TYPE, IN1TYPE, OUT_TYPE, SOURCE) \
{ HASH_GATHER_KEY(IN0_TYPE, IN1TYPE, OUT_TYPE, 0, 0), \
@@ -75,6 +81,11 @@ typedef enum
VX_KERNEL_NAME_GATHER_BATCH_##IN0_TYPE##TO##OUT_TYPE, \
SOURCE },
+#define TENSOR_GATHER_ARRAY_KERNELS(IN0_TYPE, IN1TYPE, OUT_TYPE, SOURCE) \
+ { HASH_GATHER_KEY(IN0_TYPE, IN1TYPE, OUT_TYPE, 1, 0), \
+ VX_KERNEL_NAME_GATHER_ARRAY_##IN0_TYPE##TO##OUT_TYPE, \
+ SOURCE },
+
static const struct {
uint32_t key;
char* function_name;
@@ -89,6 +100,10 @@ static const struct {
TENSOR_GATHER_BATCH_KERNELS(F16, I32, F16, _GATHER_BATCH_KERNEL_SOURCE)
TENSOR_GATHER_BATCH_KERNELS(I32, I32, I32, _GATHER_BATCH_KERNEL_SOURCE)
TENSOR_GATHER_BATCH_KERNELS(F32, I32, F32, _GATHER_BATCH_KERNEL_SOURCE)
+ TENSOR_GATHER_ARRAY_KERNELS(U8, I32, U8, _GATHER_ARRAY_KERNEL_SOURCE)
+ TENSOR_GATHER_ARRAY_KERNELS(F16, I32, F16, _GATHER_ARRAY_KERNEL_SOURCE)
+ TENSOR_GATHER_ARRAY_KERNELS(I32, I32, I32, _GATHER_ARRAY_KERNEL_SOURCE)
+ TENSOR_GATHER_ARRAY_KERNELS(F32, I32, F32, _GATHER_ARRAY_KERNEL_SOURCE)
};
/*
@@ -114,7 +129,8 @@ static vsi_status cal_gather_tensor_reshape_size
vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
uint32_t block_size,
vsi_size_t batch_dims,
- uint32_t idxFlg
+ uint32_t idxFlg,
+ int32_t* arrayFlg
)
{
vsi_status status = VSI_FAILURE;
@@ -148,18 +164,19 @@ static vsi_status cal_gather_tensor_reshape_size
}
else
{
- if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
+ sizes[0] = block_size;
+ sizes[1] = elementCnt / block_size;
+ sizes[2] = outerCnt;
+ if ((elementCnt / block_size) >= VSI_NN_MAX_IMAGE_WIDTH)
{
- sizes[0] = block_size;
- sizes[1] = elementCnt / block_size;
- sizes[2] = outerCnt;
- status = VSI_SUCCESS;
+ arrayFlg[0] |= 1;
}
+ status = VSI_SUCCESS;
}
#undef VSI_NN_MAX_IMAGE_WIDTH
return status;
-} /* _get_EltOP_tensor_reshape_size */
+} /* cal_gather_tensor_reshape_size */
/*
* Kernel initializer
@@ -209,8 +226,7 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
- gpu_param.global_size[0] = gpu_align_p2((block_size + gpu_param.global_scale[0] - 1)
- / gpu_param.global_scale[0], 4);
+ gpu_param.global_size[0] = block_size;
gpu_param.global_size[1] = indices_num;
gpu_param.global_size[2] = block_num;
@@ -239,7 +255,8 @@ static vsi_status _query_kernel
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
- int32_t is_batch
+ int32_t is_batch,
+ int32_t is_array
/* Add extra params */
)
{
@@ -262,7 +279,7 @@ static vsi_status _query_kernel
output_dtype = I32;
}
- key = HASH_GATHER_KEY( input0_dtype, I32, output_dtype, 0, is_batch );
+ key = HASH_GATHER_KEY( input0_dtype, I32, output_dtype, is_array, is_batch );
for ( i = 0; i < _cnt_of_array(gather_map); i ++ )
{
@@ -314,11 +331,12 @@ static vsi_nn_kernel_node_t _setup
int32_t indices_num = vsi_nn_kernel_param_get_int32( params, "indices_num" );
int32_t is_batch = batch_dims > 0 ? 1 : 0;
vsi_size_t rs_dim = batch_dims == 0 ? 2 : 3;
+ int32_t is_array = block_size >= GPU_TENSOR_MAX_WIDTH ? 1 : 0;
int32_t i = 0;
- status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0);
- status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1);
- status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0);
+ status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0, &is_array);
+ status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1, &is_array);
+ status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0, &is_array);
if (status != VSI_SUCCESS)
{
return NULL;
@@ -337,7 +355,7 @@ static vsi_nn_kernel_node_t _setup
return NULL;
}
- status = _query_kernel( kernel, inputs, outputs, is_batch );
+ status = _query_kernel( kernel, inputs, outputs, is_batch, is_array );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
diff --git a/src/tim/vx/internal/src/kernel/cl/gather_nd_cl.c b/src/tim/vx/internal/src/kernel/cl/gather_nd_cl.c
index 74dd993..a41e7ac 100644
--- a/src/tim/vx/internal/src/kernel/cl/gather_nd_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/gather_nd_cl.c
@@ -43,6 +43,7 @@ __BEGIN_DECLS
*/
#define KERNEL_SOURCE_1 "gather_nd"
#define KERNEL_SOURCE_2 "gather_nd_3d"
+#define KERNEL_SOURCE_3 "gather_nd_batch"
typedef enum
{
@@ -52,17 +53,25 @@ __BEGIN_DECLS
_3D
} vsi_nn_kernel_coord_type_e;
-#define HASH_GATHER_ND_KEY(_input0_type, _input1_type, _output_type, _coord_dim) \
- ((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_coord_dim))
+#define HASH_GATHER_ND_KEY(_input0_type, _input1_type, _output_type, _coord_dim, _batch_dims) \
+ ((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_coord_dim << 4) | (_batch_dims))
#define HASH_GATHER_ND_SH_KERNEL_NAME(SRC0_TYPE, DST_TYPE, COORD_TYPE) \
CVIVANTE_NAMESPACE("cl.gather_nd_"#SRC0_TYPE"to"#DST_TYPE#COORD_TYPE)
#define TENSOR_GATHER_ND_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, SOURCE) \
- { HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE), \
+ { HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, 0), \
HASH_GATHER_ND_SH_KERNEL_NAME(IN0_TYPE, OUT_TYPE, COORD_TYPE), \
SOURCE },
+#define HASH_GATHER_ND_BATCH_SH_KERNEL_NAME(SRC0_TYPE, DST_TYPE, COORD_TYPE) \
+ CVIVANTE_NAMESPACE("cl.gather_nd_batch_"#SRC0_TYPE"to"#DST_TYPE#COORD_TYPE)
+
+#define TENSOR_GATHER_ND_BATCH_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, SOURCE) \
+ { HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, 1), \
+ HASH_GATHER_ND_BATCH_SH_KERNEL_NAME(IN0_TYPE, OUT_TYPE, COORD_TYPE), \
+ SOURCE },
+
static const struct {
uint32_t key;
char* function_name;
@@ -81,6 +90,12 @@ static const struct {
TENSOR_GATHER_ND_KERNELS(F16, I32, F16, _3D, KERNEL_SOURCE_2)
TENSOR_GATHER_ND_KERNELS(I32, I32, I32, _3D, KERNEL_SOURCE_2)
TENSOR_GATHER_ND_KERNELS(F32, I32, F32, _3D, KERNEL_SOURCE_2)
+ TENSOR_GATHER_ND_BATCH_KERNELS(U8, I32, U8, _1D, KERNEL_SOURCE_3)
+ TENSOR_GATHER_ND_BATCH_KERNELS(I8, I32, I8, _1D, KERNEL_SOURCE_3)
+ TENSOR_GATHER_ND_BATCH_KERNELS(F16, I32, F16, _1D, KERNEL_SOURCE_3)
+ TENSOR_GATHER_ND_BATCH_KERNELS(U8, I32, U8, _2D, KERNEL_SOURCE_3)
+ TENSOR_GATHER_ND_BATCH_KERNELS(I8, I32, I8, _2D, KERNEL_SOURCE_3)
+ TENSOR_GATHER_ND_BATCH_KERNELS(F16, I32, F16, _2D, KERNEL_SOURCE_3)
};
/*
@@ -103,7 +118,8 @@ static vsi_status cal_gather_nd_tensor_reshape_size
vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
uint32_t block_size,
uint32_t coordDim,
- int32_t* newDim
+ int32_t* newDim,
+ int32_t batch_dims
)
{
vsi_status status = VSI_FAILURE;
@@ -114,45 +130,63 @@ static vsi_status cal_gather_nd_tensor_reshape_size
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
newDim[0] = 0;
- for(i = 0; i < dims_num; ++i)
+ for (i = 0; i < dims_num; ++i)
{
elementCnt *= input_size[i];
}
- for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
+ for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
{
sizes[i] = 1;
}
- if(coordDim) // input reshape
+ if (coordDim) // input reshape
{
- uint32_t offset = dims_num - coordDim + 1;
- for(i = coordDim-1; i > 0; i--)
- {
- sizes[i] = input_size[i + offset - 1];
- }
- for(i = 0; i < offset; i++)
- {
- sizes[0] *= input_size[i];
- }
+ uint32_t offset = dims_num - coordDim + 1 - batch_dims;
- newDim[0] = coordDim;
- if(coordDim == 1)
+ if (batch_dims)
{
- newDim[0] = 2;
- sizes[0] = block_size;
- sizes[1] = elementCnt / block_size;
+ for (i = 0; i < offset; i++)
+ {
+ sizes[0] *= input_size[i];
+ }
+
+ for (i = 0; i < coordDim; i++)
+ {
+ sizes[i + 1] = input_size[i + offset];
+ }
+
+ newDim[0] = coordDim == 1 ? 2 : 3;
}
- else if(coordDim == 4)
+ else
{
- newDim[0] = 3;
+ for (i = coordDim-1; i > 0; i--)
+ {
+ sizes[i] = input_size[i + offset - 1];
+ }
+ for (i = 0; i < offset; i++)
+ {
+ sizes[0] *= input_size[i];
+ }
+
+ newDim[0] = coordDim;
+ if (coordDim == 1)
+ {
+ newDim[0] = 2;
+ sizes[0] = block_size;
+ sizes[1] = elementCnt / block_size;
+ }
+ else if (coordDim == 4)
+ {
+ newDim[0] = 3;
+ }
}
status = VSI_SUCCESS;
}
else // indices&output reshape
{
- if((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
+ if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
{
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
@@ -222,7 +256,8 @@ static vsi_status _query_kernel
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
- int32_t coord_dim
+ int32_t coord_dim,
+ int32_t batch_dims
)
{
vsi_status status = VSI_FAILURE;
@@ -234,30 +269,49 @@ static vsi_status _query_kernel
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
- if(coord_dim == 1)
+
+ if (input0_dtype == F32)
+ {
+ input0_dtype = F16;
+ }
+ else if (input0_dtype == I32 || input0_dtype == I16)
+ {
+ input0_dtype = I8;
+ }
+
+ if (output_dtype == F32)
+ {
+ output_dtype = F16;
+ }
+ else if (output_dtype == I32 || output_dtype == I16)
+ {
+ output_dtype = I8;
+ }
+
+ if (coord_dim == 1)
{
coord_type = _1D;
}
- else if(coord_dim == 2)
+ else if (coord_dim == 2)
{
coord_type = _2D;
}
- else if(coord_dim == 3 || coord_dim == 4)
+ else if (coord_dim == 3 || coord_dim == 4)
{
coord_type = _3D;
}
- key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type );
+ key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_dims );
- for( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
+ for ( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
{
- if( gather_nd_map[i].key == key )
+ if ( gather_nd_map[i].key == key )
{
break;
}
}
- if( i < _cnt_of_array(gather_nd_map) )
+ if ( i < _cnt_of_array(gather_nd_map) )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", gather_nd_map[i].function_name );
kernel->info.parameters = _gather_nd_kernel_param_def;
@@ -289,29 +343,30 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_GATHER_ND_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
+ int32_t batch_dims = vsi_nn_kernel_param_get_int32( params, "batch_dims" );
int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
- status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim);
- status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim);
- status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim);
- if(status != VSI_SUCCESS)
+ status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim, batch_dims);
+ status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim, batch_dims);
+ status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim, batch_dims);
+ if (status != VSI_SUCCESS)
{
return NULL;
}
- if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
+ if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
return NULL;
}
- status = _query_kernel( kernel, inputs, outputs, coord_dim );
- if( VSI_SUCCESS == status)
+ status = _query_kernel( kernel, inputs, outputs, coord_dim, batch_dims );
+ if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
+ if ( node )
{
uint32_t index = 0;
/* Pass parameters to node. */
diff --git a/src/tim/vx/internal/src/kernel/cl/globallppool_cl.c b/src/tim/vx/internal/src/kernel/cl/globallppool_cl.c
new file mode 100644
index 0000000..1e51bd7
--- /dev/null
+++ b/src/tim/vx/internal/src/kernel/cl/globallppool_cl.c
@@ -0,0 +1,292 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "vsi_nn_types.h"
+#include "vsi_nn_tensor.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_prv.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "kernel/vsi_nn_kernel.h"
+
+__BEGIN_DECLS
+
+#define _GLOBALLPPOOL_KERNEL_SOURCE_NAME "globallppool"
+/* Lookup key: (IN_DTYPE << 8) | OUT_DTYPE.  A GLOBALLPPOOL_KERNELS row is
+ * { key, CVIVANTE_NAMESPACE("cl.globallppool_<IN>to<OUT>"), source name } and supplies its own trailing comma. */
+#define GLOBALLPPOOL_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
+    (( (IN_DTYPE) << 8 ) | ( OUT_DTYPE ))
+#define GLOBALLPPOOL_KERNELS( IN_DTYPE, OUT_DTYPE ) \
+    { GLOBALLPPOOL_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
+      CVIVANTE_NAMESPACE("cl.globallppool_"#IN_DTYPE"to"#OUT_DTYPE), \
+      _GLOBALLPPOOL_KERNEL_SOURCE_NAME },
+
+typedef struct
+{
+ uint32_t key; /* GLOBALLPPOOL_HASH_KEY value that _query_kernel() compares against */
+ char * function_name; /* kernel name copied into kernel->info.name */
+ const char * source_name; /* source name handed to vsi_nn_kernel_add_source() */
+} _kernel_map_type; /* one row of _globallppool_kernel_map */
+
+static const _kernel_map_type _globallppool_kernel_map[] =
+{
+ // Available (input, output) variants; _query_kernel() folds F16->F32, U8->U32 and I8/I16->I32 for the pairs it lists, other pairs are looked up unchanged.
+ GLOBALLPPOOL_KERNELS( F32, F32 )
+ GLOBALLPPOOL_KERNELS( F32, U32 )
+ GLOBALLPPOOL_KERNELS( F32, I32 )
+ GLOBALLPPOOL_KERNELS( U32, U32 )
+ GLOBALLPPOOL_KERNELS( U32, F32 )
+ GLOBALLPPOOL_KERNELS( I32, I32 )
+ GLOBALLPPOOL_KERNELS( I32, F32 )
+ GLOBALLPPOOL_KERNELS( BF16, BF16 )
+};
+
+
+/*
+ * Kernel params: the two tensors first, then the scalars in the order _setup() creates them.
+ */
+static vx_param_description_t _globallppool_kernel_param_def[] =
+{
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* [0] input tensor */
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* [1] output tensor */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [2] p (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [3] width (I32): input size[0] */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [4] height (I32): input size[1] */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [5] inputScale (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [6] inputTail (F32): -(input zero point * inputScale) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [7] outputScale (F32): 1 / output scale */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [8] outputTail (F32): output zero point */
+};
+#define _GLOBALLPPOOL_PARAM_NUM _cnt_of_array( _globallppool_kernel_param_def ) /* entry count of the table above */
+
+/*
+ * Kernel initializer: sets global_size[0] from output shape entry [2] and applies it with vsi_nn_kernel_gpu_config().
+ */
+DEF_KERNEL_INITIALIZER(_globallppool_initializer)
+ (
+ vsi_nn_kernel_node_t node,
+ const vsi_nn_kernel_node_param_t * param,
+ size_t param_size
+ )
+{
+ gpu_param_t gpu_param = {
+ 1, /* presumably the work-dimension count -- confirm against gpu_param_t */
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0},
+ {0, 0, 0}
+ };
+ vsi_status status = VSI_FAILURE;
+ vx_tensor output = (vx_tensor)param[1]; /* slot 1 is the output tensor in the param table */
+ vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
+ vsi_size_array_t *output_shape = NULL;
+
+ output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
+ CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
+ output_shape = output_attr->shape;
+ /* global_scale[0] is 1, so the round-up division below leaves data[2] unchanged. */
+ gpu_param.global_scale[0] = 1;
+ gpu_param.global_size[0] = (output_shape->data[2] + gpu_param.global_scale[0] - 1)
+ / gpu_param.global_scale[0]; /* NOTE(review): assumes the output shape has at least 3 entries -- confirm */
+ status = vsi_nn_kernel_gpu_config( node, &gpu_param );
+final:
+ if (output_attr)
+ {
+ vsi_nn_kernel_tensor_attr_release(&output_attr);
+ }
+
+ return status; /* result of vsi_nn_kernel_gpu_config(), or the initial VSI_FAILURE if it was never reached */
+} /* _globallppool_initializer() */
+
+
+/*
+ * Query kernel: selects the map row for the input/output dtypes and fills kernel->info; returns VSI_FAILURE when no row matches.
+ */
+static vsi_status _query_kernel
+ (
+ vsi_nn_kernel_t * kernel,
+ vsi_nn_tensor_t * const * const inputs,
+ vsi_nn_tensor_t * const * const outputs
+ /* Add extra params */
+ )
+{
+ vsi_status status = VSI_FAILURE;
+ vsi_nn_kernel_dtype_e in_dtype;
+ vsi_nn_kernel_dtype_e out_dtype;
+ const _kernel_map_type * kernel_map = _globallppool_kernel_map;
+ size_t kernel_map_size = _cnt_of_array( _globallppool_kernel_map );
+ vx_param_description_t * param_def = _globallppool_kernel_param_def;
+ vx_kernel_initialize_f initializer = _globallppool_initializer;
+
+ uint32_t key;
+ uint32_t i;
+
+ in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+ out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+ /* _PACK_SELECT_KEY (in | out << 8) only labels the switch cases; the table key is rebuilt with GLOBALLPPOOL_HASH_KEY. */
+#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
+ (( in_dtype ) | (out_dtype << 8 ))
+ switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
+ {
+ case _PACK_SELECT_KEY(F32, F32):
+ case _PACK_SELECT_KEY(F16, F16):
+ case _PACK_SELECT_KEY(F32, F16):
+ case _PACK_SELECT_KEY(F16, F32):
+ key = GLOBALLPPOOL_HASH_KEY( F32, F32);
+ break;
+ case _PACK_SELECT_KEY(F32, U8):
+ case _PACK_SELECT_KEY(F16, U8):
+ key = GLOBALLPPOOL_HASH_KEY( F32, U32);
+ break;
+ case _PACK_SELECT_KEY(F32, I8):
+ case _PACK_SELECT_KEY(F32, I16):
+ case _PACK_SELECT_KEY(F16, I8):
+ case _PACK_SELECT_KEY(F16, I16):
+ key = GLOBALLPPOOL_HASH_KEY( F32, I32);
+ break;
+ case _PACK_SELECT_KEY(U8, U8):
+ key = GLOBALLPPOOL_HASH_KEY( U32, U32);
+ break;
+ case _PACK_SELECT_KEY(U8, F16):
+ case _PACK_SELECT_KEY(U8, F32):
+ key = GLOBALLPPOOL_HASH_KEY( U32, F32);
+ break;
+ case _PACK_SELECT_KEY(I8, I8):
+ case _PACK_SELECT_KEY(I8, I16):
+ case _PACK_SELECT_KEY(I16, I8):
+ case _PACK_SELECT_KEY(I16, I16):
+ key = GLOBALLPPOOL_HASH_KEY( I32, I32);
+ break;
+ case _PACK_SELECT_KEY(I8, F16):
+ case _PACK_SELECT_KEY(I8, F32):
+ case _PACK_SELECT_KEY(I16, F16):
+ case _PACK_SELECT_KEY(I16, F32):
+ key = GLOBALLPPOOL_HASH_KEY( I32, F32);
+ break;
+ default: /* any other pair is looked up with its own dtypes (this is how the BF16/BF16 row is reached) */
+ key = GLOBALLPPOOL_HASH_KEY( in_dtype, out_dtype);
+ break;
+ }
+#undef _PACK_SELECT_KEY
+ /* Linear search of the table for the selected key; i == kernel_map_size means "not found". */
+ for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
+ {
+ if ( kernel_map[i].key == key )
+ {
+ break;
+ }
+ }
+ if ( i < (uint32_t)kernel_map_size )
+ {
+ snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
+ kernel->info.parameters = param_def;
+ kernel->info.numParams = _cnt_of_array( _globallppool_kernel_param_def );
+ kernel->info.initialize = initializer;
+ // Register code source: "eltwise_ops_helper" is listed ahead of the kernel's own source
+ vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
+ "eltwise_ops_helper",
+ kernel_map[i].source_name );
+ // Register binary source
+ vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
+ kernel_map[i].source_name );
+ status = VSI_SUCCESS;
+ }
+ return status;
+} /* _query_kernel() */
+
+
+static vsi_nn_kernel_node_t _setup /* Creates the CL globallppool node; NULL if the shape check, kernel lookup or node creation fails. */
+ (
+ vsi_nn_graph_t * graph,
+ vsi_nn_tensor_t ** inputs,
+ size_t input_num,
+ vsi_nn_tensor_t ** outputs,
+ size_t output_num,
+ const vsi_nn_kernel_param_t * params, /* supplies the int32 attribute "p" */
+ vsi_nn_kernel_t * kernel
+ )
+{
+ vsi_status status = VSI_FAILURE;
+ vsi_nn_kernel_node_param_t node_params[_GLOBALLPPOOL_PARAM_NUM];
+ vsi_nn_kernel_node_t node = NULL;
+ int32_t p = vsi_nn_kernel_param_get_int32(params, "p");
+ int32_t width = (int32_t)inputs[0]->attr.size[0];
+ int32_t height = (int32_t)inputs[0]->attr.size[1];
+ float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
+ float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+ float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
+ float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+
+ if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
+ inputs[0]->attr.dim_num )
+ || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
+ outputs[0]->attr.dim_num ))
+ {
+ return NULL;
+ }
+ /* Pre-fold the quantization terms: the kernel receives 1/outputScale and -(input zero point * inputScale). */
+ outputScale = 1.0f / outputScale;
+ inputTail = -(inputTail * inputScale);
+
+ status = _query_kernel( kernel, inputs, outputs );
+ if ( VSI_SUCCESS == status)
+ {
+ node = vsi_nn_kernel_create_node( graph, kernel );
+ if ( node )
+ {
+ /* Set inputs and outputs */
+ uint32_t index = 2; /* first scalar slot; assumes input_num + output_num == 2 -- confirm with callers */
+ vsi_nn_kernel_node_pack_io( node_params, _GLOBALLPPOOL_PARAM_NUM,
+ inputs, input_num, outputs, output_num );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &p );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
+ /* Pass parameters to node. NOTE(review): the returned status is stored but never checked before node is returned. */
+ status = vsi_nn_kernel_node_pass_param( node, node_params, _GLOBALLPPOOL_PARAM_NUM );
+ vsi_nn_kernel_scalar_release( &node_params[2] ); /* release the seven scalars created above (slots 2..8) */
+ vsi_nn_kernel_scalar_release( &node_params[3] );
+ vsi_nn_kernel_scalar_release( &node_params[4] );
+ vsi_nn_kernel_scalar_release( &node_params[5] );
+ vsi_nn_kernel_scalar_release( &node_params[6] );
+ vsi_nn_kernel_scalar_release( &node_params[7] );
+ vsi_nn_kernel_scalar_release( &node_params[8] );
+ }
+ }
+ return node;
+} /* _setup() */
+
+__END_DECLS
+
+REGISTER_BACKEND_CL( globallppool, _setup ) /* presumably binds _setup as the CL backend entry for "globallppool" -- see the macro definition */
+
diff --git a/src/tim/vx/internal/src/kernel/cl/l1norm_cl.c b/src/tim/vx/internal/src/kernel/cl/l1norm_cl.c
new file mode 100644
index 0000000..2626bfe
--- /dev/null
+++ b/src/tim/vx/internal/src/kernel/cl/l1norm_cl.c
@@ -0,0 +1,365 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+
+#include
+#include
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_tensor.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_prv.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "kernel/vsi_nn_kernel.h"
+
+__BEGIN_DECLS
+
+/*
+ * Define kernel meta.
+ */
+
+#define _L1NORM_KERNEL_SOURCE_NAME      "l1norm"
+
+/*
+ * Lookup key for the kernel table. The fields are OR-ed together after
+ * shifting: input dtype << 24, output dtype << 16, 2D flag << 8, axis << 0.
+ * Every argument is parenthesized so that a compound expression passed by a
+ * caller (for example `a | b`) is shifted as a whole.
+ */
+#define L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, _image_2d, AXIS) \
+    (( (IN_DTYPE) << 24 ) | ( (OUT_DTYPE) << 16 ) | ( (_image_2d) << 8 ) | (AXIS))
+
+/* Table row for the non-2D kernel "cl.l1norm_<IN>to<OUT>_axis<AXIS>". */
+#define L1NORM_KERNELS( IN_DTYPE, OUT_DTYPE, AXIS ) \
+    { L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, 0 , AXIS), \
+      CVIVANTE_NAMESPACE("cl.l1norm_"#IN_DTYPE"to"#OUT_DTYPE"_axis"#AXIS), \
+      _L1NORM_KERNEL_SOURCE_NAME }
+
+/* Table row for the 2D kernel "cl.l1norm_<IN>to<OUT>_2D_axis<AXIS>". */
+#define L1NORM_KERNELS_2D( IN_DTYPE, OUT_DTYPE, AXIS ) \
+    { L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, 1, AXIS), \
+      CVIVANTE_NAMESPACE("cl.l1norm_"#IN_DTYPE"to"#OUT_DTYPE"_2D_axis"#AXIS), \
+      _L1NORM_KERNEL_SOURCE_NAME }
+
+/* One row of the kernel lookup table. */
+typedef struct
+{
+    uint32_t key;                /* value produced by L1NORM_HASH_KEY */
+    const char * function_name;  /* kernel function name; rows point it at string literals */
+    const char * source_name;    /* name of the kernel source to register */
+} _kernel_map_type;
+
+/*
+ * Lookup table searched linearly by _query_kernel(): one row per supported
+ * (input dtype, output dtype, 2D flag, axis) combination. The non-2D rows
+ * cover axis 0, 1 and 2; the 2D rows cover axis 0 and 1 only.
+ */
+static const _kernel_map_type _l1norm_kernel_map[] =
+{
+ // Register kernel here
+ /* non-2D kernels, axis 0 */
+ L1NORM_KERNELS( U32, U32, 0 ),
+ L1NORM_KERNELS( U32, I32, 0 ),
+ L1NORM_KERNELS( U32, F32, 0 ),
+ L1NORM_KERNELS( I32, I32, 0 ),
+ L1NORM_KERNELS( I32, U32, 0 ),
+ L1NORM_KERNELS( I32, F32, 0 ),
+ L1NORM_KERNELS( F32, F32, 0 ),
+ L1NORM_KERNELS( F32, U32, 0 ),
+ L1NORM_KERNELS( F32, I32, 0 ),
+
+ /* non-2D kernels, axis 1 */
+ L1NORM_KERNELS( U32, U32, 1 ),
+ L1NORM_KERNELS( U32, I32, 1 ),
+ L1NORM_KERNELS( U32, F32, 1 ),
+ L1NORM_KERNELS( I32, I32, 1 ),
+ L1NORM_KERNELS( I32, U32, 1 ),
+ L1NORM_KERNELS( I32, F32, 1 ),
+ L1NORM_KERNELS( F32, F32, 1 ),
+ L1NORM_KERNELS( F32, U32, 1 ),
+ L1NORM_KERNELS( F32, I32, 1 ),
+
+ /* non-2D kernels, axis 2 */
+ L1NORM_KERNELS( U32, U32, 2 ),
+ L1NORM_KERNELS( U32, I32, 2 ),
+ L1NORM_KERNELS( U32, F32, 2 ),
+ L1NORM_KERNELS( I32, I32, 2 ),
+ L1NORM_KERNELS( I32, U32, 2 ),
+ L1NORM_KERNELS( I32, F32, 2 ),
+ L1NORM_KERNELS( F32, F32, 2 ),
+ L1NORM_KERNELS( F32, U32, 2 ),
+ L1NORM_KERNELS( F32, I32, 2 ),
+
+ /* 2D kernels, axis 0 */
+ L1NORM_KERNELS_2D( U32, U32, 0 ),
+ L1NORM_KERNELS_2D( U32, I32, 0 ),
+ L1NORM_KERNELS_2D( U32, F32, 0 ),
+ L1NORM_KERNELS_2D( I32, I32, 0 ),
+ L1NORM_KERNELS_2D( I32, U32, 0 ),
+ L1NORM_KERNELS_2D( I32, F32, 0 ),
+ L1NORM_KERNELS_2D( F32, F32, 0 ),
+ L1NORM_KERNELS_2D( F32, U32, 0 ),
+ L1NORM_KERNELS_2D( F32, I32, 0 ),
+
+ /* 2D kernels, axis 1 */
+ L1NORM_KERNELS_2D( U32, U32, 1 ),
+ L1NORM_KERNELS_2D( U32, I32, 1 ),
+ L1NORM_KERNELS_2D( U32, F32, 1 ),
+ L1NORM_KERNELS_2D( I32, I32, 1 ),
+ L1NORM_KERNELS_2D( I32, U32, 1 ),
+ L1NORM_KERNELS_2D( I32, F32, 1 ),
+ L1NORM_KERNELS_2D( F32, F32, 1 ),
+ L1NORM_KERNELS_2D( F32, U32, 1 ),
+ L1NORM_KERNELS_2D( F32, I32, 1 ),
+};
+
+
+/*
+ * Kernel params
+ *
+ * Slot order must match the order in which _setup() fills node_params:
+ * the tensors first, then the scalars starting at index 2.
+ */
+static vx_param_description_t _l1norm_kernel_param_def[] =
+{
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 0: input tensor */
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 1: output tensor */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 2: input zero point (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 3: 1 / output scale (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 4: output zero point (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 5: axis (I32), read back by the initializer */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED} /* 6: output size along axis (I32) */
+
+ // Add kernel parameters here
+};
+/* Number of entries in the parameter table above. */
+#define _L1NORM_PARAM_NUM _cnt_of_array( _l1norm_kernel_param_def )
+
+/*
+ * Kernel initializer
+ *
+ * Configures the GPU dispatch for an L1-norm node from the output tensor
+ * shape (param[1]) and the axis scalar (param[5]).
+ *
+ * Local and global size are both 16 along the axis being normalized; the
+ * remaining dimensions use a local size of 1 and a global size equal to the
+ * output extent (depth is treated as 1 when the output has fewer than three
+ * dimensions).
+ *
+ * Returns the status of vsi_nn_kernel_gpu_config(), or a failure status when
+ * the output attributes cannot be created or the axis scalar cannot be read.
+ */
+DEF_KERNEL_INITIALIZER(_l1norm_initializer)
+    (
+    vsi_nn_kernel_node_t                node,
+    const vsi_nn_kernel_node_param_t  * param,
+    size_t                              param_size
+    )
+{
+    gpu_param_t gpu_param = {
+        3,
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0}
+        };
+    vsi_status status = VSI_FAILURE;
+    vx_tensor output = (vx_tensor)param[1];
+    vx_int32 axis = 0;
+    vx_int32 dim = 0;
+    vx_int32 width = 0;
+    vx_int32 height = 0;
+    vx_int32 depth = 0;
+    vx_int32 reduce_dim = 0;
+    const vx_int32 group_size = 16;
+
+    vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
+    vsi_size_array_t *output_shape = NULL;
+
+    (void)param_size;
+
+    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
+    CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
+
+    /*
+     * Do not fall through with axis == 0 when the scalar cannot be read:
+     * that would silently configure the axis-0 dispatch for another kernel.
+     */
+    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &axis);
+    if (VSI_SUCCESS != status)
+    {
+        goto final;
+    }
+
+    output_shape = output_attr->shape;
+
+    dim = output_shape->size < 3 ? 2 : 3;
+    width = (vx_int32)output_shape->data[0];
+    height = (vx_int32)output_shape->data[1];
+    depth = dim < 3 ? 1 : (vx_int32)output_shape->data[2];
+
+    gpu_param.dim = dim;
+    gpu_param.global_scale[0] = 1;
+    gpu_param.global_scale[1] = 1;
+    gpu_param.global_scale[2] = 1;
+
+    /* Start from one work-item per output element ... */
+    gpu_param.local_size[0] = 1;
+    gpu_param.local_size[1] = 1;
+    gpu_param.local_size[2] = 1;
+    gpu_param.global_size[0] = width;
+    gpu_param.global_size[1] = height;
+    gpu_param.global_size[2] = depth;
+
+    /* ... then replace the normalized axis; any axis other than 0 or 1 maps to 2. */
+    reduce_dim = (axis == 0) ? 0 : ((axis == 1) ? 1 : 2);
+    gpu_param.local_size[reduce_dim] = group_size;
+    gpu_param.global_size[reduce_dim] = group_size;
+
+    status = vsi_nn_kernel_gpu_config( node, &gpu_param );
+
+final:
+    if (output_attr)
+    {
+        vsi_nn_kernel_tensor_attr_release(&output_attr);
+    }
+
+    return status;
+} /* _l1norm_initializer() */
+
+
+
+/*
+ * Query kernel
+ *
+ * Picks the L1-norm kernel matching the tensor dtypes, the 2D flag and the
+ * axis, and fills in kernel->info (name, parameter table, initializer) plus
+ * the code/binary sources. F16 is looked up as F32, U8 as U32 and I8/I16 as
+ * I32; any other dtype is looked up unchanged.
+ *
+ * Returns VSI_SUCCESS when a table row matches, VSI_FAILURE otherwise.
+ */
+static vsi_status _query_kernel
+    (
+    vsi_nn_kernel_t * kernel,
+    vsi_nn_tensor_t * const * const inputs,
+    vsi_nn_tensor_t * const * const outputs,
+    vsi_bool image_2d,
+    int32_t axis
+    /* Add extra params */
+    )
+{
+    const size_t map_count = _cnt_of_array( _l1norm_kernel_map );
+    const _kernel_map_type * entry = NULL;
+    vsi_nn_kernel_dtype_e src_type;
+    vsi_nn_kernel_dtype_e dst_type;
+    uint32_t wanted = 0;
+    size_t idx = 0;
+
+    src_type = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+    dst_type = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+
+    /* Widen the input dtype to the 32-bit family used by the table. */
+    switch (src_type)
+    {
+        case F16:
+            src_type = F32;
+            break;
+        case U8:
+            src_type = U32;
+            break;
+        case I16:
+        case I8:
+            src_type = I32;
+            break;
+        default:
+            break;
+    }
+
+    /* Same widening for the output dtype. */
+    switch (dst_type)
+    {
+        case F16:
+            dst_type = F32;
+            break;
+        case U8:
+            dst_type = U32;
+            break;
+        case I16:
+        case I8:
+            dst_type = I32;
+            break;
+        default:
+            break;
+    }
+
+    wanted = L1NORM_HASH_KEY( src_type, dst_type, image_2d, axis);
+
+    for ( idx = 0; idx < map_count; idx++ )
+    {
+        if ( _l1norm_kernel_map[idx].key == wanted )
+        {
+            entry = &_l1norm_kernel_map[idx];
+            break;
+        }
+    }
+    if ( NULL == entry )
+    {
+        return VSI_FAILURE;
+    }
+
+    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", entry->function_name );
+    kernel->info.parameters = _l1norm_kernel_param_def;
+    kernel->info.numParams = _cnt_of_array( _l1norm_kernel_param_def );
+    kernel->info.initialize = _l1norm_initializer;
+    // Register code source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
+            entry->source_name );
+    // Register binary source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
+            entry->source_name );
+
+    return VSI_SUCCESS;
+} /* _query_kernel() */
+
+
+/*
+ * Creates the L1-norm CL node.
+ *
+ * Reads "axis" from params, selects a kernel through _query_kernel() and
+ * passes the tensors plus five scalars (input zero point, 1/output scale,
+ * output zero point, axis, output size along axis) to the node.
+ *
+ * Returns the node, or NULL when axis is outside the output rank, the output
+ * shape fails vsi_nn_kernel_gpu_check_shape(), no kernel matches, or node
+ * creation fails.
+ */
+static vsi_nn_kernel_node_t _setup
+    (
+    vsi_nn_graph_t              * graph,
+    vsi_nn_tensor_t            ** inputs,
+    size_t                        input_num,
+    vsi_nn_tensor_t            ** outputs,
+    size_t                        output_num,
+    const vsi_nn_kernel_param_t * params,
+    vsi_nn_kernel_t             * kernel
+    )
+{
+    vsi_status status = VSI_FAILURE;
+    vsi_nn_kernel_node_param_t node_params[_L1NORM_PARAM_NUM] = { NULL };
+    vsi_nn_kernel_node_t node = NULL;
+    vsi_bool image_2d = FALSE;
+    int32_t axis = vsi_nn_kernel_param_get_int32(params, "axis");
+    float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
+    float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+    float inputZp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+    int32_t axis_size = 0;
+    const uint32_t scalar_start = 2;
+    uint32_t i = 0;
+
+    /* Validate axis before it is used to index attr.size[]. */
+    if ( axis < 0 || axis >= (int32_t)outputs[0]->attr.dim_num )
+    {
+        return NULL;
+    }
+    axis_size = (int32_t)outputs[0]->attr.size[axis];
+
+    /* The kernel multiplies by the reciprocal of the output scale. */
+    outputScale = 1.0f / outputScale;
+
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
+                outputs[0]->attr.dim_num ) )
+    {
+        return NULL;
+    }
+
+    image_2d = (outputs[0]->attr.dim_num == 2);
+
+    status = _query_kernel( kernel, inputs, outputs, image_2d, axis );
+    if ( VSI_SUCCESS == status)
+    {
+        node = vsi_nn_kernel_create_node( graph, kernel );
+        if ( node )
+        {
+            /* Set inputs and outputs */
+            uint32_t index = scalar_start;
+            vsi_nn_kernel_node_pack_io( node_params, _L1NORM_PARAM_NUM,
+                    inputs, input_num, outputs, output_num );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputZp );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis_size );
+            /* Pass parameters to node. */
+            status = vsi_nn_kernel_node_pass_param( node, node_params, _L1NORM_PARAM_NUM );
+            /* The scalars are only needed until they have been passed. */
+            for ( i = scalar_start; i < _L1NORM_PARAM_NUM; i++ )
+            {
+                vsi_nn_kernel_scalar_release( &node_params[i] );
+            }
+        }
+    }
+    return node;
+} /* _setup() */
+
+__END_DECLS
+
+REGISTER_BACKEND_CL( l1norm, _setup )
+
diff --git a/src/tim/vx/internal/src/kernel/cl/logical_not_cl.c b/src/tim/vx/internal/src/kernel/cl/logical_not_cl.c
index bf63043..bcf4d7a 100644
--- a/src/tim/vx/internal/src/kernel/cl/logical_not_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/logical_not_cl.c
@@ -35,6 +35,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
+#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
__BEGIN_DECLS
@@ -212,27 +213,52 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_LOGICAL_NOT_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
+ vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
+ vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
+ vsi_size_t new_rank = 0;
+ vsi_bool ret = FALSE;
- if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
- outputs[0]->attr.dim_num ) )
+ ret = vsi_nn_kernel_optimize_element_shape(
+ inputs[0]->attr.size, inputs[0]->attr.dim_num,
+ shape, &new_rank );
+
+ if ( ret )
{
- return NULL;
+ reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
+ inputs[0], shape, new_rank );
+ reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
+ outputs[0], shape, new_rank );
+ }
+ else
+ {
+ goto final;
}
- image_2d = (outputs[0]->attr.dim_num == 2 || outputs[0]->attr.size[2] == 1);
- status = _query_kernel( kernel, inputs, outputs, image_2d);
- if( VSI_SUCCESS == status)
+ if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[1]->attr.size,
+ reshape_tensors[1]->attr.dim_num ) )
+ {
+ goto final;
+ }
+
+ image_2d = (reshape_tensors[1]->attr.dim_num == 2 || reshape_tensors[1]->attr.size[2] == 1);
+ status = _query_kernel( kernel, &reshape_tensors[0], &reshape_tensors[1], image_2d);
+ if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
+ if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_NOT_PARAM_NUM,
- inputs, input_num, outputs, output_num );
+ &reshape_tensors[0], input_num, &reshape_tensors[1], output_num );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_NOT_PARAM_NUM );
}
}
+
+final:
+ vsi_safe_release_tensor( reshape_tensors[0] );
+ vsi_safe_release_tensor( reshape_tensors[1] );
+
return node;
} /* _setup() */
diff --git a/src/tim/vx/internal/src/kernel/cl/logical_ops_cl.c b/src/tim/vx/internal/src/kernel/cl/logical_ops_cl.c
index d21317c..7121aa9 100644
--- a/src/tim/vx/internal/src/kernel/cl/logical_ops_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/logical_ops_cl.c
@@ -35,7 +35,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
-#include "libnnext/vx_lib_nnext.h"
+#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
@@ -228,30 +228,75 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_LOGICAL_OPS_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
+ vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
+ vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
+ vsi_size_t new_rank = 0;
+ vsi_bool ret = FALSE;
uint32_t ops_type = vsi_nn_kernel_param_get_int32( params, "ops_type" );
- if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
- outputs[0]->attr.dim_num ) )
+ ret = vsi_nn_kernel_optimize_eltwise_shape(
+ inputs[0]->attr.size, inputs[0]->attr.dim_num,
+ inputs[1]->attr.size, inputs[1]->attr.dim_num,
+ outputs[0]->attr.size, outputs[0]->attr.dim_num,
+ shapes[0], shapes[1], shapes[2], &new_rank );
+
+ if ( ret )
{
- return NULL;
+ reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
+ inputs[0], shapes[0], new_rank );
+ reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
+ inputs[1], shapes[1], new_rank );
+ reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
+ outputs[0], shapes[2], new_rank );
+
+#define _swap_tensor(a, b, tmp) \
+ do { \
+ tmp = a; \
+ a = b; \
+ b = tmp; \
+ } while(0)
+
+ if (shapes[1][3] > shapes[0][3] && new_rank == 4)
+ {
+ vsi_nn_tensor_t* reshape_tmp;
+ _swap_tensor(reshape_tensors[0], reshape_tensors[1], reshape_tmp);
+ }
+
+#undef _swap_tensor
+ }
+ else
+ {
+ goto final;
+ }
+
+ if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
+ reshape_tensors[2]->attr.dim_num ) )
+ {
+ goto final;
}
image_2d = (outputs[0]->attr.dim_num == 2);
- status = _query_kernel( kernel, inputs, outputs, image_2d, (vsi_nn_logical_ops_type_t)ops_type);
+ status = _query_kernel( kernel, reshape_tensors, &reshape_tensors[2],
+ image_2d, (vsi_nn_logical_ops_type_t)ops_type);
- if( VSI_SUCCESS == status)
+ if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
+ if ( node )
{
/* Pass parameters to node. */
vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_OPS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
+ reshape_tensors, input_num, &reshape_tensors[2], output_num );
status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_OPS_PARAM_NUM );
}
}
+final:
+ vsi_safe_release_tensor( reshape_tensors[0] );
+ vsi_safe_release_tensor( reshape_tensors[1] );
+ vsi_safe_release_tensor( reshape_tensors[2] );
+
return node;
} /* _setup() */
diff --git a/src/tim/vx/internal/src/kernel/cl/matrixmul_cl.c b/src/tim/vx/internal/src/kernel/cl/matrixmul_cl.c
index 35eb757..5ff2a93 100644
--- a/src/tim/vx/internal/src/kernel/cl/matrixmul_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/matrixmul_cl.c
@@ -64,12 +64,12 @@ __BEGIN_DECLS
#define TENSOR_MATRIXMUL_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
{ HASH_MATRIXMUL_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, 0), \
- HASH_MATRIXMUL_SH_KERNEL_NAME(F32, F32, F32, IMAGE_DIM), \
+ HASH_MATRIXMUL_SH_KERNEL_NAME(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM), \
SOURCE },
#define TENSOR_MATRIXMUL_TRANSA_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
{ HASH_MATRIXMUL_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, 1), \
- HASH_MATRIXMUL_TRANSA_SH_KERNEL_NAME(F32, F32, F32, IMAGE_DIM), \
+ HASH_MATRIXMUL_TRANSA_SH_KERNEL_NAME(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM), \
SOURCE },
#define TENSOR_MATRIXMUL_TRANSB_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
@@ -83,18 +83,32 @@ static const struct {
const char* source_name;
} matrixmul_map[] =
{
- TENSOR_MATRIXMUL_KERNELS(F16, F16, F16, _2D, KERNEL_SOURCE_1)
- TENSOR_MATRIXMUL_KERNELS(F16, F16, F16, _3D, KERNEL_SOURCE_1)
- TENSOR_MATRIXMUL_TRANSA_KERNELS(F16, F16, F16, _2D, KERNEL_SOURCE_2)
- TENSOR_MATRIXMUL_TRANSA_KERNELS(F16, F16, F16, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSA_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSA_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_1)
- TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _2D, KERNEL_SOURCE_1)
- TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSA_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_2)
+ TENSOR_MATRIXMUL_TRANSA_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_2)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_2)
+ TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_2)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_2)
+ TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_2)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_1)
+ TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_1)
};
/*
@@ -198,10 +212,44 @@ static vsi_status _query_kernel
dim_type = _3D;
}
+ if (input0_dtype == I16 || input0_dtype == I32)
+ {
+ input0_dtype = I8;
+ }
+ else if (input0_dtype == F16)
+ {
+ input0_dtype = F32;
+ }
+ else if (input0_dtype == U32)
+ {
+ input0_dtype = U8;
+ }
+
if (input1_dtype == I16 || input1_dtype == I32)
{
input1_dtype = I8;
}
+ else if (input1_dtype == F16)
+ {
+ input1_dtype = F32;
+ }
+ else if (input1_dtype == U32)
+ {
+ input1_dtype = U8;
+ }
+
+ if (output_dtype == I16 || output_dtype == I32)
+ {
+ output_dtype = I8;
+ }
+ else if (output_dtype == F16)
+ {
+ output_dtype = F32;
+ }
+ else if (output_dtype == U32)
+ {
+ output_dtype = U8;
+ }
key = HASH_MATRIXMUL_KEY( input0_dtype, input1_dtype, output_dtype, dim_type, transa );
@@ -260,6 +308,8 @@ static vsi_nn_kernel_node_t _setup
float scale_out = vsi_nn_get_tensor_scale(outputs[0]);
float zp_out = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+ scale_out = 1 / scale_out;
+
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
diff --git a/src/tim/vx/internal/src/kernel/cl/maxunpool_cl.c b/src/tim/vx/internal/src/kernel/cl/maxunpool_cl.c
new file mode 100644
index 0000000..408164b
--- /dev/null
+++ b/src/tim/vx/internal/src/kernel/cl/maxunpool_cl.c
@@ -0,0 +1,330 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+
+#include
+#include
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_tensor.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_prv.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "kernel/vsi_nn_kernel.h"
+
+
+__BEGIN_DECLS
+
+/*
+ * Define kernel meta.
+ */
+
+#define _MAXUNPOOL_KERNEL_SOURCE_NAME      "maxunpool"
+
+/*
+ * Lookup key for the kernel table: first input dtype << 16, second input
+ * dtype << 8, output dtype << 0. Every argument is parenthesized so that a
+ * compound expression passed by a caller is shifted as a whole.
+ */
+#define MAXUNPOOL_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE ) \
+    (( (IN_DTYPE0) << 16 ) | ( (IN_DTYPE1) << 8 ) | ( OUT_DTYPE ))
+/*
+ * Table row for the kernel "cl.maxunpool_<IN0>to<OUT>". IN_DTYPE1 is accepted
+ * for readability at the call site only: the key always uses I32 for the
+ * second input, and the function name does not include it.
+ */
+#define MAXUNPOOL_KERNELS( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE ) \
+    { MAXUNPOOL_HASH_KEY( IN_DTYPE0, I32, OUT_DTYPE ), \
+      CVIVANTE_NAMESPACE("cl.maxunpool_"#IN_DTYPE0"to"#OUT_DTYPE), \
+      _MAXUNPOOL_KERNEL_SOURCE_NAME },
+
+
+/* One row of the kernel lookup table. */
+typedef struct
+{
+    uint32_t key;                /* value produced by MAXUNPOOL_HASH_KEY */
+    const char * function_name;  /* kernel function name; rows point it at string literals */
+    const char * source_name;    /* name of the kernel source to register */
+} _kernel_map_type;
+
+/*
+ * Lookup table searched linearly by _query_kernel(). Rows are distinguished
+ * by the first input dtype and the output dtype; the second argument is I32
+ * in every row.
+ */
+static const _kernel_map_type _maxunpool_kernel_map[] =
+{
+ // Register kernel here
+ MAXUNPOOL_KERNELS( F32, I32, F32)
+ MAXUNPOOL_KERNELS( F32, I32, U32)
+ MAXUNPOOL_KERNELS( F32, I32, I32)
+ MAXUNPOOL_KERNELS( U32, I32, U32)
+ MAXUNPOOL_KERNELS( U32, I32, F32)
+ MAXUNPOOL_KERNELS( I32, I32, I32)
+ MAXUNPOOL_KERNELS( I32, I32, F32)
+ MAXUNPOOL_KERNELS( BF16, I32, BF16)
+};
+
+
+/*
+ * Kernel params
+ *
+ * Slot order must match the order in which _setup() fills node_params:
+ * the tensors first, then the scalars starting at index 3.
+ */
+
+static vx_param_description_t _maxunpool_kernel_param_def[] =
+{
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 0: first input tensor */
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 1: second input tensor */
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* 2: output tensor */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 3: output width minus left/right pad (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 4: output height minus top/bottom pad (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 5: input width (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 6: input height (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 7: output size[2], named "batch" in _setup (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 8: pad_left (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 9: pad_top (I32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 10: input scale (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 11: -(input zero point * input scale) (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 12: 1 / output scale (F32) */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* 13: output zero point (F32) */
+};
+/* Number of entries in the parameter table above. */
+#define _MAXUNPOOL_PARAM_NUM _cnt_of_array( _maxunpool_kernel_param_def )
+
+/*
+ * Kernel initializer
+ *
+ * Configures a 3D dispatch with one work-item per output element, taking the
+ * extents from the output tensor (param[2]). The third extent is read only
+ * when the output shape actually has a third dimension; otherwise it is 1.
+ *
+ * Returns the status of vsi_nn_kernel_gpu_config(), or VSI_FAILURE when the
+ * output attributes cannot be created.
+ */
+DEF_KERNEL_INITIALIZER(_maxunpool_initializer)
+    (
+    vsi_nn_kernel_node_t                node,
+    const vsi_nn_kernel_node_param_t  * param,
+    size_t                              param_size
+    )
+{
+    gpu_param_t gpu_param = {
+        3,
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0}
+        };
+    vsi_status status = VSI_FAILURE;
+    vx_tensor output = (vx_tensor)param[2];
+    vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
+    vsi_size_array_t *output_shape = NULL;
+
+    (void)param_size;
+
+    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
+    CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
+
+    output_shape = output_attr->shape;
+
+    gpu_param.global_scale[0] = 1;
+    gpu_param.global_scale[1] = 1;
+    gpu_param.global_scale[2] = 1;
+    /* Round each extent up to a multiple of its scale. */
+    gpu_param.global_size[0] = (output_shape->data[0] + gpu_param.global_scale[0] - 1)
+                                        / gpu_param.global_scale[0];
+    gpu_param.global_size[1] = (output_shape->data[1] + gpu_param.global_scale[1] - 1)
+                                        / gpu_param.global_scale[1];
+    /* Do not read data[2] for a shape that has fewer than three dimensions. */
+    gpu_param.global_size[2] = output_shape->size > 2 ?
+                               (output_shape->data[2] + gpu_param.global_scale[2] - 1)
+                                        / gpu_param.global_scale[2] : 1;
+    status = vsi_nn_kernel_gpu_config( node, &gpu_param );
+
+final:
+    if (output_attr)
+    {
+        vsi_nn_kernel_tensor_attr_release(&output_attr);
+    }
+
+    return status;
+} /* _maxunpool_initializer() */
+
+
+
+/*
+ * Query kernel
+ *
+ * Picks the max-unpool kernel for the dtype pair (inputs[0], outputs[0]) and
+ * fills in kernel->info (name, parameter table, initializer) plus the code
+ * and binary sources. Listed dtype pairs are folded onto the F32/U32/I32
+ * rows of the table; any other pair is looked up with its dtypes unchanged.
+ *
+ * Returns VSI_SUCCESS when a table row matches, VSI_FAILURE otherwise.
+ */
+static vsi_status _query_kernel
+    (
+    vsi_nn_kernel_t * kernel,
+    vsi_nn_tensor_t * const * const inputs,
+    vsi_nn_tensor_t * const * const outputs
+    )
+{
+    const size_t map_count = _cnt_of_array( _maxunpool_kernel_map );
+    const size_t param_count = _cnt_of_array( _maxunpool_kernel_param_def );
+    const _kernel_map_type * entry = NULL;
+    vsi_nn_kernel_dtype_e src_type;
+    vsi_nn_kernel_dtype_e dst_type;
+    uint32_t wanted = 0;
+    size_t idx = 0;
+
+    src_type = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+    dst_type = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+
+#define _DTYPE_PAIR( src, dst ) \
+    (( src ) | (dst << 8 ))
+    switch (_DTYPE_PAIR(src_type, dst_type))
+    {
+        /* float in, float out */
+        case _DTYPE_PAIR(F32, F32):
+        case _DTYPE_PAIR(F16, F16):
+        case _DTYPE_PAIR(F32, F16):
+        case _DTYPE_PAIR(F16, F32):
+            wanted = MAXUNPOOL_HASH_KEY( F32, I32, F32);
+            break;
+        /* float in, unsigned out */
+        case _DTYPE_PAIR(F32, U8):
+        case _DTYPE_PAIR(F16, U8):
+            wanted = MAXUNPOOL_HASH_KEY( F32, I32, U32);
+            break;
+        /* float in, signed out */
+        case _DTYPE_PAIR(F32, I8):
+        case _DTYPE_PAIR(F32, I16):
+        case _DTYPE_PAIR(F16, I8):
+        case _DTYPE_PAIR(F16, I16):
+            wanted = MAXUNPOOL_HASH_KEY( F32, I32, I32);
+            break;
+        /* unsigned in, unsigned out */
+        case _DTYPE_PAIR(U8, U8):
+            wanted = MAXUNPOOL_HASH_KEY( U32, I32, U32);
+            break;
+        /* unsigned in, float out */
+        case _DTYPE_PAIR(U8, F16):
+        case _DTYPE_PAIR(U8, F32):
+            wanted = MAXUNPOOL_HASH_KEY( U32, I32, F32);
+            break;
+        /* signed in, signed out */
+        case _DTYPE_PAIR(I8, I8):
+        case _DTYPE_PAIR(I8, I16):
+        case _DTYPE_PAIR(I16, I8):
+        case _DTYPE_PAIR(I16, I16):
+            wanted = MAXUNPOOL_HASH_KEY( I32, I32, I32);
+            break;
+        /* signed in, float out */
+        case _DTYPE_PAIR(I8, F16):
+        case _DTYPE_PAIR(I8, F32):
+        case _DTYPE_PAIR(I16, F16):
+        case _DTYPE_PAIR(I16, F32):
+            wanted = MAXUNPOOL_HASH_KEY( I32, I32, F32);
+            break;
+        /* anything else is looked up as-is */
+        default:
+            wanted = MAXUNPOOL_HASH_KEY( src_type, I32, dst_type);
+            break;
+    }
+#undef _DTYPE_PAIR
+
+    for ( idx = 0; idx < map_count; idx++ )
+    {
+        if ( _maxunpool_kernel_map[idx].key == wanted )
+        {
+            entry = &_maxunpool_kernel_map[idx];
+            break;
+        }
+    }
+    if ( NULL == entry )
+    {
+        return VSI_FAILURE;
+    }
+
+    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", entry->function_name );
+    kernel->info.parameters = _maxunpool_kernel_param_def;
+    kernel->info.numParams = (uint32_t)param_count;
+    kernel->info.initialize = _maxunpool_initializer;
+    // Register code source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
+            "eltwise_ops_helper",
+            entry->source_name );
+    // Register binary source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
+            entry->source_name );
+
+    return VSI_SUCCESS;
+} /* _query_kernel() */
+
+
+/*
+ * Creates the max-unpool CL node.
+ *
+ * Reads the four pad values from params, derives the unpadded output size
+ * and the quantization constants, selects a kernel through _query_kernel()
+ * and passes the tensors plus eleven scalars to the node.
+ *
+ * Returns the node, or NULL when a shape fails
+ * vsi_nn_kernel_gpu_check_shape(), no kernel matches, or node creation fails.
+ */
+static vsi_nn_kernel_node_t _setup
+    (
+    vsi_nn_graph_t              * graph,
+    vsi_nn_tensor_t            ** inputs,
+    size_t                        input_num,
+    vsi_nn_tensor_t            ** outputs,
+    size_t                        output_num,
+    const vsi_nn_kernel_param_t * params,
+    vsi_nn_kernel_t             * kernel
+    )
+{
+    vsi_nn_kernel_node_param_t node_params[_MAXUNPOOL_PARAM_NUM] = { NULL };
+    vsi_nn_kernel_node_t node = NULL;
+    vsi_status status = VSI_FAILURE;
+    uint32_t slot = 3;
+    uint32_t i = 0;
+    int32_t pad_left     = vsi_nn_kernel_param_get_int32(params, "pad_left");
+    int32_t pad_right    = vsi_nn_kernel_param_get_int32(params, "pad_right");
+    int32_t pad_top      = vsi_nn_kernel_param_get_int32(params, "pad_top");
+    int32_t pad_bottom   = vsi_nn_kernel_param_get_int32(params, "pad_bottom");
+    int32_t width_in     = (int32_t)inputs[0]->attr.size[0];
+    int32_t height_in    = (int32_t)inputs[0]->attr.size[1];
+    int32_t width        = (int32_t)outputs[0]->attr.size[0];
+    int32_t height       = (int32_t)outputs[0]->attr.size[1];
+    int32_t batch        = (int32_t)outputs[0]->attr.size[2];
+    int32_t width_nopad  = width - pad_left - pad_right;
+    int32_t height_nopad = height - pad_top - pad_bottom;
+    /* The kernel receives 1/output scale and -(input zero point * input scale). */
+    float outputScale    = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
+    float outputTail     = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+    float inputScale     = vsi_nn_get_tensor_scale(inputs[0]);
+    float inputTail      = -((float)vsi_nn_get_tensor_zero_point(inputs[0]) * inputScale);
+
+    if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
+                inputs[0]->attr.dim_num ) )
+    {
+        return NULL;
+    }
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
+                outputs[0]->attr.dim_num ) )
+    {
+        return NULL;
+    }
+
+    status = _query_kernel( kernel, inputs, outputs );
+    if ( VSI_SUCCESS != status )
+    {
+        return NULL;
+    }
+
+    node = vsi_nn_kernel_create_node( graph, kernel );
+    if ( !node )
+    {
+        return NULL;
+    }
+
+    /* Set inputs and outputs */
+    vsi_nn_kernel_node_pack_io( node_params, _MAXUNPOOL_PARAM_NUM,
+            inputs, input_num, outputs, output_num );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &width_nopad );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &height_nopad );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &width_in );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &height_in );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &batch );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
+    node_params[slot++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
+    /* Pass parameters to node. */
+    status = vsi_nn_kernel_node_pass_param( node, node_params, _MAXUNPOOL_PARAM_NUM );
+    /* Release every scalar slot (indices 3 through 13). */
+    for ( i = 3; i < _MAXUNPOOL_PARAM_NUM; i++ )
+    {
+        vsi_nn_kernel_scalar_release( &node_params[i] );
+    }
+
+    return node;
+} /* _setup() */
+
+__END_DECLS
+
+REGISTER_BACKEND_CL( maxunpool, _setup )
+
diff --git a/src/tim/vx/internal/src/kernel/cl/pow_cl.c b/src/tim/vx/internal/src/kernel/cl/pow_cl.c
index 56c0097..1d1020d 100644
--- a/src/tim/vx/internal/src/kernel/cl/pow_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/pow_cl.c
@@ -81,9 +81,11 @@ static const struct {
{
TENSOR_POW_KERNELS_FLOAT(F32, F32, F32, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_FLOAT(F16, F16, F16, KERNEL_SOURCE_1)
+ TENSOR_POW_KERNELS(U32, F32, U32, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_2D_FLOAT(F32, F32, F32, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_2D_FLOAT(F16, F16, F16, KERNEL_SOURCE_1)
+ TENSOR_POW_KERNELS_2D(U32, F32, U32, KERNEL_SOURCE_1)
};
/*
@@ -94,6 +96,10 @@ static vx_param_description_t kernel_param_def[] =
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)
@@ -179,7 +185,25 @@ static vsi_status _query_kernel
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
- key = HASH_POW_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
+
+#define _PACK_SELECT_KEY( input0_dtype, input1_dtype, output_dtype) \
+ ((input0_dtype) | (input1_dtype << 8) | (output_dtype << 16))
+ switch(_PACK_SELECT_KEY(input0_dtype, input1_dtype, output_dtype))
+ {
+ case _PACK_SELECT_KEY(F16, F16, F16):
+ case _PACK_SELECT_KEY(F32, F32, F32):
+ key = HASH_POW_KEY( F32, F32, F32, image_2d );
+ break;
+ case _PACK_SELECT_KEY(U8, F16, U8):
+ case _PACK_SELECT_KEY(U8, F32, U8):
+ case _PACK_SELECT_KEY(U32, F16, U32):
+ case _PACK_SELECT_KEY(U32, F32, U32):
+ key = HASH_POW_KEY( U32, F32, U32, image_2d );
+ break;
+ default:
+ key = HASH_POW_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
+ break;
+ }
for( i = 0; i < _cnt_of_array(pow_map); i ++ )
{
@@ -219,6 +243,13 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
+ float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
+ float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+ float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
+ float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+
+ outputScale = 1.0f / outputScale;
+ inputTail = -(inputTail * inputScale);
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
@@ -234,11 +265,20 @@ static vsi_nn_kernel_node_t _setup
if( node )
{
+ uint32_t index = 3;
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
inputs, 2, outputs, 1 );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
+ node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
+ vsi_nn_kernel_scalar_release( &node_params[3] );
+ vsi_nn_kernel_scalar_release( &node_params[4] );
+ vsi_nn_kernel_scalar_release( &node_params[5] );
+ vsi_nn_kernel_scalar_release( &node_params[6] );
VSI_ASSERT( status == VSI_SUCCESS );
}
}
diff --git a/src/tim/vx/internal/src/kernel/cl/reversesequence_cl.c b/src/tim/vx/internal/src/kernel/cl/reversesequence_cl.c
new file mode 100644
index 0000000..cb9cdcd
--- /dev/null
+++ b/src/tim/vx/internal/src/kernel/cl/reversesequence_cl.c
@@ -0,0 +1,307 @@
+/****************************************************************************
+*
+* Copyright (c) 2020 Vivante Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+
+
+#include
+#include
+#include
+#include "vsi_nn_types.h"
+#include "vsi_nn_tensor.h"
+#include "vsi_nn_graph.h"
+#include "vsi_nn_log.h"
+#include "vsi_nn_prv.h"
+#include "vsi_nn_error.h"
+#include "vsi_nn_tensor_util.h"
+#include "utils/vsi_nn_util.h"
+#include "kernel/vsi_nn_kernel.h"
+#include "kernel/vsi_nn_kernel_eltwise.h"
+
+__BEGIN_DECLS
+
+/*
+ * Define kernel meta.
+ */
+
+#define _REVERSESEQUENCE_KERNEL_SOURCE_NAME "reversesequence"
+
+// Kernel hashtable: key packs in0/in1/out dtypes and the batch-axis id at bit offsets 24/16/8/0
+#define REVERSESEQUENCE_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ) \
+    (( (IN_DTYPE0) << 24 ) | ( (IN_DTYPE1) << 16 ) | ( (OUT_DTYPE) << 8 ) | (batch_axis) )
+#define REVERSESEQUENCE_KERNELS( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ) \
+    { REVERSESEQUENCE_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ), \
+      CVIVANTE_NAMESPACE("cl.reversesequence_"#IN_DTYPE0"to"#OUT_DTYPE#batch_axis), \
+      _REVERSESEQUENCE_KERNEL_SOURCE_NAME },
+
+typedef enum
+{
+ _axis1 = 0, /* chosen by _query_kernel() for every batch_axis value other than 2 */
+ _axis2 /* chosen by _query_kernel() when batch_axis == 2 */
+} vsi_nn_kernel_batch_axis_type_e;
+
+typedef struct
+{
+    uint32_t key;               /* REVERSESEQUENCE_HASH_KEY value identifying the variant */
+    const char * function_name; /* kernel name copied into kernel->info.name; literal, never written */
+    const char * source_name;   /* source name handed to vsi_nn_kernel_add_source() */
+} _kernel_map_type;
+
+static const _kernel_map_type _reversesequence_kernel_map[] =
+{
+ // Variants for batch-axis type _axis1; arguments are (input 0 dtype, input 1 dtype, output dtype, axis)
+ REVERSESEQUENCE_KERNELS( F32, I32, F32, _axis1)
+ REVERSESEQUENCE_KERNELS( F32, I32, U32, _axis1)
+ REVERSESEQUENCE_KERNELS( F32, I32, I32, _axis1)
+ REVERSESEQUENCE_KERNELS( U32, I32, U32, _axis1)
+ REVERSESEQUENCE_KERNELS( U32, I32, F32, _axis1)
+ REVERSESEQUENCE_KERNELS( I32, I32, I32, _axis1)
+ REVERSESEQUENCE_KERNELS( I32, I32, F32, _axis1)
+ REVERSESEQUENCE_KERNELS( BF16, I32, BF16, _axis1)
+ // The same dtype combinations for batch-axis type _axis2
+ REVERSESEQUENCE_KERNELS( F32, I32, F32, _axis2)
+ REVERSESEQUENCE_KERNELS( F32, I32, U32, _axis2)
+ REVERSESEQUENCE_KERNELS( F32, I32, I32, _axis2)
+ REVERSESEQUENCE_KERNELS( U32, I32, U32, _axis2)
+ REVERSESEQUENCE_KERNELS( U32, I32, F32, _axis2)
+ REVERSESEQUENCE_KERNELS( I32, I32, I32, _axis2)
+ REVERSESEQUENCE_KERNELS( I32, I32, F32, _axis2)
+ REVERSESEQUENCE_KERNELS( BF16, I32, BF16, _axis2)
+};
+
+
+/*
+ * Kernel params: three tensors followed by the two F32 scalars that _setup() creates.
+ */
+static vx_param_description_t _reversesequence_kernel_param_def[] =
+{
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* [0] tensor whose shape the initializer reads */
+ {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* [1] second input tensor */
+ {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, /* [2] output tensor */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [3] inoutScale, set in _setup() */
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [4] inoutTail, set in _setup() */
+};
+#define _REVERSESEQUENCE_PARAM_NUM _cnt_of_array( _reversesequence_kernel_param_def ) /* entry count of the table above */
+
+/*
+ * Kernel initializer: derives the global work size on three axes from the shape of param[0].
+ */
+DEF_KERNEL_INITIALIZER(_reversesequence_initializer)
+    (
+    vsi_nn_kernel_node_t node,
+    const vsi_nn_kernel_node_param_t * param,
+    size_t param_size
+    )
+{
+    gpu_param_t gpu_param = {
+        3,
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0},
+        {0, 0, 0}
+        };
+    vsi_status status = VSI_FAILURE;
+    vx_tensor input = (vx_tensor)param[0];
+    vsi_nn_kernel_tensor_attr_t *input_attr = NULL;
+    vsi_size_array_t *input_shape = NULL;
+
+    input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input );
+    CHECK_PTR_FAIL_GOTO( input_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
+
+    input_shape = input_attr->shape;
+    /* With a scale of 1 on every axis the global size equals the tensor extent. */
+    gpu_param.global_scale[0] = 1;
+    gpu_param.global_scale[1] = 1;
+    gpu_param.global_scale[2] = 1;
+    gpu_param.global_size[0] = (input_shape->data[0] + gpu_param.global_scale[0] - 1)
+                                    / gpu_param.global_scale[0];
+    gpu_param.global_size[1] = (input_shape->data[1] + gpu_param.global_scale[1] - 1)
+                                    / gpu_param.global_scale[1];
+    gpu_param.global_size[2] = input_shape->size > 2 ? /* rank < 3 has no data[2]: use extent 1 */
+        (input_shape->data[2] + gpu_param.global_scale[2] - 1) / gpu_param.global_scale[2] : 1;
+    status = vsi_nn_kernel_gpu_config( node, &gpu_param );
+final:
+    if (input_attr)
+    {
+        vsi_nn_kernel_tensor_attr_release(&input_attr);
+    }
+
+    return status;
+} /* _reversesequence_initializer() */
+
+
+
+/*
+ * Query kernel
+ *
+ * Builds a lookup key from the dtypes of inputs[0] and outputs[0] and from
+ * batch_axis, searches _reversesequence_kernel_map for it and, on a match,
+ * fills kernel->info and registers the kernel sources.
+ *
+ * Returns VSI_SUCCESS when a matching entry exists, VSI_FAILURE otherwise.
+ */
+static vsi_status _query_kernel
+    (
+    vsi_nn_kernel_t * kernel,
+    vsi_nn_tensor_t * const * const inputs,
+    vsi_nn_tensor_t * const * const outputs,
+    int32_t batch_axis
+    )
+{
+    const _kernel_map_type * const table = _reversesequence_kernel_map;
+    const uint32_t table_len = (uint32_t)_cnt_of_array( _reversesequence_kernel_map );
+    /* Only batch_axis == 2 selects the _axis2 kernels; any other value uses _axis1. */
+    const vsi_nn_kernel_batch_axis_type_e axis_sel = ( batch_axis == 2 ) ? _axis2 : _axis1;
+    vsi_nn_kernel_dtype_e src_dtype;
+    vsi_nn_kernel_dtype_e dst_dtype;
+    uint32_t hash = 0;
+    uint32_t slot = 0;
+
+    src_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
+    dst_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+
+    /*
+     * Fold the listed dtype pairs onto the variants present in the kernel table.
+     * Pairs not listed keep their mapped dtypes and are looked up as-is.
+     */
+#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
+    (( in_dtype ) | (out_dtype << 8 ))
+    switch(_PACK_SELECT_KEY( src_dtype, dst_dtype ))
+    {
+        case _PACK_SELECT_KEY(F16, F16):
+        case _PACK_SELECT_KEY(F32, F32):
+            src_dtype = F32; dst_dtype = F32;
+            break;
+        case _PACK_SELECT_KEY(F16, U8):
+        case _PACK_SELECT_KEY(F32, U8):
+            src_dtype = F32; dst_dtype = U32;
+            break;
+        case _PACK_SELECT_KEY(F16, I8):
+        case _PACK_SELECT_KEY(F32, I8):
+        case _PACK_SELECT_KEY(F16, I16):
+        case _PACK_SELECT_KEY(F32, I16):
+            src_dtype = F32; dst_dtype = I32;
+            break;
+        case _PACK_SELECT_KEY(U8, U8):
+            src_dtype = U32; dst_dtype = U32;
+            break;
+        case _PACK_SELECT_KEY(U8, F16):
+        case _PACK_SELECT_KEY(U8, F32):
+            src_dtype = U32; dst_dtype = F32;
+            break;
+        case _PACK_SELECT_KEY(I8, I8):
+        case _PACK_SELECT_KEY(I16, I16):
+            src_dtype = I32; dst_dtype = I32;
+            break;
+        case _PACK_SELECT_KEY(I8, F16):
+        case _PACK_SELECT_KEY(I8, F32):
+        case _PACK_SELECT_KEY(I16, F16):
+        case _PACK_SELECT_KEY(I16, F32):
+            src_dtype = I32; dst_dtype = F32;
+            break;
+        case _PACK_SELECT_KEY(BF16, BF16):
+        default:
+            break;
+    }
+    hash = REVERSESEQUENCE_HASH_KEY( src_dtype, I32, dst_dtype, axis_sel );
+
+    /* Linear search of the kernel table for the computed key. */
+    while ( slot < table_len && table[slot].key != hash )
+    {
+        slot ++;
+    }
+    if ( slot >= table_len )
+    {
+        return VSI_FAILURE;
+    }
+
+    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", table[slot].function_name );
+    kernel->info.parameters = _reversesequence_kernel_param_def;
+    kernel->info.numParams = _cnt_of_array( _reversesequence_kernel_param_def );
+    kernel->info.initialize = _reversesequence_initializer;
+    // Register code source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
+            "eltwise_ops_helper",
+            table[slot].source_name );
+    // Register binary source
+    vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
+            table[slot].source_name );
+
+    return VSI_SUCCESS;
+} /* _query_kernel() */
+
+
+/* Creates the reversesequence CL node; NULL when the shape check, kernel lookup or node creation fails. */
+static vsi_nn_kernel_node_t _setup
+    (
+    vsi_nn_graph_t * graph,
+    vsi_nn_tensor_t ** inputs,
+    size_t input_num,
+    vsi_nn_tensor_t ** outputs,
+    size_t output_num,
+    const vsi_nn_kernel_param_t * params,
+    vsi_nn_kernel_t * kernel
+    )
+{
+    vsi_status status = VSI_FAILURE;
+    vsi_nn_kernel_node_param_t node_params[_REVERSESEQUENCE_PARAM_NUM] = { NULL };
+    vsi_nn_kernel_node_t node = NULL;
+    int32_t batch_axis = vsi_nn_kernel_param_get_int32(params, "batch_axis");
+    float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
+    float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+    float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
+    float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+    float inoutScale = inputScale / outputScale;           /* input-to-output scale ratio */
+    float inoutTail = outputTail - inputTail * inoutScale; /* output zero point minus rescaled input zero point */
+
+    if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size, inputs[0]->attr.dim_num ) ||
+         !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size, outputs[0]->attr.dim_num ) )
+    {
+        return NULL;
+    }
+
+    status = _query_kernel( kernel, inputs, outputs, batch_axis );
+    if ( VSI_SUCCESS == status )
+    {
+        node = vsi_nn_kernel_create_node( graph, kernel );
+        if ( node )
+        {
+            uint32_t index = 3;
+            /* Set inputs and outputs; the two scalars follow at slots [3] and [4]. */
+            vsi_nn_kernel_node_pack_io( node_params, _REVERSESEQUENCE_PARAM_NUM,
+                    inputs, input_num, outputs, output_num );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inoutScale );
+            node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inoutTail );
+            /* Pass parameters to node; a failure is caught by the assert after the releases. */
+            status = vsi_nn_kernel_node_pass_param( node, node_params, _REVERSESEQUENCE_PARAM_NUM );
+            vsi_nn_kernel_scalar_release( &node_params[3] );
+            vsi_nn_kernel_scalar_release( &node_params[4] );
+            VSI_ASSERT( status == VSI_SUCCESS );
+        }
+    }
+    return node;
+} /* _setup() */
+
+__END_DECLS
+
+REGISTER_BACKEND_CL( reversesequence, _setup ) /* NOTE(review): presumably registers _setup as the CL backend entry for "reversesequence" - confirm against the macro definition */
+
diff --git a/src/tim/vx/internal/src/kernel/cl/roi_align_cl.c b/src/tim/vx/internal/src/kernel/cl/roi_align_cl.c
index d82816c..e897d0f 100644
--- a/src/tim/vx/internal/src/kernel/cl/roi_align_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/roi_align_cl.c
@@ -88,6 +88,7 @@ static vx_param_description_t _roi_align_kernel_param_def[] =
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _ROI_ALIGN_PARAM_NUM _cnt_of_array( _roi_align_kernel_param_def )
@@ -105,8 +106,9 @@ static vx_param_description_t _roi_align_kernel_param_def[] =
#define SCALAR_SAMPLING_Y_RATIO (15)
#define SCALAR_DEPTH (16)
#define SCALAR_FORMAT (17)
+#define PLATFORM_TYPE (18)
-#define ROI_ALIGN_PARAM_NUM 18
+#define ROI_ALIGN_PARAM_NUM 19
#define ROI_ALIGN_QUANT_PARAM_NUM _cnt_of_array( _roi_align_kernel_param_def )
/*
@@ -250,6 +252,7 @@ static vsi_nn_kernel_node_t _setup
float height_ratio = vsi_nn_kernel_param_get_float32( params, "height_ratio" );
int32_t width_sample_num = vsi_nn_kernel_param_get_int32( params, "width_sample_num" );
int32_t height_sample_num = vsi_nn_kernel_param_get_int32( params, "height_sample_num" );
+ int32_t platform_type = vsi_nn_kernel_param_get_int32( params, "platform_type" );
float input_zp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float input_scale = vsi_nn_get_tensor_scale(inputs[0]);
float input_tail = -(input_zp * input_scale);
@@ -318,6 +321,7 @@ static vsi_nn_kernel_node_t _setup
node_params[SCALAR_SAMPLING_Y_RATIO] = vsi_nn_kernel_scalar_create( graph, F32, &sampling_y_ratio );
node_params[SCALAR_DEPTH] = vsi_nn_kernel_scalar_create( graph, I32, &depth );
node_params[SCALAR_FORMAT] = vsi_nn_kernel_scalar_create( graph, I32, &dtype );
+ node_params[PLATFORM_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &platform_type );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
@@ -336,6 +340,7 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_DEPTH] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_DEPTH] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_FORMAT] );
+ vsi_nn_kernel_scalar_release( &node_params[PLATFORM_TYPE] );
}
}
diff --git a/src/tim/vx/internal/src/kernel/cl/scatter_nd_cl.c b/src/tim/vx/internal/src/kernel/cl/scatter_nd_cl.c
index 5ec59b1..d409c4c 100644
--- a/src/tim/vx/internal/src/kernel/cl/scatter_nd_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/scatter_nd_cl.c
@@ -110,7 +110,7 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
uint32_t i = 0;
vsi_size_t elementCnt = 1;
- if(coordDim != 0 && (width == NULL || area == NULL))
+ if (coordDim != 0 && (width == NULL || area == NULL))
{
return status;
}
@@ -118,17 +118,17 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
newDim[0] = 0;
- for(i = 0; i < dims_num; ++i)
+ for (i = 0; i < dims_num; ++i)
{
elementCnt *= input_size[i];
}
- for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
+ for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
{
sizes[i] = 1;
}
- if((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
+ if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
{
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
@@ -140,17 +140,17 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
return status;
}
- if(coordDim == 1) // index shape
+ if (coordDim == 1) // index shape
{
*width = 0;
*area = 0;
}
- else if(coordDim == 2)
+ else if (coordDim == 2)
{
*width = input_size[dims_num - 2];
*area = 0;
}
- else if(coordDim == 3)
+ else if (coordDim == 3)
{
*width = input_size[dims_num - 3];
*area = input_size[dims_num - 3] * input_size[dims_num - 2];
@@ -226,30 +226,33 @@ static vsi_status _query_kernel
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
- if(coord_dim == 1)
+ if (coord_dim == 1)
{
coord_type = _1D;
}
- else if(coord_dim == 2)
+ else if (coord_dim == 2)
{
coord_type = _2D;
}
- else if(coord_dim == 3)
+ else if (coord_dim == 3)
{
coord_type = _3D;
}
+ input1_dtype = input1_dtype == F16 ? F32 : input1_dtype;
+ output_dtype = output_dtype == F16 ? F32 : output_dtype;
+
key = HASH_SCATTER_ND_KEY( I32, input1_dtype, output_dtype, coord_type );
- for( i = 0; i < _cnt_of_array(scatter_nd_map); i ++ )
+ for ( i = 0; i < _cnt_of_array(scatter_nd_map); i ++ )
{
- if( scatter_nd_map[i].key == key )
+ if ( scatter_nd_map[i].key == key )
{
break;
}
}
- if( i < _cnt_of_array(scatter_nd_map) )
+ if ( i < _cnt_of_array(scatter_nd_map) )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", scatter_nd_map[i].function_name );
kernel->info.parameters = _scatter_nd_kernel_param_def;
@@ -287,26 +290,31 @@ static vsi_nn_kernel_node_t _setup
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
vsi_size_t width = 0, area = 0;
- status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
- status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);
- status |= cal_scatter_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, coord_dim,
- &width, &area, &rs_out_dim);
- if(status != VSI_SUCCESS)
+ if (coord_dim > 3)
{
return NULL;
}
- if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
+ status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
+ status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);
+ status |= cal_scatter_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, coord_dim,
+ &width, &area, &rs_out_dim);
+ if (status != VSI_SUCCESS)
+ {
+ return NULL;
+ }
+
+ if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
return NULL;
}
status = _query_kernel( kernel, inputs, outputs, coord_dim );
- if( VSI_SUCCESS == status)
+ if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
+ if ( node )
{
uint32_t index = 0;
/* Pass parameters to node. */
diff --git a/src/tim/vx/internal/src/kernel/cl/scatter_nd_update_cl.c b/src/tim/vx/internal/src/kernel/cl/scatter_nd_update_cl.c
index fd72a9d..d5f2867 100644
--- a/src/tim/vx/internal/src/kernel/cl/scatter_nd_update_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/scatter_nd_update_cl.c
@@ -111,12 +111,12 @@ static vsi_status cal_scatter_nd_update_tensor_reshape_size
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
newDim[0] = 0;
- for(i = 0; i < dims_num; ++i)
+ for (i = 0; i < dims_num; ++i)
{
elementCnt *= input_size[i];
}
- for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
+ for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
{
sizes[i] = 1;
}
@@ -235,7 +235,7 @@ static vsi_status _query_kernel
key = HASH_SCATTER_ND_UPDATE_KEY( input0_dtype, input2_dtype, output_dtype, 0 );
- for( i = 0; i < _cnt_of_array(scatter_nd_update_map); i ++ )
+ for ( i = 0; i < _cnt_of_array(scatter_nd_update_map); i ++ )
{
if ( scatter_nd_update_map[i].key == key )
{
@@ -281,6 +281,13 @@ static vsi_nn_kernel_node_t _setup
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
vsi_size_t width = 0, area = 0, vol = 0;
int32_t offsetX = 0, offsetY = 0, offsetZ = 0, offsetW = 0, offset_idx = 0;
+ vsi_size_t *input_size = inputs[2]->attr.size;
+ uint32_t dims_num = inputs[2]->attr.dim_num;
+
+ if (coord_dim > 4 && input_size[dims_num - 1] > 1)
+ {
+ return NULL;
+ }
status = cal_scatter_nd_update_tensor_reshape_size(&inputs[1], shapes[0],
coord_dim, 0, NULL, NULL, NULL, &rs_in_dim);
diff --git a/src/tim/vx/internal/src/kernel/cl/swish_cl.c b/src/tim/vx/internal/src/kernel/cl/swish_cl.c
index 4c3f206..b616a84 100644
--- a/src/tim/vx/internal/src/kernel/cl/swish_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/swish_cl.c
@@ -113,6 +113,8 @@ static const _kernel_map_type _swish_kernel_map[] =
SWISH_PACK_KERNEL_MAP_2D(U8, U8),
SWISH_PACK_KERNEL_MAP(I32, I32),
SWISH_PACK_KERNEL_MAP_2D(I32, I32),
+ SWISH_PACK_KERNEL_MAP(F32, U8),
+ SWISH_PACK_KERNEL_MAP_2D(F32, U8),
HSWISH_PACK_KERNEL_FLOAT_MAP(F32, F32),
HSWISH_PACK_KERNEL_FLOAT_MAP_2D(F32, F32),
HSWISH_PACK_KERNEL_FLOAT_MAP(F16, F16),
@@ -222,6 +224,11 @@ static vsi_status _query_kernel
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+ if (in_dtype == F16)
+ in_dtype = F32;
+ if (out_dtype == F16)
+ out_dtype = F32;
+
key = SWISH_HASH_KEY(swish_type, in_dtype, out_dtype, image_2d);
for( i = 0; i < kernel_map_size; i ++ )
diff --git a/src/tim/vx/internal/src/kernel/cl/tile_cl.c b/src/tim/vx/internal/src/kernel/cl/tile_cl.c
index dab13f7..6381694 100644
--- a/src/tim/vx/internal/src/kernel/cl/tile_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/tile_cl.c
@@ -279,7 +279,7 @@ static vsi_nn_kernel_node_t _setup
vsi_size_t new_rank = 0;
vsi_bool ret = FALSE;
uint32_t dim = inputs[0]->attr.dim_num;
- vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 0 };
+ vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 1, 1, 1, 1 };
for ( i = 0; i < dim; i++)
{
diff --git a/src/tim/vx/internal/src/kernel/cl/topk_cl.c b/src/tim/vx/internal/src/kernel/cl/topk_cl.c
index a3d5428..0354a1e 100644
--- a/src/tim/vx/internal/src/kernel/cl/topk_cl.c
+++ b/src/tim/vx/internal/src/kernel/cl/topk_cl.c
@@ -55,6 +55,13 @@ __BEGIN_DECLS
CVIVANTE_NAMESPACE("cl.topk_odd_even_sort_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
"topk_odd_even_sort" }
+#define TOPK_ODD_EVEN_SORT_HASH_KEY2( IN_DTYPE, OUT_DTYPE ) \
+ ( ( IN_DTYPE ) | ( OUT_DTYPE << 8 ) )
+#define PACK_ODD_EVEN_SORT_KERNEL_MAP2( IN_DTYPE, OUT_DTYPE ) \
+ { TOPK_ODD_EVEN_SORT_HASH_KEY2( IN_DTYPE, OUT_DTYPE ), \
+ CVIVANTE_NAMESPACE("cl.topk_odd_even_sort_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
+ "topk_odd_even_sort2" }
+
typedef struct
{
uint32_t key;
@@ -88,6 +95,22 @@ static const _kernel_map_type _topk_kernel_map[] =
PACK_KERNEL_MAP( I32, I32, 4 ),
PACK_KERNEL_MAP( I32, I32, 5 ),
PACK_KERNEL_MAP( I32, I32, 6 ),
+
+ PACK_KERNEL_MAP( F32, U32, 0 ),
+ PACK_KERNEL_MAP( F32, U32, 1 ),
+ PACK_KERNEL_MAP( F32, U32, 2 ),
+ PACK_KERNEL_MAP( F32, U32, 3 ),
+ PACK_KERNEL_MAP( F32, U32, 4 ),
+ PACK_KERNEL_MAP( F32, U32, 5 ),
+ PACK_KERNEL_MAP( F32, U32, 6 ),
+
+ PACK_KERNEL_MAP( F32, I32, 0 ),
+ PACK_KERNEL_MAP( F32, I32, 1 ),
+ PACK_KERNEL_MAP( F32, I32, 2 ),
+ PACK_KERNEL_MAP( F32, I32, 3 ),
+ PACK_KERNEL_MAP( F32, I32, 4 ),
+ PACK_KERNEL_MAP( F32, I32, 5 ),
+ PACK_KERNEL_MAP( F32, I32, 6 ),
};
static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
@@ -96,6 +119,8 @@ static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
PACK_ODD_EVEN_SORT_KERNEL_MAP( F32, F32 ),
PACK_ODD_EVEN_SORT_KERNEL_MAP( U32, U32 ),
PACK_ODD_EVEN_SORT_KERNEL_MAP( I32, I32 ),
+ PACK_ODD_EVEN_SORT_KERNEL_MAP2( F32, U32 ),
+ PACK_ODD_EVEN_SORT_KERNEL_MAP2( F32, I32 ),
};
/*
@@ -108,11 +133,15 @@ static vx_param_description_t _topk_kernel_param_def[] =
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
// Add kererl parameters here
};
#define _TOPK_PARAM_NUM _cnt_of_array( _topk_kernel_param_def )
-#define SCALAR_INPUT_NUM_STAGES (3)
-#define SCALAR_INPUT_WIDTH (4)
+#define SCALAR_INPUT_NUM_STAGES (7)
+#define SCALAR_INPUT_WIDTH (8)
static vx_param_description_t _topk_odd_even_sort_kernel_param_def[] =
{
@@ -122,10 +151,14 @@ static vx_param_description_t _topk_odd_even_sort_kernel_param_def[] =
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+ {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
// Add kererl parameters here
};
#define _TOPK_ODD_EVEN_SORT_PARAM_NUM _cnt_of_array( _topk_odd_even_sort_kernel_param_def )
-#define SCALAR_INPUT_SIZE (5)
+#define SCALAR_INPUT_SIZE (9)
/*
* Kernel initializer
*/
@@ -251,6 +284,22 @@ static vsi_status _query_kernel
case _PACK_SELECT_KEY(I8, I8):
key = TOPK_HASH_KEY( I32, I32, num_stages );
break;
+ case _PACK_SELECT_KEY(F32, U32):
+ case _PACK_SELECT_KEY(F16, U32):
+ case _PACK_SELECT_KEY(F32, U16):
+ case _PACK_SELECT_KEY(F16, U16):
+ case _PACK_SELECT_KEY(F32, U8):
+ case _PACK_SELECT_KEY(F16, U8):
+ key = TOPK_HASH_KEY( F32, U32, num_stages );
+ break;
+ case _PACK_SELECT_KEY(F32, I32):
+ case _PACK_SELECT_KEY(F16, I32):
+ case _PACK_SELECT_KEY(F32, I16):
+ case _PACK_SELECT_KEY(F16, I16):
+ case _PACK_SELECT_KEY(F32, I8):
+ case _PACK_SELECT_KEY(F16, I8):
+ key = TOPK_HASH_KEY( F32, I32, num_stages );
+ break;
default:
break;
}
@@ -318,6 +367,22 @@ static vsi_status _query_odd_even_sort_kernel
case _PACK_SELECT_KEY(I8, I8):
key = TOPK_ODD_EVEN_SORT_HASH_KEY( I32, I32 );
break;
+ case _PACK_SELECT_KEY(F32, U32):
+ case _PACK_SELECT_KEY(F16, U32):
+ case _PACK_SELECT_KEY(F32, U16):
+ case _PACK_SELECT_KEY(F16, U16):
+ case _PACK_SELECT_KEY(F32, U8):
+ case _PACK_SELECT_KEY(F16, U8):
+ key = TOPK_ODD_EVEN_SORT_HASH_KEY2( F32, U32 );
+ break;
+ case _PACK_SELECT_KEY(F32, I32):
+ case _PACK_SELECT_KEY(F16, I32):
+ case _PACK_SELECT_KEY(F32, I16):
+ case _PACK_SELECT_KEY(F16, I16):
+ case _PACK_SELECT_KEY(F32, I8):
+ case _PACK_SELECT_KEY(F16, I8):
+ key = TOPK_ODD_EVEN_SORT_HASH_KEY2( F32, I32 );
+ break;
default:
break;
}
@@ -372,14 +437,24 @@ static vsi_nn_kernel_node_t _setup
int32_t num_stages = (int32_t)ceil(log10(block_size / 2.0f) / log10(2.0f));
vsi_bool is_odd_even_sort = FALSE;
size_t param_num = _TOPK_PARAM_NUM;
+ float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
+ float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
+ float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
+ float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
+
+ outputScale = 1.0f / outputScale;
+ inputTail = -(inputTail * inputScale);
for (i = 1; i < inputs[0]->attr.dim_num; i ++)
{
block_num = block_num * inputs[0]->attr.size[i];
}
- if( vsi_nn_is_same_type(inputs[0], outputs[0]) == FALSE ||
- outputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_INT32 )
+ if ((vsi_nn_is_same_type(inputs[0], outputs[0]) == FALSE ||
+ outputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_INT32 ) &&
+ !(inputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_FLOAT16 &&
+ (outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_UINT8 ||
+ outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_INT16)))
{
return NULL;
}
@@ -425,10 +500,15 @@ static vsi_nn_kernel_node_t _setup
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
- /* Set inputs and outputs */
+ uint32_t index = (uint32_t)(input_num + output_num);
+ /* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, param_num,
rs_tensors, input_num, &rs_tensors[input_num], output_num );
/* Pass parameters to node. */
+ node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &inputScale ); /* the four quantization values are floats, so the scalars are F32 */
+ node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &inputTail );
+ node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &outputScale );
+ node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &outputTail );
if (is_odd_even_sort)
{
node_params[SCALAR_INPUT_SIZE] = vsi_nn_kernel_scalar_create(
@@ -452,8 +532,25 @@ final:
vsi_safe_release_tensor(rs_tensors[2]);
vsi_safe_release_tensor(rs_tensors[3]);
vsi_safe_release_tensor(rs_tensors[4]);
+
if (is_odd_even_sort)
{
+ if (node_params[5])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[5] );
+ }
+ if (node_params[6])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[6] );
+ }
+ if (node_params[7])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[7] );
+ }
+ if (node_params[8])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[8] );
+ }
if (node_params[SCALAR_INPUT_SIZE])
{
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SIZE] );
@@ -461,6 +558,22 @@ final:
}
else
{
+ if (node_params[3])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[3] );
+ }
+ if (node_params[4])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[4] );
+ }
+ if (node_params[5])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[5] );
+ }
+ if (node_params[6])
+ {
+ vsi_nn_kernel_scalar_release( &node_params[6] );
+ }
if (node_params[SCALAR_INPUT_NUM_STAGES])
{
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_NUM_STAGES] );
diff --git a/src/tim/vx/internal/src/kernel/cpu/add_mean_std_norm_cpu.c b/src/tim/vx/internal/src/kernel/cpu/add_mean_std_norm_cpu.c
deleted file mode 100644
index f4b6eee..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/add_mean_std_norm_cpu.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.add_mean_std_norm")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _add_mean_std_norm_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _ADD_MEAN_STD_NORM_PARAM_NUM _cnt_of_array( _add_mean_std_norm_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- int32_t i;
- float mean = .0f, stddev_inv = .0f, variance = .0f, input_d = .0f, data = .0f, eps = .0f;
- vsi_ssize_t v_size, n_batch, batch;
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[_CPU_IO_NUM], &(eps));
- v_size = in_attr[0]->shape->data[0];
- n_batch = in_attr[0]->shape->data[1];
-
- for (batch = 0; batch < n_batch; ++batch)
- {
- float sum = 0.0f;
- float sum_sq = 0.0f;
- vsi_ssize_t index_base = batch * v_size;
- for (i = 0; i < v_size; ++i)
- {
- vsi_ssize_t index = i + index_base;
- input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
- sum += input_d;
- sum_sq += input_d * input_d;
- }
-
- mean = sum / v_size;
- stddev_inv = 0.0f;
- variance = sum_sq / v_size - mean * mean;
-
- if (variance == 0)
- {
- stddev_inv = (float)(1.0f / sqrt(eps));
- }
- else
- {
- stddev_inv = (float)(1.0f / sqrt(variance));
- }
-
- for (i = 0; i < v_size; ++i)
- {
- vsi_ssize_t index = i + index_base;
- input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
- data = (input_d - mean) * stddev_inv;
- f32_out_buffer[0][index] = data;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _add_mean_std_norm_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _add_mean_std_norm_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_ADD_MEAN_STD_NORM_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _ADD_MEAN_STD_NORM_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[_CPU_IO_NUM] = vsi_nn_kernel_scalar_create( graph, F32, &eps );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _ADD_MEAN_STD_NORM_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[_CPU_IO_NUM] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( add_mean_std_norm, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/argmax_cpu.c b/src/tim/vx/internal/src/kernel/cpu/argmax_cpu.c
deleted file mode 100644
index 6bb8eeb..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/argmax_cpu.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "utils/vsi_nn_dtype_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_eltwise.h"
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("argmax_sw")
-
-DEF_KERNEL_EXECUTOR(_argmax_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
-
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- for (i = 0; i < axis; i++)
- {
- innerSize *= attr[0]->shape->data[i];
- }
-
- axisSize = attr[0]->shape->data[axis];
-
- for (i = axis + 1; i < (int32_t)attr[0]->shape->size; i++)
- {
- outerSize *= attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- float minMaxValue = buffer[0][outer * axisSize * innerSize + inner];
- int32_t minMaxIndex = 0;
- for (i = 1; i < axisSize; ++i)
- {
- float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
- if (value > minMaxValue)
- {
- minMaxValue = value;
- minMaxIndex = i;
- }
- }
- buffer[1][outer * innerSize + inner] = (float)minMaxIndex;
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _minimum_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _argmax_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-#define SCALAR_INPUT_AXIS (2)
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( argmax, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/argmin_cpu.c b/src/tim/vx/internal/src/kernel/cpu/argmin_cpu.c
deleted file mode 100644
index 3c9d6b9..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/argmin_cpu.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "utils/vsi_nn_dtype_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_eltwise.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("argmin_sw")
-
-DEF_KERNEL_EXECUTOR(_argmin_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
-
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- for (i = 0; i < axis; i++)
- {
- innerSize *= attr[0]->shape->data[i];
- }
-
- axisSize = attr[0]->shape->data[axis];
-
- for (i = axis + 1; i < (int32_t)attr[0]->shape->size; i++)
- {
- outerSize *= attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- float minMaxValue = buffer[0][outer * axisSize * innerSize + inner];
- int32_t minMaxIndex = 0;
- for (i = 1; i < axisSize; ++i)
- {
- float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
- if (value < minMaxValue)
- {
- minMaxValue = value;
- minMaxIndex = i;
- }
- }
- buffer[1][outer * innerSize + inner] = (float)minMaxIndex;
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _minimum_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _argmin_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-#define SCALAR_INPUT_AXIS (2)
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( argmin, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/axis_aligned_bbox_transform_cpu.c b/src/tim/vx/internal/src/kernel/cpu/axis_aligned_bbox_transform_cpu.c
deleted file mode 100644
index 9d39e21..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/axis_aligned_bbox_transform_cpu.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (4)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.axis_aligned_bbox_transform")
-
-typedef struct vsi_nn_box_encoding_corner_t
-{
- float x1, y1, x2, y2;
-}vsi_nn_box_encoding_corner;
-
-typedef struct vsi_nn_box_encoding_center_t
-{
- float w, h, x, y;
-}vsi_nn_box_encoding_center;
-
-/*
- * Kernel params
- */
-static vx_param_description_t _axis_aligned_bbox_transform_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM _cnt_of_array( _axis_aligned_bbox_transform_kernel_param_def )
-
-
-static void _to_box_encoding_corner
- (
- vsi_nn_box_encoding_center* ctr,
- vsi_nn_box_encoding_corner* cnr
- )
-{
- cnr->x1 = ctr->x - ctr->w / 2;
- cnr->y1 = ctr->y - ctr->h / 2;
- cnr->x2 = ctr->x + ctr->w / 2;
- cnr->y2 = ctr->y + ctr->h / 2;
-}
-
-static void _to_box_encoding_center
- (
- vsi_nn_box_encoding_corner* cnr,
- vsi_nn_box_encoding_center* ctr
- )
-{
- ctr->w = cnr->x2 - cnr->x1;
- ctr->h = cnr->y2 - cnr->y1;
- ctr->x = (cnr->x1 + cnr->x2) / 2;
- ctr->y = (cnr->y1 + cnr->y2) / 2;
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- const uint32_t roiLength = 4;
- const uint32_t imageLength = 2;
- vsi_size_t numClasses = 0;
- vsi_size_t numRois = 0;
- vsi_size_t j;
- vsi_size_t roiIndex;
-
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- numClasses = in_attr[1]->shape->data[0] / roiLength;
- numRois = in_attr[0]->shape->data[1];
-
- for (roiIndex = 0; roiIndex < numRois; roiIndex++)
- {
- uint32_t batchIndex = (uint32_t)f32_in_buffer[2][roiIndex];
- float imageHeight = f32_in_buffer[3][batchIndex * imageLength];
- float imageWidth = f32_in_buffer[3][batchIndex * imageLength + 1];
- vsi_nn_box_encoding_corner roi_cnr;
- vsi_nn_box_encoding_center roiBefore;
- roi_cnr.x1 = f32_in_buffer[0][roiIndex * roiLength];
- roi_cnr.y1 = f32_in_buffer[0][roiIndex * roiLength + 1];
- roi_cnr.x2 = f32_in_buffer[0][roiIndex * roiLength + 2];
- roi_cnr.y2 = f32_in_buffer[0][roiIndex * roiLength + 3];
- _to_box_encoding_center(&roi_cnr, &roiBefore);
-
- for (j = 0; j < numClasses; j++)
- {
- vsi_nn_box_encoding_center roi_ctr;
- vsi_nn_box_encoding_corner roiAfter;
- vsi_nn_box_encoding_corner cliped;
- vsi_size_t index = (roiIndex * numClasses + j) * roiLength;
-
- roi_ctr.w = (float)(exp(f32_in_buffer[1][index + 2]) * roiBefore.w);
- roi_ctr.h = (float)(exp(f32_in_buffer[1][index + 3]) * roiBefore.h);
- roi_ctr.x = roiBefore.x + f32_in_buffer[1][index] * roiBefore.w;
- roi_ctr.y = roiBefore.y + f32_in_buffer[1][index + 1] * roiBefore.h;
- _to_box_encoding_corner(&roi_ctr, &roiAfter);
-
- cliped.x1 = vsi_nn_min(vsi_nn_max(roiAfter.x1, 0.0f), imageWidth);
- cliped.y1 = vsi_nn_min(vsi_nn_max(roiAfter.y1, 0.0f), imageHeight);
- cliped.x2 = vsi_nn_min(vsi_nn_max(roiAfter.x2, 0.0f), imageWidth);
- cliped.y2 = vsi_nn_min(vsi_nn_max(roiAfter.y2, 0.0f), imageHeight);
- f32_out_buffer[0][index] = cliped.x1;
- f32_out_buffer[0][index + 1] = cliped.y1;
- f32_out_buffer[0][index + 2] = cliped.x2;
- f32_out_buffer[0][index + 3] = cliped.y2;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _axis_aligned_bbox_transform_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _axis_aligned_bbox_transform_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( axis_aligned_bbox_transform, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/batchnorm_single_cpu.c b/src/tim/vx/internal/src/kernel/cpu/batchnorm_single_cpu.c
deleted file mode 100644
index dcf7940..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/batchnorm_single_cpu.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_tensor_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_eltwise.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (5)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("batch_norm_sw")
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-DEF_KERNEL_EXECUTOR(_batch_norm_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_SUCCESS;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- vsi_size_t out_elements = 0;
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- float eps = 0.f;
-
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[param_size - 1], &eps);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for ( i = 0; i < _CPU_INPUT_NUM; i++)
- {
- tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
- attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[i], stride_size[i] );
- buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[i], attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[i], "Create input buffer fail.", final );
- }
-
- tensors[5] = (vsi_nn_kernel_tensor_t)param[5];
- attr[5] = vsi_nn_kernel_tensor_attr_create( tensors[5] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[5] );
-
- buffer[5] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[5], "Create output buffer fail.", final );
- memset( buffer[5], 0, out_elements * sizeof(float) );
-
- for( i = 0; i < out_elements; i ++ )
- {
- vsi_ssize_t in_offset[5] = {0};
- int32_t j = 0;
- float src = 0.f;
- float mean = 0.f;
- float variance = 0.f;
- float beta = 0.f;
- float gamma = 0.f;
-
- for ( j = 0; j < 5; j++)
- {
- in_offset[j] = _expand_offset( i, attr[j]->shape->data, (vsi_size_t)attr[j]->shape->size,
- stride_size[j], attr[5]->shape->data );
- }
-
- src = buffer[0][in_offset[0]];
- mean = buffer[1][in_offset[1]];
- variance = buffer[2][in_offset[2]];
- gamma = buffer[3][in_offset[3]];
- beta = buffer[4][in_offset[4]];
-
-
- buffer[5][i] = (src - mean) * gamma/ sqrtf(variance + eps) + beta;
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[5], attr[5],
- buffer[5], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _batch_norm_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-#define SCALAR_INPUT_EPS (6)
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _batch_norm_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float eps = 0;
-
- eps = vsi_nn_kernel_param_get_float32(params, "eps");
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- /* Pass parameters to node. */
- backend_params[SCALAR_INPUT_EPS] = vsi_nn_kernel_scalar_create(
- graph, F32, &eps );
-
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_EPS] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( batchnorm_single, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/box_with_nms_limit_cpu.c b/src/tim/vx/internal/src/kernel/cpu/box_with_nms_limit_cpu.c
deleted file mode 100644
index 28a5763..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/box_with_nms_limit_cpu.c
+++ /dev/null
@@ -1,534 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (3)
-#define _OUTPUT_NUM (4)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.box_with_nms_limit")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _box_with_nms_limit_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _BOX_WITH_NMS_LIMIT_PARAM_NUM _cnt_of_array( _box_with_nms_limit_kernel_param_def )
-#define SCORE_THRESHOLD (7)
-#define MAX_NUM_DETECTIONS (8)
-#define NMS_KERNEL_METHOD (9)
-#define IOU_THRESHOLD (10)
-#define SIGMA (11)
-#define NMS_SCORE_THRESHOLD (12)
-
-static float hard_nms_kernel
- (
- float iou,
- float iouThreshold
- )
-{
- return iou < iouThreshold ? 1.0f : 0.0f;
-}
-
-static float linear_nms_kernel
- (
- float iou,
- float iouThreshold
- )
-{
- return iou < iouThreshold ? 1.0f : 1.0f - iou;
-}
-
-static float gaussian_nms_kernel
- (
- float iou,
- float sigma
- )
-{
- return (float)(exp(-1.0f * iou * iou / sigma));
-}
-
-void swap_element
- (
- uint32_t* list,
- uint32_t first,
- uint32_t second
- )
-{
- uint32_t temp = list[first];
- list[first] = list[second];
- list[second] = temp;
-}
-
-uint32_t max_element
- (
- float* data,
- uint32_t* index_list,
- uint32_t len
- )
-{
- uint32_t i;
- uint32_t max_index = 0;
- float max_val = data[index_list[0]];
- for(i = 1; i < len; i++)
- {
- float val = data[index_list[i]];
- if (max_val < val)
- {
- max_val = val;
- max_index = i;
- }
- }
- return max_index;
-}
-
-static uint32_t max_comp_func
- (
- void* data,
- int32_t left,
- int32_t right
- )
-{
- float* fdata = (float*)data;
- return fdata[left] >= fdata[right];
-}
-
-void sort_element_by_score
- (
- float* data,
- uint32_t* index_list,
- uint32_t len
- )
-{
- vsi_nn_partition(data, 0, len - 1, max_comp_func, TRUE, index_list);
-}
-
-typedef struct
-{
- float* fdata;
- uint32_t numClasses;
-} class_comp_param;
-
-static uint32_t class_comp_func
- (
- void* data,
- int32_t left,
- int32_t right
- )
-{
- class_comp_param *p = (class_comp_param*)data;
- float* fdata = p->fdata;
- uint32_t numClasses = p->numClasses;
- uint32_t lhsClass = left % numClasses, rhsClass = right % numClasses;
- return lhsClass == rhsClass ? fdata[left] > fdata[right]
- : lhsClass < rhsClass;
-}
-
-static void sort_element_by_class
- (
- float* data,
- uint32_t* index_list,
- uint32_t len,
- uint32_t numClasses
- )
-{
- class_comp_param class_comp;
- class_comp.fdata = data;
- class_comp.numClasses = numClasses;
- vsi_nn_partition(&class_comp, 0, len - 1, class_comp_func, TRUE, index_list);
-}
-
-// Taking two indices of bounding boxes, return the intersection-of-union.
-float getIoUAxisAligned
- (
- const float* roi1,
- const float* roi2
- )
-{
- const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]);
- const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]);
- const float x1 = vsi_nn_max(roi1[0], roi2[0]);
- const float x2 = vsi_nn_min(roi1[2], roi2[2]);
- const float y1 = vsi_nn_max(roi1[1], roi2[1]);
- const float y2 = vsi_nn_min(roi1[3], roi2[3]);
- const float w = vsi_nn_max(x2 - x1, 0.0f);
- const float h = vsi_nn_max(y2 - y1, 0.0f);
- const float areaIntersect = w * h;
- const float areaUnion = area1 + area2 - areaIntersect;
- return areaIntersect / areaUnion;
-}
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- int32_t* int32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- int32_t* int32_out_buffer[_OUTPUT_NUM] = {0};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i = 0;
- float score_threshold = 0;
- int32_t max_num_detections = 0;
- int32_t nms_kernel_method = 0;
- float iou_threshold = 0;
- float sigma = 0;
- float nms_score_threshold = 0;
- uint32_t j = 0, n = 0, b = 0, c = 0;
- const uint32_t kRoiDim = 4;
- uint32_t numRois = 0;
- uint32_t numClasses = 0;
- int32_t ind = 0;
- uint32_t * batch_data = NULL;
- int32_t numBatch = 0;
- uint32_t * select = NULL;
- uint32_t select_size = 0;
- uint32_t scores_index = 0;
- uint32_t roi_index = 0;
- uint32_t roi_out_index = 0;
-
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- if (i == 2)
- {
- int32_in_buffer[i] = (int32_t*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( int32_in_buffer[i], "Create input buffer fail.", final );
- }
- else
- {
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
- }
- }
-
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- if (i < 2)
- {
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- else
- {
- int32_out_buffer[i] = (int32_t *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( int32_out_buffer[i], "Create output buffer fail.", final );
- memset( int32_out_buffer[i], 0, out_bytes[i] );
- }
- }
-
-#define VSI_NN_KERNEL_READ_SCALAR(type, idx, pointer) \
- vsi_nn_kernel_scalar_read_##type((vsi_nn_kernel_scalar_t)param[idx], pointer)
-
- status = VSI_NN_KERNEL_READ_SCALAR(float32, SCORE_THRESHOLD, &score_threshold);
- status |= VSI_NN_KERNEL_READ_SCALAR(int32, MAX_NUM_DETECTIONS, &max_num_detections);
- status |= VSI_NN_KERNEL_READ_SCALAR(int32, NMS_KERNEL_METHOD, &nms_kernel_method);
- status |= VSI_NN_KERNEL_READ_SCALAR(float32, IOU_THRESHOLD, &iou_threshold);
- status |= VSI_NN_KERNEL_READ_SCALAR(float32, SIGMA, &sigma);
- status |= VSI_NN_KERNEL_READ_SCALAR(float32, NMS_SCORE_THRESHOLD, &nms_score_threshold);
- CHECK_STATUS_FAIL_GOTO(status, final );
-#undef VSI_NN_KERNEL_READ_SCALAR
-
- numRois = (uint32_t)in_attr[0]->shape->data[1];
- numClasses = (uint32_t)in_attr[0]->shape->data[0];
-
- batch_data = (uint32_t*)malloc(numRois * sizeof(uint32_t));
- CHECK_PTR_FAIL_GOTO( batch_data, "Create batch_data fail.", final );
- memset(batch_data, 0, numRois * sizeof(uint32_t));
-
- for (i = 0, ind = -1; i < numRois; i++)
- {
- if (int32_in_buffer[2][i] != ind)
- {
- ind = int32_in_buffer[2][i];
- numBatch++;
- }
- batch_data[numBatch - 1]++;
- }
- select = (uint32_t*)malloc(numBatch * numRois
- * numClasses * sizeof(uint32_t));
- CHECK_PTR_FAIL_GOTO( select, "Create select fail.", final );
- memset(select, 0, numBatch * numRois * numClasses * sizeof(uint32_t));
- for (n = 0; n < (uint32_t)numBatch; n++)
- {
- int32_t numDetections_batch = 0;
- uint32_t select_start_batch = select_size;
- uint32_t select_len = 0;
- // Exclude class 0 (background)
- for (c = 1; c < numClasses; c++)
- {
- uint32_t select_start = select_size;
- int32_t maxNumDetections0 = max_num_detections;
- uint32_t numDetections = 0;
- for (b = 0; b < batch_data[n]; b++)
- {
- uint32_t index = b * numClasses + c;
- float score = f32_in_buffer[0][scores_index + index];
- if (score > score_threshold) {
- select[select_size] = index;
- select_size++;
- }
- }
- select_len = select_size - select_start;
-
- if (maxNumDetections0 < 0)
- {
- maxNumDetections0 = select_len;
- }
-
- for (j = 0; (j < select_len && numDetections < (uint32_t)maxNumDetections0); j++)
- {
- // find max score and swap to the front.
- int32_t max_index = max_element(&(f32_in_buffer[0][scores_index]),
- &(select[select_start + j]), select_len - j) + j;
-
- swap_element(&(select[select_start]), max_index, j);
-
- // Calculate IoU of the rest, swap to the end (disgard) if needed.
- for (i = j + 1; i < select_len; i++)
- {
- int32_t roiBase0 = roi_index + select[select_start + i] * kRoiDim;
- int32_t roiBase1 = roi_index + select[select_start + j] * kRoiDim;
- float iou = getIoUAxisAligned(&(f32_in_buffer[1][roiBase0]),
- &(f32_in_buffer[1][roiBase1]));
- float kernel_iou;
- if (nms_kernel_method == 0)
- {
- kernel_iou = hard_nms_kernel(iou, iou_threshold);
- }
- else if (nms_kernel_method == 1)
- {
- kernel_iou = linear_nms_kernel(iou, iou_threshold);
- }
- else
- {
- kernel_iou = gaussian_nms_kernel(iou, sigma);
- }
- f32_in_buffer[0][scores_index + select[select_start + i]] *= kernel_iou;
- if (f32_in_buffer[0][scores_index + select[select_start + i]] < nms_score_threshold)
- {
- swap_element(&(select[select_start]), i, select_len - 1);
- i--;
- select_len--;
- }
- }
- numDetections++;
- }
- select_size = select_start + select_len;
- numDetections_batch += numDetections;
- }
-
- // Take top max_num_detections.
- sort_element_by_score(&(f32_in_buffer[0][scores_index]), &(select[select_start_batch]),
- numDetections_batch);
-
- if (numDetections_batch > max_num_detections && max_num_detections >= 0)
- {
- select_size = select_start_batch + max_num_detections;
- }
- select_len = select_size - select_start_batch;
- // Sort again by class.
- sort_element_by_class(&(f32_in_buffer[0][scores_index]), &(select[select_start_batch]),
- select_len, numClasses);
-
- for (i = 0; i < select_len; i++)
- {
- int32_t in_index0 = scores_index + select[select_start_batch + i];
- int32_t in_index1 = roi_index + select[select_start_batch + i] * kRoiDim;
- f32_out_buffer[0][roi_out_index] = f32_in_buffer[0][in_index0];
- memcpy(&(f32_out_buffer[1][roi_out_index * kRoiDim]),
- &f32_in_buffer[1][in_index1], kRoiDim * sizeof(float));
- int32_out_buffer[2][roi_out_index] = select[select_start_batch + i] % numClasses;
- int32_out_buffer[3][roi_out_index] = n;
- roi_out_index++;
- }
-
- scores_index += batch_data[n] * numClasses;
- roi_index += batch_data[n] * numClasses * kRoiDim;
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (i < 2)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write( output[i], out_attr[i],
- int32_out_buffer[i], out_bytes[i] );
- }
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-final:
- vsi_nn_safe_free(batch_data);
- vsi_nn_safe_free(select);
- for (i = 0; i < _INPUT_NUM; i++)
- {
- vsi_nn_safe_free(f32_in_buffer[i]);
- vsi_nn_safe_free(int32_in_buffer[i]);
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- vsi_nn_safe_free(f32_out_buffer[i]);
- vsi_nn_safe_free(int32_out_buffer[i]);
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _box_with_nms_limit_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _box_with_nms_limit_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_BOX_WITH_NMS_LIMIT_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float score_threshold = vsi_nn_kernel_param_get_float32( params, "score_threshold" );
- int32_t max_num_detections = vsi_nn_kernel_param_get_int32( params, "max_num_detections" );
- int32_t nms_kernel_method = vsi_nn_kernel_param_get_int32( params, "nms_kernel_method" );
- float iou_threshold = vsi_nn_kernel_param_get_float32( params, "iou_threshold" );
- float sigma = vsi_nn_kernel_param_get_float32( params, "sigma" );
- float nms_score_threshold = vsi_nn_kernel_param_get_float32( params, "nms_score_threshold" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status )
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _BOX_WITH_NMS_LIMIT_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCORE_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &score_threshold );
- node_params[MAX_NUM_DETECTIONS] = vsi_nn_kernel_scalar_create( graph, I32, &max_num_detections );
- node_params[NMS_KERNEL_METHOD] = vsi_nn_kernel_scalar_create( graph, I32, &nms_kernel_method );
- node_params[IOU_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &iou_threshold );
- node_params[SIGMA] = vsi_nn_kernel_scalar_create( graph, F32, &sigma );
- node_params[NMS_SCORE_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &nms_score_threshold );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _BOX_WITH_NMS_LIMIT_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[SCORE_THRESHOLD] );
- vsi_nn_kernel_scalar_release( &node_params[MAX_NUM_DETECTIONS] );
- vsi_nn_kernel_scalar_release( &node_params[NMS_KERNEL_METHOD] );
- vsi_nn_kernel_scalar_release( &node_params[IOU_THRESHOLD] );
- vsi_nn_kernel_scalar_release( &node_params[SIGMA] );
- vsi_nn_kernel_scalar_release( &node_params[NMS_SCORE_THRESHOLD] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( box_with_nms_limit, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/bucketize_cpu.c b/src/tim/vx/internal/src/kernel/cpu/bucketize_cpu.c
deleted file mode 100644
index b5bfbcb..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/bucketize_cpu.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.bucketize")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _bucketize_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _BUCKETIZE_PARAM_NUM _cnt_of_array( _bucketize_kernel_param_def )
-#define SCALAR_RIGHT_VALUE (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i = 0, j = 0;
- int32_t right = 0;
- uint32_t boundaries_size = 0;
-
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_RIGHT_VALUE], &(right));
-
- boundaries_size = (uint32_t)in_attr[1]->shape->data[0];
-
- for (i = 0; i < out_elements[0]; i++)
- {
- float src0 = f32_in_buffer[0][i];
- float dst = 0;
-
- for (j = 0; j < boundaries_size; j++)
- {
- float src1 = f32_in_buffer[1][j];
-
- if (right == 1)
- {
- dst += (src0 >= src1 ? 1.0f : 0.0f);
- }
- else
- {
- dst += (src0 > src1 ? 1.0f : 0.0f);
- }
- }
-
- f32_out_buffer[0][i] = dst;
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _bucketize_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _bucketize_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_BUCKETIZE_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t right = vsi_nn_kernel_param_get_int32( params, "right" );
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _BUCKETIZE_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- node_params[SCALAR_RIGHT_VALUE] = vsi_nn_kernel_scalar_create( graph, I32, &right );
- status = vsi_nn_kernel_node_pass_param( node, node_params, _BUCKETIZE_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_RIGHT_VALUE] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( bucketize, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/cast_cpu.c b/src/tim/vx/internal/src/kernel/cpu/cast_cpu.c
deleted file mode 100644
index 79cacfc..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/cast_cpu.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "utils/vsi_nn_dtype_util_prv.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.cast")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _cast_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _CAST_PARAM_NUM _cnt_of_array( _cast_kernel_param_def )
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- double max_value = 0.0f, min_value = 0.0f;
- vsi_bool clamp_flag = FALSE;
- vsi_nn_type_e out_type;
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- in_attr[i]->quant = VSI_NN_KERNEL_QUANT_NONE;
- in_attr[i]->dfp.fl = 0;
- in_attr[i]->asymm.scale = 1.0f;
- in_attr[i]->asymm.zero_point = 0;
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- out_type = vsi_nn_dtype_map_kernel(out_attr[0]->dtype);
-
- if( type_is_integer( out_type ) )
- {
- clamp_flag = TRUE;
- type_get_range(out_type, &max_value, &min_value);
- }
-
- for (i = 0; i < out_elements[0]; i++)
- {
- float val = f32_in_buffer[0][i];
- if (clamp_flag)
- {
- val = vsi_nn_clamp(val, (float)min_value, (float)max_value);
- }
- f32_out_buffer[0][i] = val;
- }
-
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- out_attr[i]->quant = VSI_NN_KERNEL_QUANT_NONE;
- out_attr[i]->dfp.fl = 0;
- out_attr[i]->asymm.scale = 1.0f;
- out_attr[i]->asymm.zero_point = 0;
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _cast_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _cast_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_CAST_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _CAST_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _CAST_PARAM_NUM );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( cast, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/clip_cpu.c b/src/tim/vx/internal/src/kernel/cpu/clip_cpu.c
deleted file mode 100644
index 5bb08de..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/clip_cpu.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.clip")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _clip_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _CLIP_PARAM_NUM _cnt_of_array( _clip_kernel_param_def )
-
-#define SCALAR_MIN_VALUE (2)
-#define SCALAR_MAX_VALUE (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- float min_value = 0.0f;
- float max_value = 0.0f;
-
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MIN_VALUE], &(min_value));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_VALUE], &(max_value));
-
- for (i = 0; i < out_elements[0]; i++)
- {
- f32_out_buffer[0][i] = vsi_nn_clamp(f32_in_buffer[0][i], min_value, max_value);
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
-
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _clip_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _clip_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_CLIP_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
- float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _CLIP_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
- node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _CLIP_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_MIN_VALUE] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_VALUE] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( clip, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/comparisons_cpu.c b/src/tim/vx/internal/src/kernel/cpu/comparisons_cpu.c
deleted file mode 100644
index a43f2f3..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/comparisons_cpu.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("comparisons_sw")
-
-typedef enum
-{
- COMP_GREAT = VSI_NN_RELATIONAL_OPS_GREAT,
- COMP_GREAT_EQUAL = VSI_NN_RELATIONAL_OPS_GREAT_EQUAL,
- COMP_LESS = VSI_NN_RELATIONAL_OPS_LESS,
- COMP_LESS_EQUAL = VSI_NN_RELATIONAL_OPS_LESS_EQUAL,
- COMP_NOT_EQUAL = VSI_NN_RELATIONAL_OPS_NOT_EQUAL,
- COMP_EQUAL = VSI_NN_RELATIONAL_OPS_EQUAL,
-} relational_type_e;
-
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-DEF_KERNEL_EXECUTOR(_comparisons_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- int32_t i = 0;
- int32_t operation = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &operation);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[0], stride_size[0] );
- vsi_nn_kernel_tensor_attr_get_stride( attr[1], stride_size[1] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- for (i = 0; i < (int32_t)out_elements; i++)
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float val1 = 0.f;
- float val2 = 0.f;
- vsi_bool data = 0;
-
- in0_offset = _expand_offset( i, attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size,
- stride_size[0], attr[2]->shape->data );
- in1_offset = _expand_offset( i, attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size,
- stride_size[1], attr[2]->shape->data );
-
- val1 = buffer[0][in0_offset];
- val2 = buffer[1][in1_offset];
-
- switch (operation)
- {
- case COMP_GREAT:
- data = val1 > val2;
- break;
- case COMP_GREAT_EQUAL:
- data = val1 >= val2;
- break;
- case COMP_LESS:
- data = val1 < val2;
- break;
- case COMP_LESS_EQUAL:
- data = val1 <= val2;
- break;
- case COMP_EQUAL:
- data = val1 == val2;
- break;
- case COMP_NOT_EQUAL:
- data = val1 != val2;
- break;
- default:
- break;
- }
- buffer[2][i] = (float)data;
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- if (attr[0])
- {
- vsi_nn_kernel_tensor_attr_release( &attr[0] );
- attr[0] = NULL;
- }
- if (attr[1])
- {
- vsi_nn_kernel_tensor_attr_release( &attr[1] );
- attr[1] = NULL;
- }
- if (attr[2])
- {
- vsi_nn_kernel_tensor_attr_release( &attr[2] );
- attr[2] = NULL;
- }
-
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- buffer[i] = NULL;
- }
- }
- return status;
-} /* _comparisons_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-#define INPUT_FUNC_OP (3)
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _comparisons_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t operation = 0;
-
- operation = vsi_nn_kernel_param_get_int32( params, "operation" );
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[INPUT_FUNC_OP] = vsi_nn_kernel_scalar_create(
- graph, I32, &operation );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &backend_params[INPUT_FUNC_OP] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
-
- return node;
-} /* _setup() */
-
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( relational_ops, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/conv1d_ovxlib_cpu.c b/src/tim/vx/internal/src/kernel/cpu/conv1d_ovxlib_cpu.c
deleted file mode 100644
index dd820df..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/conv1d_ovxlib_cpu.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.conv1d_ovxlib")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _conv1d_ovxlib_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _CONV1D_OVXLIB_PARAM_NUM _cnt_of_array( _conv1d_ovxlib_kernel_param_def )
-#define _IO_COUNT (4)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- int i = 0;
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_IO_COUNT] = { NULL };
- vsi_nn_kernel_tensor_attr_t* attr[_IO_COUNT] = { NULL };
- float* buffer[_IO_COUNT] = { NULL };
- int32_t stride = 0;
- int32_t pad_front = 0;
- int32_t pad_end = 0;
- int32_t dilation = 0;
- int32_t overflow_policy = 0;
- int32_t rounding_policy = 0;
- int32_t down_scale_size_rounding = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input buffer fail.", final );
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create input buffer fail.", final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &stride);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &pad_front);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[6], &pad_end);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[7], &dilation);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[8], &overflow_policy);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[9], &rounding_policy);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[10], &down_scale_size_rounding);
- CHECK_STATUS_FAIL_GOTO(status, final);
-
- {
- vsi_ssize_t batch = attr[0]->shape->data[2];
- vsi_ssize_t input_channel = attr[0]->shape->data[1];
- vsi_ssize_t input_height = attr[0]->shape->data[0];
- vsi_ssize_t kernel_size = attr[1]->shape->data[0];
- vsi_ssize_t output_channel = attr[1]->shape->data[2];
- vsi_ssize_t output_height = attr[3]->shape->data[0];
- vsi_ssize_t batch_index = 0;
- vsi_ssize_t input_channel_index = 0;
- vsi_ssize_t output_channel_index = 0;
- vsi_ssize_t output_h_index = 0;
-
- for(batch_index = 0; batch_index < batch; batch_index++)
- {
- float* per_batch_input = buffer[0] + batch_index * input_channel * input_height;
- float* per_batch_output = buffer[3] + batch_index * output_channel * output_height;
- for(output_channel_index = 0; output_channel_index < output_channel; output_channel_index++)
- {
- float* filter = buffer[1] + output_channel_index * input_channel * kernel_size;
- for(output_h_index = 0; output_h_index < output_height; output_h_index++)
- {
- float output_value = 0.;
- float* current_value_ptr = per_batch_input + output_h_index * stride;
-
- for(input_channel_index = 0; input_channel_index < input_channel; input_channel_index++)
- {
- int k = 0;
- int32_t index = 0;
- for(k = 0; k < kernel_size; k++)
- {
- float w = *(filter + input_channel_index * kernel_size + k);
- float v = *(current_value_ptr + input_channel_index * input_height + index);
-
- output_value += w * v;
- index += dilation;
- }
- }
-
- if(buffer[2])
- {
- output_value += buffer[2][output_channel_index];
- }
-
- *(per_batch_output + output_channel_index * output_height + output_h_index) = output_value;
- }
- }
- }
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], batch * output_channel * output_height );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for( i = 0; i < _IO_COUNT; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
-
- return status;
-} /* _compute() */
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _conv1d_ovxlib_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _conv1d_ovxlib_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_CONV1D_OVXLIB_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int j = 0;
-
- int32_t stride = vsi_nn_kernel_param_get_int32( params, "stride" );
- int32_t pad_front = vsi_nn_kernel_param_get_int32( params, "pad_front" );
- int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" );
- int32_t dilation = vsi_nn_kernel_param_get_int32( params, "dilation" );
- int32_t overflow_policy = vsi_nn_kernel_param_get_int32( params, "overflow_policy" );
- int32_t rounding_policy = vsi_nn_kernel_param_get_int32( params, "rounding_policy" );
- int32_t down_scale_size_rounding = vsi_nn_kernel_param_get_int32( params, "down_scale_size_rounding" );
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _CONV1D_OVXLIB_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- j = (int)(input_num + output_num);
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &stride );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &pad_front );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &pad_end );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &dilation );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &overflow_policy );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &rounding_policy );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &down_scale_size_rounding );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _CONV1D_OVXLIB_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( conv1d_ovxlib, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/cpu_backend_conv2d_cpu.c b/src/tim/vx/internal/src/kernel/cpu/cpu_backend_conv2d_cpu.c
deleted file mode 100644
index d273df6..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/cpu_backend_conv2d_cpu.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "cpu_backend/npuref_interface.h"
-
-__BEGIN_DECLS
-
-typedef enum
-{
- PARAM_INPUT = 0,
- PARAM_KERNEL,
- PARAM_BIAS,
- PARAM_OUTPUT,
- PARAM_STRIDE_0,
- PARAM_STRIDE_1,
- PARAM_PAD_0,
- PARAM_PAD_1,
- PARAM_PAD_2,
- PARAM_PAD_3,
- PARAM_DILATION_0,
- PARAM_DILATION_1,
- PARAM_MULTIPLIER,
- PARAM_NUM
-} param_index_e;
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (3)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.cpu_backend_conv2d")
-#define _IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-/*
- * Kernel params
- */
-static vx_param_description_t _cpu_backend_conv2d_kernel_param_def[] =
-{
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL },
- { VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
-};
-#define _CPU_BACKEND_CONV2D_PARAM_NUM _cnt_of_array( _cpu_backend_conv2d_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_attr_t * attr[_IO_NUM] = { NULL };
- int32_t strides[2];
- int32_t pad[4];
- int32_t dilation[2];
- void * buffer[_IO_NUM] = { NULL };
- int32_t i = 0;
- vsi_nn_kernel_tensor_t tensors[_IO_NUM] = { NULL };
- size_t out_elements = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[PARAM_INPUT];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[PARAM_KERNEL];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[PARAM_BIAS];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[PARAM_OUTPUT];
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- if ( param[PARAM_BIAS] )
- {
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- }
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_STRIDE_0], &strides[0] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_STRIDE_0], &strides[1] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_0], &pad[0] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_1], &pad[1] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_2], &pad[2] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_2], &pad[3] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_DILATION_0], &dilation[0] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_DILATION_1], &dilation[1] );
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create kernel buffer fail.", final );
- if ( param[PARAM_BIAS] )
- {
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create bias buffer fail.", final );
- }
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
-
- npuref_interface_quant_conv2d(buffer[0], attr[0],
- buffer[1], attr[1], buffer[2],
- pad, strides, dilation, attr[3], buffer[3]);
-
- status = vsi_nn_kernel_tensor_write( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for ( i = 0; i < _IO_NUM; i ++ )
- {
- if ( attr[i] )
- {
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _cpu_backend_conv2d_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _cpu_backend_conv2d_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_CPU_BACKEND_CONV2D_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- size_t size = 0;
- int32_t* stride = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "stride", &size);
- int32_t* pad = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "pad", &size);
- int32_t* dilation = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "dilation", &size);
- int32_t multiplier = vsi_nn_kernel_param_get_int32(params, "multiplier");
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _CPU_BACKEND_CONV2D_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[4] = vsi_nn_kernel_scalar_create( graph, I32, &stride[0] );
- node_params[5] = vsi_nn_kernel_scalar_create( graph, I32, &stride[1] );
- node_params[6] = vsi_nn_kernel_scalar_create( graph, I32, &pad[0] );
- node_params[7] = vsi_nn_kernel_scalar_create( graph, I32, &pad[1] );
- node_params[8] = vsi_nn_kernel_scalar_create( graph, I32, &pad[2] );
- node_params[9] = vsi_nn_kernel_scalar_create( graph, I32, &pad[3] );
- node_params[10] = vsi_nn_kernel_scalar_create( graph, I32, &dilation[0] );
- node_params[11] = vsi_nn_kernel_scalar_create( graph, I32, &dilation[1] );
- node_params[12] = vsi_nn_kernel_scalar_create( graph, I32, &multiplier );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _CPU_BACKEND_CONV2D_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[4] );
- vsi_nn_kernel_scalar_release( &node_params[5] );
- vsi_nn_kernel_scalar_release( &node_params[6] );
- vsi_nn_kernel_scalar_release( &node_params[7] );
- vsi_nn_kernel_scalar_release( &node_params[8] );
- vsi_nn_kernel_scalar_release( &node_params[9] );
- vsi_nn_kernel_scalar_release( &node_params[10] );
- vsi_nn_kernel_scalar_release( &node_params[11] );
- vsi_nn_kernel_scalar_release( &node_params[12] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( cpu_backend_conv2d, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/cpu_backend_deconv2d_cpu.c b/src/tim/vx/internal/src/kernel/cpu/cpu_backend_deconv2d_cpu.c
deleted file mode 100644
index b1502a5..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/cpu_backend_deconv2d_cpu.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "cpu_backend/npuref_interface.h"
-
-__BEGIN_DECLS
-
-typedef enum
-{
- PARAM_INPUT = 0,
- PARAM_KERNEL,
- PARAM_BIAS,
- PARAM_OUTPUT,
- PARAM_STRIDE_0,
- PARAM_STRIDE_1,
- PARAM_PAD_0,
- PARAM_PAD_1,
- PARAM_PAD_2,
- PARAM_PAD_3,
-
- PARAM_NUM
-} param_index_e;
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (3)
-#define _OUTPUT_NUM (1)
-#define _IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.cpu_backend_deconv2d")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _cpu_backend_deconv2d_kernel_param_def[] =
-{
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL },
- { VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED },
-};
-#define _CPU_BACKEND_DECONV2D_PARAM_NUM _cnt_of_array( _cpu_backend_deconv2d_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_attr_t * attr[_IO_NUM] = { NULL };
- int32_t strides[2];
- int dilation[2] = {1, 1};
- int32_t pad[4];
- void * buffer[_IO_NUM] = { NULL };
- int32_t i = 0;
- vsi_nn_kernel_tensor_t tensors[_IO_NUM] = { NULL };
- size_t out_elements = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[PARAM_INPUT];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[PARAM_KERNEL];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[PARAM_BIAS];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[PARAM_OUTPUT];
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- if ( param[PARAM_BIAS] )
- {
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- }
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_STRIDE_0], &strides[0] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_STRIDE_0], &strides[1] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_0], &pad[0] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_1], &pad[1] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_2], &pad[2] );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_2], &pad[3] );
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create kernel buffer fail.", final );
- if ( param[PARAM_BIAS] )
- {
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create bias buffer fail.", final );
- }
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
-
- npuref_interface_quant_deconv2d(buffer[0], attr[0],
- buffer[1], attr[1], buffer[2],
- pad, strides, dilation, attr[3], buffer[3]);
-
- status = vsi_nn_kernel_tensor_write( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for ( i = 0; i < _IO_NUM; i ++ )
- {
- if ( attr[i] )
- {
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _cpu_backend_deconv2d_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _cpu_backend_deconv2d_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_CPU_BACKEND_DECONV2D_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- size_t size = 0;
- int32_t* stride = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "stride", &size);
- int32_t* pad = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "pad", &size);
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _CPU_BACKEND_DECONV2D_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[4] = vsi_nn_kernel_scalar_create( graph, I32, &stride[0] );
- node_params[5] = vsi_nn_kernel_scalar_create( graph, I32, &stride[1] );
- node_params[6] = vsi_nn_kernel_scalar_create( graph, I32, &pad[0] );
- node_params[7] = vsi_nn_kernel_scalar_create( graph, I32, &pad[1] );
- node_params[8] = vsi_nn_kernel_scalar_create( graph, I32, &pad[2] );
- node_params[9] = vsi_nn_kernel_scalar_create( graph, I32, &pad[3] );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _CPU_BACKEND_DECONV2D_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[4] );
- vsi_nn_kernel_scalar_release( &node_params[5] );
- vsi_nn_kernel_scalar_release( &node_params[6] );
- vsi_nn_kernel_scalar_release( &node_params[7] );
- vsi_nn_kernel_scalar_release( &node_params[8] );
- vsi_nn_kernel_scalar_release( &node_params[9] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( cpu_backend_deconv2d, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/cumsum_cpu.c b/src/tim/vx/internal/src/kernel/cpu/cumsum_cpu.c
deleted file mode 100644
index 29f333d..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/cumsum_cpu.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (3)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.cumsum")
-
-DEF_KERNEL_EXECUTOR(_cumsum_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[2] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t i = 0;
- int32_t axisSize = 1, innerSize = 1, outerSize = 1;
- int32_t axis = 0, exclusive = 0, reverse = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &exclusive);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &reverse);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- {
- int32_t dims_num = (int32_t)attr[1]->shape->size;
- int32_t inner = 0;
- int32_t outer = 0;
-
- for(i = 0; i < axis; ++i)
- {
- innerSize *= (int32_t)attr[0]->shape->data[i];
- }
-
- axisSize = (int32_t)attr[0]->shape->data[i++];
-
- for(; i < dims_num; ++i)
- {
- outerSize *= (int32_t)attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- float sum = .0f;
-
- if (exclusive && reverse)
- {
- int32_t idx_out = (outer * axisSize + axisSize - 1) * innerSize + inner;
- buffer[1][idx_out] = sum;
- for (i = axisSize - 1; i > 0; i--)
- {
- int32_t idx = (outer * axisSize + i) * innerSize + inner;
- float value = buffer[0][idx];
- idx_out = (outer * axisSize + i - 1) * innerSize + inner;
- sum += value;
- buffer[1][idx_out] = sum;
- }
- }
- else if (exclusive)
- {
- int32_t idx_out = outer * axisSize * innerSize + inner;
- buffer[1][idx_out] = sum;
- for (i = 0; i < axisSize - 1; ++i)
- {
- int32_t idx = (outer * axisSize + i) * innerSize + inner;
- float value = buffer[0][idx];
- idx_out = (outer * axisSize + i + 1) * innerSize + inner;
- sum += value;
- buffer[1][idx_out] = sum;
- }
- }
- else if (reverse)
- {
- for (i = axisSize - 1; i >= 0; i--)
- {
- int32_t idx = (outer * axisSize + i) * innerSize + inner;
- float value = buffer[0][idx];
- sum += value;
- buffer[1][idx] = sum;
- }
- }
- else
- {
- for (i = 0; i < axisSize; ++i)
- {
- // i * innerSize + inner + outer * innerSize * axisSize
- int32_t idx = (outer * axisSize + i) * innerSize + inner;
- float value = buffer[0][idx];
- sum += value;
- buffer[1][idx] = sum;
- }
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for ( i = 0; i < 2; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for ( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _cumsum_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _cumsum_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _CUMSUM_PARAM_NUM _cnt_of_array( _cumsum_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _cumsum_exec;
- kernel->info.parameters = _cumsum_kernel_param_def;
- kernel->info.numParams = _CUMSUM_PARAM_NUM;
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- uint32_t index = 2;
- int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );
- int32_t exclusive = vsi_nn_kernel_param_get_int32( params, "exclusive" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &exclusive );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[2] );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( cumsum, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/depth2space_internal_cpu.c b/src/tim/vx/internal/src/kernel/cpu/depth2space_internal_cpu.c
deleted file mode 100644
index dea83c9..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/depth2space_internal_cpu.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.depth2space_crd")
-
-DEF_KERNEL_EXECUTOR(_depth2space_crd_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[2] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t block_size = 1;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &block_size);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- {
- vsi_size_t output_batch = attr[1]->shape->size > 3 ? attr[1]->shape->data[3] : 1;
- vsi_size_t output_depth = attr[1]->shape->data[2];
- vsi_size_t output_height = attr[1]->shape->data[1];
- vsi_size_t output_width = attr[1]->shape->data[0];
- vsi_size_t input_depth = attr[0]->shape->data[2];
- vsi_size_t input_height = attr[0]->shape->data[1];
- vsi_size_t input_width = attr[0]->shape->data[0];
- vsi_size_t batch = 0, out_h = 0, out_w = 0;
-
- for (batch = 0; batch < output_batch; ++ batch)
- {
- vsi_size_t output_batch_index = batch * output_height * output_width * output_depth;
- vsi_size_t input_batch_index = batch * input_height * input_width * input_depth;
- vsi_size_t out_d = 0;
- vsi_size_t block_e2 = block_size * block_size;
-
- for (out_d = 0; out_d < output_depth; out_d ++)
- {
- for (out_h = 0; out_h < output_height; ++ out_h)
- {
- for (out_w = 0; out_w < output_width; out_w ++)
- {
- vsi_size_t in_w = out_w / block_size;
- vsi_size_t in_h = out_h / block_size;
- vsi_size_t in_d = (out_w % block_size) + (out_h % block_size) * block_size + out_d * block_e2;
-
- vsi_size_t in_index = in_w + in_h * input_width + in_d * input_width * input_height
- + input_batch_index;
- vsi_size_t out_index = out_w + out_h * output_width + out_d * output_height * output_width
- + output_batch_index;
-
- buffer[1][out_index] = buffer[0][in_index];
- }
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- return status;
-} /* _depth2space_crd_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _depth2space_crd_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _DEPTH2SPACE_CRD_PARAM_NUM _cnt_of_array( _depth2space_crd_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _depth2space_crd_exec;
- kernel->info.parameters = _depth2space_crd_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _depth2space_crd_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 2;
- int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index] = vsi_nn_kernel_scalar_create( graph, I32, &block_size );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[2] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( depth2space_internal, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/depthwise_conv1d_cpu.c b/src/tim/vx/internal/src/kernel/cpu/depthwise_conv1d_cpu.c
deleted file mode 100644
index e6c787b..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/depthwise_conv1d_cpu.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "cpu_backend/npuref_interface.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-typedef enum
-{
- PARAM_INPUT = 0,
- PARAM_KERNEL,
- PARAM_BIAS,
- PARAM_OUTPUT,
- PARAM_STRIDE,
- PARAM_PAD_FRONT,
- PARAM_PAD_END,
- PARAM_DILATION,
- PARAM_MULTIPLIER,
- PARAM_NUM
-} param_index_e;
-
-#define _INPUT_NUM (PARAM_NUM)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.depthwise_conv1d")
-#define _IO_NUM (4)
-
-/*
- * Kernel params
- */
-static vx_param_description_t _depthwise_conv1d_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _DEPTHWISE_CONV1D_PARAM_NUM _cnt_of_array( _depthwise_conv1d_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_attr_t * attr[_IO_NUM] = { NULL };
- int32_t stride;
- int32_t pad_front;
- int32_t pad_end;
- int32_t dilation;
- int32_t multiplier;
- void * buffer[_IO_NUM] = { NULL };
- int32_t i = 0;
- vsi_nn_kernel_tensor_t tensors[_IO_NUM] = { NULL };
- size_t out_elements = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[PARAM_INPUT];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[PARAM_KERNEL];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[PARAM_BIAS];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[PARAM_OUTPUT];
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- if( param[PARAM_BIAS] )
- {
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- }
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_STRIDE], &stride );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_FRONT], &pad_front );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_PAD_END], &pad_end );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_DILATION], &dilation );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[PARAM_MULTIPLIER], &multiplier );
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create kernel buffer fail.", final );
- if( param[PARAM_BIAS] )
- {
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create bias buffer fail.", final );
- }
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
-
-
- {
- // Use conv2d compute
- int32_t input_shape_4d[4] = {1,0,0,0};
- int32_t kernel_shape_4d[4] = {1,0,0,0};
- int32_t output_shape_4d[4] = {1,0,0,0};
- memcpy( &input_shape_4d[1], attr[0]->shape->data, 3 * sizeof(int32_t) );
- memcpy( &kernel_shape_4d[1], attr[1]->shape->data, 3 * sizeof(int32_t) );
- memcpy( &output_shape_4d[1], attr[3]->shape->data, 3 * sizeof(int32_t) );
- npuref_interface_quant_depthwise_conv2d(
- buffer[0], buffer[1], buffer[2],
- input_shape_4d, 4,
- kernel_shape_4d, 4,
- output_shape_4d, 4,
- attr[0]->asymm.scale, attr[0]->asymm.zero_point,
- attr[1]->asymm.scale, attr[1]->asymm.zero_point,
- attr[3]->asymm.scale, attr[3]->asymm.zero_point,
- pad_front, pad_end, 0, 0,
- stride, 1, dilation, 1,
- buffer[3]
- );
- status = vsi_nn_kernel_tensor_write( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for( i = 0; i < _IO_NUM; i ++ )
- {
- if( attr[i] )
- {
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _depthwise_conv1d_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _depthwise_conv1d_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t stride = vsi_nn_kernel_param_get_int32( params, "stride" );
- int32_t pad_front = vsi_nn_kernel_param_get_int32( params, "pad_front" );
- int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" );
- int32_t dilation = vsi_nn_kernel_param_get_int32( params, "dilation" );
- int32_t multiplier = vsi_nn_kernel_param_get_int32( params, "multiplier" );
-
- if(!( inputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_UINT8
- && inputs[1]->attr.dtype.vx_type == VSI_NN_TYPE_UINT8
- && outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_UINT8))
- {
- //TODO: Support other types
- return NULL;
- }
-
- if( !npuref_exists() )
- {
- return NULL;
- }
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _DEPTHWISE_CONV1D_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[PARAM_STRIDE] = vsi_nn_kernel_scalar_create( graph, I32, &stride );
- node_params[PARAM_PAD_FRONT] = vsi_nn_kernel_scalar_create( graph, I32, &pad_front );
- node_params[PARAM_PAD_END] = vsi_nn_kernel_scalar_create( graph, I32, &pad_end );
- node_params[PARAM_DILATION] = vsi_nn_kernel_scalar_create( graph, I32, &dilation );
- node_params[PARAM_MULTIPLIER] = vsi_nn_kernel_scalar_create( graph, I32, &multiplier );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params,
- _DEPTHWISE_CONV1D_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[PARAM_STRIDE] );
- vsi_nn_kernel_scalar_release( &node_params[PARAM_PAD_FRONT] );
- vsi_nn_kernel_scalar_release( &node_params[PARAM_PAD_END] );
- vsi_nn_kernel_scalar_release( &node_params[PARAM_DILATION] );
- vsi_nn_kernel_scalar_release( &node_params[PARAM_MULTIPLIER] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( depthwise_conv1d, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/detect_post_box_cpu.c b/src/tim/vx/internal/src/kernel/cpu/detect_post_box_cpu.c
deleted file mode 100644
index 48de41c..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/detect_post_box_cpu.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.detect_post_box")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _detect_post_box_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _DETECT_POST_BOX_PARAM_NUM _cnt_of_array( _detect_post_box_kernel_param_def )
-
-#define SCALAR_SCALE_Y (3)
-#define SCALAR_SCALE_X (4)
-#define SCALAR_SCALE_H (5)
-#define SCALAR_SCALE_W (6)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- vsi_size_t n, a, numBatches, numAnchors, lengthBoxEncoding;
- uint32_t kRoiDim = 4;
- float inv_scale_y = 0.0f;
- float inv_scale_x = 0.0f;
- float inv_scale_h = 0.0f;
- float inv_scale_w = 0.0f;
-
- /* prepare data */
- for ( i = 0; i < _INPUT_NUM; i++ )
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_SCALE_Y], &(inv_scale_y));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_SCALE_X], &(inv_scale_x));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_SCALE_H], &(inv_scale_h));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_SCALE_W], &(inv_scale_w));
-
- numBatches = in_attr[0]->shape->data[2];
- numAnchors = in_attr[0]->shape->data[1];
- lengthBoxEncoding = in_attr[0]->shape->data[0];
-
- for ( n = 0; n < numBatches; n++ )
- {
- vsi_ssize_t batch_in_offset = n * numAnchors * lengthBoxEncoding;
- vsi_ssize_t batch_out_offset = n * numAnchors * kRoiDim;
- for ( a = 0; a < numAnchors; a++ )
- {
- float yCtr = f32_in_buffer[1][a * kRoiDim] + f32_in_buffer[1][a * kRoiDim + 2]
- * f32_in_buffer[0][batch_in_offset + a * lengthBoxEncoding] * inv_scale_y;
- float xCtr = f32_in_buffer[1][a * kRoiDim + 1] + f32_in_buffer[1][a * kRoiDim + 3]
- * f32_in_buffer[0][batch_in_offset + a * lengthBoxEncoding + 1] * inv_scale_x;
- float hHalf = f32_in_buffer[1][a * kRoiDim + 2] *
- (float)exp(f32_in_buffer[0][batch_in_offset + a * lengthBoxEncoding + 2] * inv_scale_h) * 0.5f;
- float wHalf = f32_in_buffer[1][a * kRoiDim + 3] *
- (float)exp(f32_in_buffer[0][batch_in_offset + a * lengthBoxEncoding + 3] * inv_scale_w) * 0.5f;
- f32_out_buffer[0][batch_out_offset + a * kRoiDim] = yCtr - hHalf;
- f32_out_buffer[0][batch_out_offset + a * kRoiDim + 1] = xCtr - wHalf;
- f32_out_buffer[0][batch_out_offset + a * kRoiDim + 2] = yCtr + hHalf;
- f32_out_buffer[0][batch_out_offset + a * kRoiDim + 3] = xCtr + wHalf;
- }
- }
-
-
- /* save data */
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for ( i = 0; i < _INPUT_NUM; i++ )
- {
- if ( f32_in_buffer[i] )
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if ( in_attr[i] )
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- if ( f32_out_buffer[i] )
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if ( out_attr[i] )
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _detect_post_box_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _detect_post_box_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_DETECT_POST_BOX_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float inv_scale_y = vsi_nn_kernel_param_get_float32( params, "inv_scale_y" );
- float inv_scale_x = vsi_nn_kernel_param_get_float32( params, "inv_scale_x" );
- float inv_scale_h = vsi_nn_kernel_param_get_float32( params, "inv_scale_h" );
- float inv_scale_w = vsi_nn_kernel_param_get_float32( params, "inv_scale_w" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status )
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _DETECT_POST_BOX_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_SCALE_Y] = vsi_nn_kernel_scalar_create( graph, F32, &inv_scale_y );
- node_params[SCALAR_SCALE_X] = vsi_nn_kernel_scalar_create( graph, F32, &inv_scale_x );
- node_params[SCALAR_SCALE_H] = vsi_nn_kernel_scalar_create( graph, F32, &inv_scale_h );
- node_params[SCALAR_SCALE_W] = vsi_nn_kernel_scalar_create( graph, F32, &inv_scale_w );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _DETECT_POST_BOX_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_Y] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_H] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_W] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( detect_post_box, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/detect_post_nms_cpu.c b/src/tim/vx/internal/src/kernel/cpu/detect_post_nms_cpu.c
deleted file mode 100644
index 3092350..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/detect_post_nms_cpu.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (4)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.detect_post_nms")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _detect_post_nms_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _DETECT_POST_NMS_PARAM_NUM _cnt_of_array( _detect_post_nms_kernel_param_def )
-
-#define SCALAR_NMS_TYPE (6)
-#define SCALAR_MAX_NUM (7)
-#define SCALAR_MAX_CLASS (8)
-#define SCALAR_MAX_DETECT (9)
-#define SCALAR_SCORE_TH (10)
-#define SCALAR_IOU_TH (11)
-#define SCALAR_IS_BG (12)
-
-static void _swap_element
- (
- uint32_t* list,
- uint32_t first,
- uint32_t second
- )
-{
- uint32_t temp = list[first];
- list[first] = list[second];
- list[second] = temp;
-}
-
-static uint32_t _max_element
- (
- float* data,
- uint32_t* index_list,
- uint32_t len
- )
-{
- uint32_t i;
- uint32_t max_index = 0;
- float max_val = data[index_list[0]];
- for ( i = 1; i < len; i++ )
- {
- float val = data[index_list[i]];
- if ( max_val < val )
- {
- max_val = val;
- max_index = i;
- }
- }
- return max_index;
-}
-
-static float _getIoUAxisAligned
- (
- const float* roi1,
- const float* roi2
- )
-{
- const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]);
- const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]);
- const float x1 = vsi_nn_max(roi1[0], roi2[0]);
- const float x2 = vsi_nn_min(roi1[2], roi2[2]);
- const float y1 = vsi_nn_max(roi1[1], roi2[1]);
- const float y2 = vsi_nn_min(roi1[3], roi2[3]);
- const float w = vsi_nn_max(x2 - x1, 0.0f);
- const float h = vsi_nn_max(y2 - y1, 0.0f);
- const float areaIntersect = w * h;
- const float areaUnion = area1 + area2 - areaIntersect;
- return areaIntersect / areaUnion;
-}
-
-static uint32_t _max_comp_func
- (
- void* data,
- int32_t left,
- int32_t right
- )
-{
- float* fdata = (float*)data;
- return fdata[left] >= fdata[right];
-}
-
-static void _sort_element_by_score
- (
- float* data,
- uint32_t* index_list,
- uint32_t len
- )
-{
- vsi_nn_partition(data, 0, len - 1, _max_comp_func, TRUE, index_list);
-}
-
-static float _max_element_value
- (
- float* data,
- uint32_t len
- )
-{
- uint32_t i;
- float max_val = data[0];
- for ( i = 1; i < len; i++ )
- {
- float val = data[i];
- if ( max_val < val )
- {
- max_val = val;
- }
- }
- return max_val;
-}
-
-static void _iota
- (
- int32_t * data,
- uint32_t len,
- int32_t value
- )
-{
- uint32_t i;
- for ( i = 0; i < len; i++ )
- {
- data [i] = value;
- value++;
- }
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i, j;
- vsi_size_t n, a, c, b, numBatches, numAnchors, numClasses;
- int32_t nms_type = 0;
- int32_t max_num_detections = 0;
- int32_t maximum_class_per_detection = 0;
- int32_t maximum_detection_per_class = 0;
- float score_threshold = 0.0f;
- float iou_threshold = 0.0f;
- int32_t is_bg_in_label = 0;
- vsi_size_t numOutDetection = 0;
-
- /* prepare data */
- for ( i = 0; i < _INPUT_NUM; i++ )
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_NMS_TYPE], &(nms_type));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_NUM], &(max_num_detections));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_CLASS], &(maximum_class_per_detection));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_DETECT], &(maximum_detection_per_class));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_SCORE_TH], &(score_threshold));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_IOU_TH], &(iou_threshold));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_IS_BG], &(is_bg_in_label));
-
- numBatches = in_attr[0]->shape->data[2];
- numAnchors = in_attr[0]->shape->data[1];
- numClasses = in_attr[0]->shape->data[0];
- numOutDetection = out_attr[0]->shape->data[0];
-
- {
- vsi_size_t scores_index = 0;
- vsi_size_t scores_out_index = 0;
- uint32_t kRoiDim = 4;
- vsi_size_t roi_out_index = 0;
- vsi_size_t class_out_index = 0;
- uint32_t* select = (uint32_t*)malloc(numAnchors * numClasses * sizeof(uint32_t));
- float* maxScores = (float*)malloc(numAnchors * sizeof(float));
- uint32_t* scoreInds = (uint32_t*)malloc((numClasses - 1) * sizeof(uint32_t));
-
- for ( n = 0; n < numBatches; n++ )
- {
- float* roiBuffer = &(f32_in_buffer[1][n * numAnchors * kRoiDim]);
- if (nms_type)
- {
- uint32_t select_size = 0;
- uint32_t select_start = 0;
- uint32_t select_len = 0;
- uint32_t numDetections = 0;
- for ( c = 1; c < numClasses; c++ )
- {
- select_start = select_size;
- for ( b = 0; b < numAnchors; b++ )
- {
- const vsi_size_t index = b * numClasses + c;
- float score = f32_in_buffer[0][scores_index + index];
- if (score > score_threshold) {
- select[select_size] = (uint32_t)index;
- select_size++;
- }
- }
- select_len = select_size - select_start;
-
- if ( maximum_detection_per_class < 0 )
- {
- maximum_detection_per_class = select_len;
- }
- numDetections = 0;
- for ( j = 0; (j < select_len && numDetections < (uint32_t)maximum_detection_per_class); j++ )
- {
- // find max score and swap to the front.
- int32_t max_index = _max_element(&(f32_in_buffer[0][scores_index]),
- &(select[select_start]), select_len);
- _swap_element(&(select[select_start]), max_index, j);
-
- // Calculate IoU of the rest, swap to the end (disgard) if needed.
- for ( i = j + 1; i < select_len; i++ )
- {
- vsi_ssize_t roiBase0 = (select[select_start + i] / numClasses) * kRoiDim;
- vsi_ssize_t roiBase1 = (select[select_start + j] / numClasses) * kRoiDim;
- float iou = _getIoUAxisAligned(&(roiBuffer[roiBase0]),
- &(roiBuffer[roiBase1]));
-
- if ( iou >= iou_threshold )
- {
- _swap_element(&(select[select_start]), i, select_len - 1);
- i--;
- select_len--;
- }
- }
- numDetections++;
- }
- select_size = select_start + numDetections;
- }
-
- select_len = select_size;
- select_start = 0;
-
- // Take top maxNumDetections.
- _sort_element_by_score(&(f32_in_buffer[0][scores_index]),
- &(select[select_start]), select_len);
-
- for ( i = 0; i < select_len; i++ )
- {
- uint32_t ind = select[i];
- f32_out_buffer[0][scores_out_index + i] =
- f32_in_buffer[0][scores_index + ind];
- memcpy(&(f32_out_buffer[1][roi_out_index + i * kRoiDim]),
- &roiBuffer[(ind / numClasses) * kRoiDim], kRoiDim * sizeof(float));
- f32_out_buffer[2][class_out_index + i] = (float)((ind % numClasses)
- - (is_bg_in_label ? 0 : 1));
- }
- f32_out_buffer[3][n] = (float)(select_len);
- }
- else
- {
- vsi_size_t numOutClasses = vsi_nn_min(numClasses - 1, (uint32_t)maximum_class_per_detection);
- uint32_t select_size = 0;
- uint32_t select_start = 0;
- uint32_t select_len = 0;
- uint32_t numDetections = 0;
- for ( a = 0; a < numAnchors; a++ )
- {
- // exclude background class: 0
- maxScores[a] = _max_element_value(&(f32_in_buffer[0]
- [scores_index + a * numClasses + 1]), (uint32_t)(numClasses - 1));
- if (maxScores[a] > score_threshold)
- {
- select[select_size] = (uint32_t)a;
- select_size++;
- }
- }
- select_len = select_size - select_start;
-
- if ( max_num_detections < 0 )
- {
- max_num_detections = select_len;
- }
- for ( j = 0; (j < select_len && numDetections < (uint32_t)max_num_detections); j++ )
- {
- // find max score and swap to the front.
- int32_t max_index = _max_element(maxScores,
- &(select[select_start + j]), select_len - j);
- _swap_element(&(select[select_start]), max_index + j, j);
-
- // Calculate IoU of the rest, swap to the end (disgard) if needed.
- for ( i = j + 1; i < select_len; i++ )
- {
- int32_t roiBase0 = select[select_start + i] * kRoiDim;
- int32_t roiBase1 = select[select_start + j] * kRoiDim;
- float iou = _getIoUAxisAligned(&(roiBuffer[roiBase0]),
- &(roiBuffer[roiBase1]));
- if ( iou >= iou_threshold )
- {
- _swap_element(&(select[select_start]), i, select_len - 1);
- i--;
- select_len--;
- }
- }
- numDetections++;
- }
- select_size = select_start + numDetections;
- select_len = select_size;
-
- for ( i = 0; i < select_len; i++ )
- {
- _iota((int32_t*)scoreInds, (uint32_t)(numClasses - 1), 1);
- _sort_element_by_score(&(f32_in_buffer[0][scores_index + select[i] * numClasses]),
- scoreInds, (uint32_t)(numClasses - 1));
- for (c = 0; c < numOutClasses; c++)
- {
- f32_out_buffer[0][scores_out_index + i * numOutClasses + c] =
- f32_in_buffer[0][scores_index + select[i] * numClasses + scoreInds[c]];
- memcpy(&(f32_out_buffer[1][roi_out_index + (i * numOutClasses + c)
- * kRoiDim]), &roiBuffer[select[i] * kRoiDim], kRoiDim * sizeof(float));
- f32_out_buffer[2][class_out_index + i * numOutClasses + c]
- = (float)(scoreInds[c] - (is_bg_in_label ? 0 : 1));
- }
- }
- f32_out_buffer[3][n] = (float)select_len;
- }
- scores_index += numAnchors * numClasses;
- scores_out_index += numOutDetection;
- roi_out_index += numOutDetection * kRoiDim;
- class_out_index += numOutDetection;
- }
-
- if (select) free(select);
- if (maxScores) free(maxScores);
- if (scoreInds) free(scoreInds);
- }
- /* save data */
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _detect_post_nms_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _detect_post_nms_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_DETECT_POST_NMS_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t nms_type = vsi_nn_kernel_param_get_int32( params, "nms_type" );
- int32_t max_num_detections = vsi_nn_kernel_param_get_int32( params, "max_num_detections" );
- int32_t maximum_class_per_detection = vsi_nn_kernel_param_get_int32( params, "maximum_class_per_detection" );
- int32_t maximum_detection_per_class = vsi_nn_kernel_param_get_int32( params, "maximum_detection_per_class" );
- float score_threshold = vsi_nn_kernel_param_get_float32( params, "score_threshold" );
- float iou_threshold = vsi_nn_kernel_param_get_float32( params, "iou_threshold" );
- int32_t is_bg_in_label = vsi_nn_kernel_param_get_int32( params, "is_bg_in_label" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status )
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _DETECT_POST_NMS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_NMS_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &nms_type );
- node_params[SCALAR_MAX_NUM] = vsi_nn_kernel_scalar_create( graph, I32, &max_num_detections );
- node_params[SCALAR_MAX_CLASS] = vsi_nn_kernel_scalar_create( graph, I32, &maximum_class_per_detection );
- node_params[SCALAR_MAX_DETECT] = vsi_nn_kernel_scalar_create( graph, I32, &maximum_detection_per_class );
- node_params[SCALAR_SCORE_TH] = vsi_nn_kernel_scalar_create( graph, F32, &score_threshold );
- node_params[SCALAR_IOU_TH] = vsi_nn_kernel_scalar_create( graph, F32, &iou_threshold );
- node_params[SCALAR_IS_BG] = vsi_nn_kernel_scalar_create( graph, I32, &is_bg_in_label );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _DETECT_POST_NMS_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_NMS_TYPE] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_NUM] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_CLASS] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_DETECT] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCORE_TH] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_IOU_TH] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_IS_BG] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( detect_post_nms, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c b/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c
deleted file mode 100644
index 061d5bc..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/** Unary Kernel internal type */
-typedef enum
-{
- UNARY_SIN,
- UNARY_COS,
- UNARY_EXP,
- UNARY_LOG,
- UNARY_NEG,
- UNARY_HSIGMOID,
- UNARY_MISH,
- UNARY_ROUND,
- UNARY_GELU,
- UNARY_HGELU,
- UNARY_SELU,
- UNARY_CELU,
- UNARY_RCP,
- UNARY_SIGN,
- UNARY_SOFTSIGN,
-} unary_type_e;
-
-
-#define _CPU_ARG_NUM (3)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("eltwise_unary_sw")
-
-static float exp_eval(float data)
-{
- return expf(data);
-}
-
-static float sin_eval(float data)
-{
- return sinf(data);
-}
-
-static float cos_eval(float data)
-{
- return cosf(data);
-}
-
-static float log_eval(float data)
-{
- return logf(data);
-}
-
-static float neg_eval(float data)
-{
- return data * -1.0f;
-}
-
-static float hsigmoid_eval(float data, float alpha, float beta)
-{
- data = (float)(alpha * data + beta);
- data = vsi_nn_clamp(data, 0, 1);
-
- return data;
-}
-
-static float soft_plus_eval(float data)
-{
- return log_eval(exp_eval(data) + 1);
-}
-
-static float mish_eval(float data)
-{
- data = (float)(data * tanh(soft_plus_eval(data)));
-
- return data;
-}
-
-static float round_eval(float data)
-{
- data = (float)(vsi_rtne(data));
-
- return data;
-}
-
-static float gelu_eval(float data)
-{
- data = (float)(0.5f * data * (1 + vsi_nn_erf_impl(data / (float)sqrt(2.0f))));
-
- return data;
-}
-
-#define VSI_SQRT_2_RCP_PI 0.7978845834732056f
-static float hgelu_eval(float data)
-{
- float cdf = (float)(0.5f * (1.0f + tanh((VSI_SQRT_2_RCP_PI *
- (data + 0.044715f * data * data * data)))));
-
- return data * cdf;
-}
-
-static float selu_eval(float data, float alpha, float gamma)
-{
- float y0 = alpha * gamma * expf(data) - alpha * gamma;
- float y1 = gamma * data;
- float y = data <= 0 ? y0 : y1;
-
- return y;
-}
-
-static float celu_eval(float x, float alpha)
-{
- float positive = vsi_nn_max(0, x);
- float negative = vsi_nn_min(alpha * (expf(x / alpha) - 1), 0);
-
- return positive + negative;
-}
-
-static float rcp_eval(float x)
-{
- return 1 / x;
-}
-
-static float sign_eval(float x)
-{
- return x > 0 ? 1.0f : x < 0 ? -1.0f : 0;
-}
-
-static float softsign_eval(float x)
-{
- return x / (1.0f + vsi_abs(x));
-}
-
-DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t i;
- float alpha = 0;
- float beta = 0;
- int32_t unary_type = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &unary_type);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &alpha);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[4], &beta);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- for ( i = 0; i < (int32_t)out_elements; ++i)
- {
- float data = buffer[0][i];
-
- switch (unary_type)
- {
- case UNARY_SIN:
- data = sin_eval(data);
- break;
- case UNARY_COS:
- data = cos_eval(data);
- break;
- case UNARY_EXP:
- data = exp_eval(data);
- break;
- case UNARY_LOG:
- data = log_eval(data);
- break;
- case UNARY_NEG:
- data = neg_eval(data);
- break;
- case UNARY_HSIGMOID:
- data = hsigmoid_eval(data, alpha, beta);
- break;
- case UNARY_MISH:
- data = mish_eval(data);
- break;
- case UNARY_ROUND:
- data = round_eval(data);
- break;
- case UNARY_GELU:
- data = gelu_eval(data);
- break;
- case UNARY_HGELU:
- data = hgelu_eval(data);
- break;
- case UNARY_SELU:
- data = selu_eval(data, alpha, beta);
- break;
- case UNARY_CELU:
- data = celu_eval(data, alpha);
- break;
- case UNARY_RCP:
- data = rcp_eval(data);
- break;
- case UNARY_SIGN:
- data = sign_eval(data);
- break;
- case UNARY_SOFTSIGN:
- data = softsign_eval(data);
- break;
- default:
- break;
- }
- buffer[1][i] = (float)data;
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
-#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
- SAFE_FREE_TENSOR_ATTR(attr[0]);
- SAFE_FREE_TENSOR_ATTR(attr[1]);
-#undef SAFE_FREE_TENSOR_ATTR
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- buffer[i] = NULL;
- }
- }
- return status;
-} /* _eltwise_unary_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-#define INPUT_FUNC_TYPE (2)
-#define INPUT_SCALAR_ALPHA (3)
-#define INPUT_SCALAR_BETA (4)
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _eltwise_unary_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel,
- const unary_type_e unary_type
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
- float beta = vsi_nn_kernel_param_get_float32( params, "beta" );
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
- graph, I32, &unary_type );
- backend_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
- graph, F32, &alpha );
- backend_params[INPUT_SCALAR_BETA] = vsi_nn_kernel_scalar_create(
- graph, F32, &beta );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &backend_params[INPUT_FUNC_TYPE] );
- vsi_nn_kernel_scalar_release( &backend_params[INPUT_SCALAR_ALPHA] );
- vsi_nn_kernel_scalar_release( &backend_params[INPUT_SCALAR_BETA] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
-
- return node;
-} /* _setup() */
-
-#define REGISTER_ELTWISE_UNARY_BACKEND_CPU(KERNEL_NAME, UNARY_TYPE) \
- static vsi_nn_kernel_node_t _##KERNEL_NAME##_setup \
- ( \
- vsi_nn_graph_t * graph, \
- vsi_nn_tensor_t ** inputs, \
- size_t input_num, \
- vsi_nn_tensor_t ** outputs, \
- size_t output_num, \
- const vsi_nn_kernel_param_t * params, \
- vsi_nn_kernel_t * kernel \
- ) \
- { \
- return _setup(graph, inputs, input_num, outputs, output_num, \
- params, kernel, UNARY_TYPE); \
- } \
- REGISTER_BACKEND_CPU( KERNEL_NAME, _##KERNEL_NAME##_setup )
-
-__END_DECLS
-
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( sin, UNARY_SIN )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( cos, UNARY_COS )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( exp, UNARY_EXP )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( log, UNARY_LOG )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( neg, UNARY_NEG )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( hard_sigmoid, UNARY_HSIGMOID )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( mish, UNARY_MISH )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( round, UNARY_ROUND )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( gelu, UNARY_GELU )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( hard_gelu, UNARY_HGELU )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( selu, UNARY_SELU )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( celu, UNARY_CELU )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( rcp, UNARY_RCP )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( sign, UNARY_SIGN )
-REGISTER_ELTWISE_UNARY_BACKEND_CPU( softsign, UNARY_SOFTSIGN )
\ No newline at end of file
diff --git a/src/tim/vx/internal/src/kernel/cpu/erf_cpu.c b/src/tim/vx/internal/src/kernel/cpu/erf_cpu.c
deleted file mode 100644
index cf427f7..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/erf_cpu.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.erf")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _erf_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _ERF_PARAM_NUM _cnt_of_array( _erf_kernel_param_def )
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- size_t i = 0;
-
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
-
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- for (i = 0; i < out_elements[0]; i ++)
- {
- float x = vsi_nn_erf_impl(f32_in_buffer[0][i]);
- f32_out_buffer[0][i] = x;
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _erf_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _erf_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_ERF_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
-
-
- status = _query_kernel( kernel, inputs, outputs);
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _ERF_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _ERF_PARAM_NUM );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( erf, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/extra_ending_cpu.c b/src/tim/vx/internal/src/kernel/cpu/extra_ending_cpu.c
deleted file mode 100644
index 371aead..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/extra_ending_cpu.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.extra_ending")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _extra_ending_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _EXTRA_ENDING_PARAM_NUM _cnt_of_array( _extra_ending_kernel_param_def )
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- uint8_t *u8_in_buffer[_INPUT_NUM] = {NULL};
- uint8_t *u8_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- int32_t i = 0;
-
- /* prepare data */
- input[1] = (vsi_nn_kernel_tensor_t)param[1];
- in_attr[1] = vsi_nn_kernel_tensor_attr_create( input[1] );
- u8_in_buffer[1] = (uint8_t*)vsi_nn_kernel_tensor_create_buffer( input[1], in_attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( u8_in_buffer[i], "Create input buffer fail.", final );
-
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(uint8_t);
- u8_out_buffer[i] = (uint8_t *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( u8_out_buffer[i], "Create output buffer fail.", final );
- memset( u8_out_buffer[i], 0, out_bytes[i] );
- }
-
- memcpy(u8_out_buffer[0], u8_in_buffer[1], out_bytes[0]);
-
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- status = vsi_nn_kernel_tensor_write( output[i], out_attr[i],
- u8_out_buffer[i], out_bytes[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- vsi_nn_safe_free(u8_in_buffer[i]);
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- vsi_nn_safe_free(u8_out_buffer[i]);
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _extra_ending_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _extra_ending_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_EXTRA_ENDING_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _EXTRA_ENDING_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _EXTRA_ENDING_PARAM_NUM );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( extra_ending, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/floordiv_cpu.c b/src/tim/vx/internal/src/kernel/cpu/floordiv_cpu.c
deleted file mode 100644
index 99ca050..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/floordiv_cpu.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.floordiv")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _floordiv_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _FLOORDIV_PARAM_NUM _cnt_of_array( _floordiv_kernel_param_def )
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
-
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- for (i = 0; i < out_elements[0]; i++)
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float in0 = 0;
- float in1 = 0;
-
- in0_offset = _expand_offset( i, in_attr[0]->shape->data, (vsi_size_t)in_attr[0]->shape->size,
- in_stride_size[0], out_attr[0]->shape->data );
- in1_offset = _expand_offset( i, in_attr[1]->shape->data, (vsi_size_t)in_attr[1]->shape->size,
- in_stride_size[1], out_attr[0]->shape->data );
- in0 = f32_in_buffer[0][in0_offset];
- in1 = f32_in_buffer[1][in1_offset];
- f32_out_buffer[0][i] = (float)floor(in0 / in1);
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
-
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _floordiv_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _floordiv_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_FLOORDIV_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs);
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _FLOORDIV_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _FLOORDIV_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( floordiv, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/gather_cpu.c b/src/tim/vx/internal/src/kernel/cpu/gather_cpu.c
deleted file mode 100644
index aa02a41..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/gather_cpu.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (4)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.gather")
-
-DEF_KERNEL_EXECUTOR(_gather_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[2] = { NULL };
- uint32_t* buffer_idx = NULL;
- size_t in_elements = 0, out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t i = 0, j = 0, b = 0;
- int32_t block_size = 1, block_num = 1, axis_num = 0, batch_dims = 0;
- vsi_size_t indices_num = 1, batch = 1, in_stride = 1, out_stride = 1;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- in_elements = vsi_nn_kernel_tensor_attr_get_size( attr[0] );
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &block_size);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &block_num);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &axis_num);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[6], &batch_dims);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer_idx = (uint32_t*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer_idx, "Create input1 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- {
- for (i = 0; i < attr[1]->shape->size - (vsi_size_t)batch_dims; i++)
- {
- indices_num *= attr[1]->shape->data[i];
- }
-
- for (; i < attr[1]->shape->size; i++)
- {
- batch *= attr[1]->shape->data[i];
- }
-
- for (i = 0; i < attr[0]->shape->size - (vsi_size_t)batch_dims; i++)
- {
- in_stride *= attr[0]->shape->data[i];
- }
-
- for (i = 0; i < attr[2]->shape->size - (vsi_size_t)batch_dims; i++)
- {
- out_stride *= attr[2]->shape->data[i];
- }
-
- for (b = 0; b < batch; b++)
- {
- for (i = 0; i < (vsi_size_t)block_num; i++)
- {
- for (j = 0; j < indices_num; j++)
- {
- uint32_t indice = buffer_idx[j + indices_num * b];
- vsi_size_t in_index = (i * axis_num + indice) * block_size + b * in_stride;
- if (in_index < in_elements)
- {
- vsi_size_t out_index = (i * indices_num + j) * block_size + b * out_stride;
- memcpy(&(buffer[1][out_index]), &(buffer[0][in_index]), block_size * sizeof(float));
- }
- else
- {
- status = VX_FAILURE;
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- if ( buffer_idx )
- {
- free( buffer_idx );
- }
- for ( i = 0; i < 2; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for ( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _gather_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _gather_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _GATHER_PARAM_NUM _cnt_of_array( _gather_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _gather_exec;
- kernel->info.parameters = _gather_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _gather_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- uint32_t index = 3;
- int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
- int32_t block_num = vsi_nn_kernel_param_get_int32( params, "block_num" );
- int32_t axis_num = vsi_nn_kernel_param_get_int32( params, "axis_num" );
- int32_t batch_dims = vsi_nn_kernel_param_get_int32( params, "batch_dims" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &block_size );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &block_num );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis_num );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &batch_dims );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( gather, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/gather_elements_cpu.c b/src/tim/vx/internal/src/kernel/cpu/gather_elements_cpu.c
deleted file mode 100644
index 65778e5..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/gather_elements_cpu.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _ARG_NUM (1)
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.gather_elements")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _gather_elements_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _GATHER_ELEMENTS_PARAM_NUM _cnt_of_array( _gather_elements_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[2] = { NULL };
- int32_t* buffer_idx = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t a = 0;
- vsi_size_t o = 0;
- vsi_size_t i = 0;
- vsi_size_t outer_size[2] = {1, 1};
- vsi_size_t inner_size[2] = {1, 1};
- vsi_size_t axis_size[2] = {1, 1};
- int32_t axis = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer_idx = (int32_t*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer_idx, "Create input1 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- axis_size[0] = attr[0]->shape->data[axis];
- axis_size[1] = attr[2]->shape->data[axis];
- for (i = 0; i < (vsi_size_t)axis; ++i)
- {
- inner_size[0] *= attr[0]->shape->data[i];
- inner_size[1] *= attr[2]->shape->data[i];
- }
-
- for (i = axis + 1; i < attr[2]->shape->size; ++i)
- {
- outer_size[0] *= attr[0]->shape->data[i];
- outer_size[1] *= attr[2]->shape->data[i];
- }
-
- for (o = 0; o < outer_size[1]; o++)
- {
- for (a = 0; a < axis_size[1]; a++)
- {
- for (i = 0; i < inner_size[1]; i++)
- {
- vsi_ssize_t index = 0;
- vsi_size_t index0 = (o * axis_size[1] + a) * inner_size[1] + i;
- vsi_size_t index1 = 1;
-
- index = (vsi_ssize_t)buffer_idx[index0];
- index = index < 0 ? index + (vsi_ssize_t)axis_size[0] : index;
- index1 = (o * axis_size[0] + index) * inner_size[0] + i;
-
- buffer[1][index0] = buffer[0][index1];
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-final:
- if ( buffer_idx )
- {
- free( buffer_idx );
- }
- for ( i = 0; i < 2; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for ( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _gather_elements_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _gather_elements_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_GATHER_ELEMENTS_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _GATHER_ELEMENTS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- node_params[3] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
- status = vsi_nn_kernel_node_pass_param( node, node_params, _GATHER_ELEMENTS_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[3] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( gather_elements, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/gather_nd_cpu.c b/src/tim/vx/internal/src/kernel/cpu/gather_nd_cpu.c
deleted file mode 100644
index d57cfd4..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/gather_nd_cpu.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (2)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.gather_nd")
-
-DEF_KERNEL_EXECUTOR(_gather_nd_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[2] = { NULL };
- uint32_t* buffer_idx = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t i = 0;
- int32_t block_size = 1;
- vsi_ssize_t indices_num = 1;
- int32_t coord_stride = 1;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &(block_size));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &(coord_stride));
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer_idx = (uint32_t*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
- CHECK_PTR_FAIL_GOTO( buffer_idx, "Create input1 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- // index number
- for(i = 0; i < (int32_t)attr[1]->shape->size; ++i)
- {
- indices_num *= attr[1]->shape->data[i];
- }
- indices_num /= coord_stride;
-
- if(coord_stride <= 4) // reshape 3D
- {
- vsi_ssize_t stride[4] = {block_size, 0, 0, 0};
- int32_t start_dim = (int32_t)attr[0]->shape->size - coord_stride;
- for(i = 1; i < coord_stride; ++i)
- {
- stride[i] = stride[i - 1] * attr[0]->shape->data[start_dim + i - 1];
- }
-
- for(i = 0; i < indices_num; i++)
- {
- vsi_size_t out_index = i * block_size;
- uint32_t coord[4] = {0};
- vsi_size_t in_index = 0;
- int32_t j = 0;
-
- for(j = 0; j < coord_stride; j++)
- {
- coord[j] = buffer_idx[i * coord_stride + j];
- in_index += coord[j] * stride[j];
- }
- memcpy(&(buffer[1][out_index]), &(buffer[0][in_index]), block_size * sizeof(float));
- }
- }
- else
- {
- status = VSI_FAILURE;
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- if( buffer_idx )
- {
- free( buffer_idx );
- }
- for( i = 0; i < 2; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_yuv420_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _gather_nd_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _GATHER_ND_PARAM_NUM _cnt_of_array( _gather_nd_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _gather_nd_exec;
- kernel->info.parameters = _gather_nd_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _gather_nd_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
- int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 3;
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &block_size );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &coord_dim );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( gather_nd, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/generate_proposals_cpu.c b/src/tim/vx/internal/src/kernel/cpu/generate_proposals_cpu.c
deleted file mode 100644
index 86e0c7e..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/generate_proposals_cpu.c
+++ /dev/null
@@ -1,504 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (4)
-#define _OUTPUT_NUM (3)
- #define _TENSOR_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.generate_proposals")
-
-
-typedef struct vsi_nn_box_encoding_corner_t
-{
- float x1, y1, x2, y2;
-}vsi_nn_box_encoding_corner;
-
-typedef struct vsi_nn_box_encoding_center_t
-{
- float w, h, x, y;
-}vsi_nn_box_encoding_center;
-/*
- * Kernel params
- */
-static vx_param_description_t _generate_proposals_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _GENERATE_PROPOSALS_PARAM_NUM _cnt_of_array( _generate_proposals_kernel_param_def )
-
-
-static void _to_box_encoding_corner
- (
- vsi_nn_box_encoding_center* ctr,
- vsi_nn_box_encoding_corner* cnr
- )
-{
- cnr->x1 = ctr->x - ctr->w / 2;
- cnr->y1 = ctr->y - ctr->h / 2;
- cnr->x2 = ctr->x + ctr->w / 2;
- cnr->y2 = ctr->y + ctr->h / 2;
-}
-
-static void _to_box_encoding_center
- (
- vsi_nn_box_encoding_corner* cnr,
- vsi_nn_box_encoding_center* ctr
- )
-{
- ctr->w = cnr->x2 - cnr->x1;
- ctr->h = cnr->y2 - cnr->y1;
- ctr->x = (cnr->x1 + cnr->x2) / 2;
- ctr->y = (cnr->y1 + cnr->y2) / 2;
-}
-
-static void _iota
- (
- int32_t * data,
- uint32_t len,
- int32_t value
- )
-{
- uint32_t i;
- for (i = 0; i < len; i++)
- {
- data [i] = value;
- value++;
- }
-}
-
-// swap_element is implemented in vsi_nn_kernel_box_with_nms_limit.c
-void swap_element
- (
- uint32_t* list,
- uint32_t first,
- uint32_t second
- );
-
-// max_element is implemented in vsi_nn_kernel_box_with_nms_limit.c
-uint32_t max_element
- (
- float* data,
- uint32_t* index_list,
- uint32_t len
- );
-
-// getIoUAxisAligned is implemented in vsi_nn_kernel_box_with_nms_limit.c
-float getIoUAxisAligned
- (
- const float* roi1,
- const float* roi2
- );
-
-// sort_element_by_score is implemented in vsi_nn_kernel_box_with_nms_limit.c
-void sort_element_by_score
- (
- float* data,
- uint32_t* index_list,
- uint32_t len
- );
-
-void _filter_boxes
- (
- const float* roiBase,
- const float* imageInfoBase,
- float minSize,
- uint32_t* select,
- uint32_t* len
- )
-{
- const uint32_t kRoiDim = 4;
- uint32_t i = 0;
- uint32_t j = 0;
-
- for (j = 0; j < *len; j++)
- {
- const float* roiInfo = roiBase + select[j] * kRoiDim;
- float roiWidth, roiHeight, xRoiCenter, yRoiCenter;
- roiWidth = roiInfo[2] - roiInfo[0];
- roiHeight = roiInfo[3] - roiInfo[1];
- xRoiCenter = roiInfo[0] + roiWidth / 2.0f;
- yRoiCenter = roiInfo[1] + roiHeight / 2.0f;
- if (roiWidth > minSize && roiHeight > minSize && xRoiCenter < imageInfoBase[1]
- && yRoiCenter < imageInfoBase[0])
- {
- select[i] = select[j];
- i++;
- }
- }
- *len = i;
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- float heightStride;
- float widthStride;
- int32_t preNmsTopN;
- int32_t postNmsTopN;
- float iouThreshold;
- float minSize;
-
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_float32( param[_TENSOR_NUM], &heightStride );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32( param[_TENSOR_NUM + 1], &widthStride );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[_TENSOR_NUM + 2], &preNmsTopN );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( param[_TENSOR_NUM + 3], &postNmsTopN );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32( param[_TENSOR_NUM + 4], &iouThreshold );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32( param[_TENSOR_NUM + 5], &minSize );
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- {
- uint32_t h, w, a, b, j;
- const uint32_t kRoiDim = 4;
- vsi_size_t numBatches = in_attr[0]->shape->data[3];
- vsi_size_t height = in_attr[0]->shape->data[2];
- vsi_size_t width = in_attr[0]->shape->data[1];
- vsi_size_t numAnchors = in_attr[0]->shape->data[0];
- vsi_size_t imageInfoLength = in_attr[3]->shape->data[0];
-
- vsi_size_t batchSize = height * width * numAnchors;
- vsi_size_t roiBufferSize = batchSize * kRoiDim;
-
- float * roiBuffer = (float*)malloc(roiBufferSize * sizeof(float));
- float * roiTransformedBuffer = (float*)malloc(roiBufferSize * sizeof(float));
- uint32_t* select = (uint32_t*)malloc(batchSize * sizeof(uint32_t));
- uint32_t index = 0;
- vsi_size_t scores_index = 0;
- vsi_size_t bboxDeltas_index = 0;
- vsi_size_t imageInfo_index = 0;
- uint32_t scores_out_index = 0;
- uint32_t roi_out_index = 0;
-
- // Compute the roi region for each anchor.
- for(h = 0; h < height; h++)
- {
- float hShift = h * heightStride;
- for(w = 0; w < width; w++)
- {
- float wShift = w * widthStride;
- uint32_t anchor_index = 0;
- for(a = 0; a < numAnchors; a++)
- {
- roiBuffer[index] = f32_in_buffer[2][anchor_index] + wShift;
- roiBuffer[index + 1] = f32_in_buffer[2][anchor_index + 1] + hShift;
- roiBuffer[index + 2] = f32_in_buffer[2][anchor_index + 2] + wShift;
- roiBuffer[index + 3] = f32_in_buffer[2][anchor_index + 3] + hShift;
-
- index += kRoiDim;
- anchor_index += kRoiDim;
- }
- }
- }
-
- for (b = 0; b < numBatches; b++)
- {
- const uint32_t roiLength = 4;
-
- vsi_size_t numRois = batchSize;
- vsi_size_t roiIndex;
- uint32_t select_len;
- int32_t numDetections = 0;
- for (roiIndex = 0; roiIndex < numRois; roiIndex++)
- {
- float imageHeight = f32_in_buffer[3][imageInfo_index];
- float imageWidth = f32_in_buffer[3][imageInfo_index + 1];
- vsi_nn_box_encoding_corner roi_cnr;
- vsi_nn_box_encoding_center roiBefore;
- roi_cnr.x1 = roiBuffer[roiIndex * roiLength];
- roi_cnr.y1 = roiBuffer[roiIndex * roiLength + 1];
- roi_cnr.x2 = roiBuffer[roiIndex * roiLength + 2];
- roi_cnr.y2 = roiBuffer[roiIndex * roiLength + 3];
- _to_box_encoding_center(&roi_cnr, &roiBefore);
- {
- vsi_nn_box_encoding_center roi_ctr;
- vsi_nn_box_encoding_corner roiAfter;
- vsi_nn_box_encoding_corner cliped;
- vsi_size_t idx = bboxDeltas_index + roiIndex * roiLength;
- roi_ctr.w = (float)(exp(f32_in_buffer[1][idx + 2]) * roiBefore.w);
- roi_ctr.h = (float)(exp(f32_in_buffer[1][idx + 3]) * roiBefore.h);
- roi_ctr.x = roiBefore.x + f32_in_buffer[1][idx] * roiBefore.w;
- roi_ctr.y = roiBefore.y + f32_in_buffer[1][idx + 1] * roiBefore.h;
- _to_box_encoding_corner(&roi_ctr, &roiAfter);
- cliped.x1 = vsi_nn_min(vsi_nn_max(roiAfter.x1, 0.0f), imageWidth);
- cliped.y1 = vsi_nn_min(vsi_nn_max(roiAfter.y1, 0.0f), imageHeight);
- cliped.x2 = vsi_nn_min(vsi_nn_max(roiAfter.x2, 0.0f), imageWidth);
- cliped.y2 = vsi_nn_min(vsi_nn_max(roiAfter.y2, 0.0f), imageHeight);
- roiTransformedBuffer[idx] = cliped.x1;
- roiTransformedBuffer[idx + 1] = cliped.y1;
- roiTransformedBuffer[idx + 2] = cliped.x2;
- roiTransformedBuffer[idx + 3] = cliped.y2;
- }
- }
-
- // Find the top preNmsTopN scores.
- _iota((int32_t*)select, (uint32_t)batchSize, 0);
- select_len = (uint32_t)batchSize;
- if(preNmsTopN > 0 && preNmsTopN < (int32_t)batchSize)
- {
- sort_element_by_score(&(f32_in_buffer[0][scores_index]),
- select, (uint32_t)batchSize);
- select_len = preNmsTopN;
- }
-
- // Filter boxes, disgard regions with height or width < minSize.
- _filter_boxes(roiTransformedBuffer, &(f32_in_buffer[3][0]),
- minSize, select, &select_len);
-
- // Apply hard NMS.
- if (postNmsTopN < 0)
- {
- postNmsTopN = select_len;
- }
-
- for (j = 0; (j < select_len && numDetections < postNmsTopN); j++)
- {
- // find max score and swap to the front.
- int32_t max_index = max_element(&(f32_in_buffer[0][scores_index]),
- &(select[j]), select_len - j) + j;
- swap_element(select, max_index, j);
-
- // Calculate IoU of the rest, swap to the end (disgard) ifneeded.
- for (i = j + 1; i < select_len; i++)
- {
- int32_t roiBase0 = select[i] * kRoiDim;
- int32_t roiBase1 = select[j] * kRoiDim;
- float iou = getIoUAxisAligned(&(roiTransformedBuffer[roiBase0]),
- &(roiTransformedBuffer[roiBase1]));
-
- if (iou >= iouThreshold)
- {
- swap_element(select, i, select_len - 1);
- i--;
- select_len--;
- }
- }
- numDetections++;
- }
-
- for (i = 0; i < select_len; i++)
- {
- memcpy(&(f32_out_buffer[1][roi_out_index]),
- &(roiTransformedBuffer[select[i] * kRoiDim]), kRoiDim * sizeof(float));
- f32_out_buffer[0][scores_out_index] =
- f32_in_buffer[0][scores_index + select[i]];
- f32_out_buffer[2][scores_out_index] = (float)b;
- scores_out_index++;
- roi_out_index += kRoiDim;
- }
-
- scores_index += batchSize;
- bboxDeltas_index += roiBufferSize;
- imageInfo_index += imageInfoLength;
- }
-
- vsi_nn_safe_free(roiBuffer);
- vsi_nn_safe_free(roiTransformedBuffer);
- vsi_nn_safe_free(select);
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _generate_proposals_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _generate_proposals_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_GENERATE_PROPOSALS_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- float height_stride = vsi_nn_kernel_param_get_float32( params, "height_stride");
- float width_stride = vsi_nn_kernel_param_get_float32( params, "width_stride");
- int32_t pre_nms_top_n = vsi_nn_kernel_param_get_int32( params, "pre_nms_top_n");
- int32_t post_nms_top_n = vsi_nn_kernel_param_get_int32( params, "post_nms_top_n");
- float iou_threshold = vsi_nn_kernel_param_get_float32(params, "iou_threshold");
- float min_size = vsi_nn_kernel_param_get_float32(params, "min_size");
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _GENERATE_PROPOSALS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[_TENSOR_NUM ] = vsi_nn_kernel_scalar_create( graph, F32, &height_stride );
- node_params[_TENSOR_NUM + 1] = vsi_nn_kernel_scalar_create( graph, F32, &width_stride );
- node_params[_TENSOR_NUM + 2] = vsi_nn_kernel_scalar_create( graph, I32, &pre_nms_top_n );
- node_params[_TENSOR_NUM + 3] = vsi_nn_kernel_scalar_create( graph, I32, &post_nms_top_n );
- node_params[_TENSOR_NUM + 4] = vsi_nn_kernel_scalar_create( graph, F32, &iou_threshold );
- node_params[_TENSOR_NUM + 5] = vsi_nn_kernel_scalar_create( graph, F32, &min_size );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _GENERATE_PROPOSALS_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[_TENSOR_NUM ] );
- vsi_nn_kernel_scalar_release( &node_params[_TENSOR_NUM + 1] );
- vsi_nn_kernel_scalar_release( &node_params[_TENSOR_NUM + 2] );
- vsi_nn_kernel_scalar_release( &node_params[_TENSOR_NUM + 3] );
- vsi_nn_kernel_scalar_release( &node_params[_TENSOR_NUM + 4] );
- vsi_nn_kernel_scalar_release( &node_params[_TENSOR_NUM + 5] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( generate_proposals, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/group_normalization_cpu.c b/src/tim/vx/internal/src/kernel/cpu/group_normalization_cpu.c
deleted file mode 100644
index 82b2482..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/group_normalization_cpu.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (2)
-#define _CPU_INPUT_NUM (3)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.group_norm")
-
-DEF_KERNEL_EXECUTOR(_group_norm_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t spaceOrg = 0;
- float eps = .0f;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[4], &eps);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &spaceOrg);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input1 buffer fail.", final );
-
- buffer[3] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
- memset( buffer[3], 0, out_elements * sizeof(float) );
-
- {
- vsi_size_t b = 0, c = 0;
- vsi_size_t height = attr[0]->shape->data[1];
- vsi_size_t width = attr[0]->shape->data[0];
- vsi_size_t ch = attr[0]->shape->size > 2 ? attr[0]->shape->data[2] : 1;
- vsi_size_t bh = attr[0]->shape->size > 3 ? attr[0]->shape->data[3] : 1;
- vsi_size_t spatial = height * width;
-
- for (b = 0; b < bh; b++)
- {
- for (c = 0; c < ch; c++)
- {
- vsi_size_t page = c * spatial + b * (spatial * ch);
- vsi_size_t paraIdx = c * attr[1]->shape->data[0];
- float sum = .0f;
- float sumsq = .0f;
- float mean = .0f;
- float vari = .0f;
- float data = 0;
-
- for (i = 0; i < spatial; i++)
- {
- vsi_size_t index = page + i;
- sum += buffer[0][index];
- }
-
- mean = sum / spatial;
- for (i = 0; i < spatial; i++)
- {
- vsi_size_t index = page + i;
- data = buffer[0][index] - mean;
- sumsq += data * data;
- }
-
- vari = sumsq / spatial;
- vari = (float)(1.0 / sqrtf(vari + eps));
-
- for (i = 0; i < spatial; i++)
- {
- float normVal = 0;
- vsi_size_t index = page + i;
- vsi_size_t tmpIdx = paraIdx + i / spaceOrg;
- float scaleVal = buffer[2][tmpIdx];
- float biasVal = buffer[1][tmpIdx];
-
- data = buffer[0][index] - mean;
- normVal = data * vari * scaleVal + biasVal;
- buffer[3][index] = normVal;
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _group_norm_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _group_normalization_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _GROUP_NORMALIZATION_PARAM_NUM _cnt_of_array( _group_normalization_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _group_norm_exec;
- kernel->info.parameters = _group_normalization_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _group_normalization_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static int32_t _optimize_gn_shape_cpu
- (
- vsi_nn_tensor_t ** inputs,
- vsi_size_t group_size,
- int32_t group_num,
- vsi_size_t* opt_shape
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_size_t group_shape[VSI_NN_MAX_DIM_NUM] = {0};
- vsi_size_t new_rank = 0;
- group_shape[0] = inputs[0]->attr.size[0];
- group_shape[1] = inputs[0]->attr.size[1];
- group_shape[2] = group_size;
-
- vsi_nn_kernel_optimize_element_shape(group_shape, 3, opt_shape, &new_rank );
-
- if (new_rank == 2)
- {
- opt_shape[2] = group_num;
- opt_shape[3] = inputs[0]->attr.dim_num > 3 ? inputs[0]->attr.size[3] : 1;
- }
- else
- {
- status = VSI_FAILURE;
- }
-
- return status;
-}
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- vsi_nn_kernel_tensor_t rs_input = NULL, rs_output = NULL;
- vsi_size_t new_shape[VSI_NN_MAX_DIM_NUM] = { 1, 1, 1, 1 };
- int32_t group_num = vsi_nn_kernel_param_get_int32( params, "group_num" );
- vsi_size_t group_size = inputs[0]->attr.size[2] / group_num;
- int32_t spaceOrg = (int32_t)(inputs[0]->attr.size[0] * inputs[0]->attr.size[1]);
-
- status = _optimize_gn_shape_cpu(inputs, group_size, group_num, new_shape);
- if ( VSI_SUCCESS != status )
- {
- goto final;
- }
- rs_input = vsi_nn_kernel_tensor_reshape(inputs[0]->t, new_shape, 4);
- rs_output = vsi_nn_kernel_tensor_reshape(outputs[0]->t, new_shape, 4);
-
- status = _query_kernel( inputs, outputs, kernel );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
- uint32_t index = 0;
- /* Set inputs and outputs */
- backend_params[index++] = rs_input;
- backend_params[index++] = (vsi_nn_kernel_node_param_t)inputs[1]->t;
- backend_params[index++] = (vsi_nn_kernel_node_param_t)inputs[2]->t;
- backend_params[index++] = rs_output;
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &eps );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &spaceOrg );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
-final:
- if (rs_input)
- {
- vsi_nn_kernel_tensor_release( &rs_input );
- }
- if (rs_output)
- {
- vsi_nn_kernel_tensor_release( &rs_output );
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( group_norm, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/grucell_activation_cpu.c b/src/tim/vx/internal/src/kernel/cpu/grucell_activation_cpu.c
deleted file mode 100644
index 1468b26..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/grucell_activation_cpu.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-#include
-#include
-#include
-
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.grucell_activation")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _grucell_activation_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _GRUCELL_ACTIVATION_PARAM_NUM _cnt_of_array( _grucell_activation_kernel_param_def )
-
-#define _IO_COUNT_DEFAULT (5)
-
-static vx_param_description_t _grucell_activation_separated_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _GRUCELL_ACTIVATION_SEPARATED_PARAM_NUM _cnt_of_array( _grucell_activation_separated_kernel_param_def )
-#define _IO_COUNT_SEPARATED (15)
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_ssize_t i = 0;
- vsi_ssize_t batch = 0;
- vsi_ssize_t hidden_units = 0;
- float * buffer[_IO_COUNT_DEFAULT] = { NULL };
- vsi_status status = VSI_FAILURE;
- vsi_nn_activation_e gate_activation;
- vsi_nn_activation_e candidate_activation;
- vsi_nn_kernel_tensor_t tensors[_IO_COUNT_DEFAULT] = { NULL };
- vsi_nn_kernel_tensor_attr_t* attr[_IO_COUNT_DEFAULT] = { NULL };
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
- tensors[4] = (vsi_nn_kernel_tensor_t)param[4];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- attr[4] = vsi_nn_kernel_tensor_attr_create( tensors[4] );
-
- /* z{t_} */
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input buffer fail.", final );
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input buffer fail.", final );
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create input buffer fail.", final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &gate_activation);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[6], &candidate_activation);
- CHECK_STATUS_FAIL_GOTO(status, final);
-
- batch = attr[0]->shape->data[1];
- hidden_units = attr[0]->shape->data[0];
-
- for( i = 0; i < batch * hidden_units; i++ )
- {
- float zt = vsi_nn_activation(buffer[0][i], gate_activation);
- float ht_ = vsi_nn_activation(buffer[1][i], candidate_activation);
- float ht_1 = buffer[2][i];
- float ht = zt * (ht_1 - ht_) + ht_;
-
- buffer[3][i] = ht;
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], batch * hidden_units );
- CHECK_STATUS_FAIL_GOTO( status, final );
- status = vsi_nn_kernel_tensor_write_from_float( tensors[4], attr[4],
- buffer[3], batch * hidden_units );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < 5; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _compute() */
-
-DEF_KERNEL_EXECUTOR(_compute_separated)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_ssize_t i = 0, j = 0;
- vsi_ssize_t batch = 0;
- vsi_ssize_t hidden_units = 0;
- float * buffer[_IO_COUNT_SEPARATED] = { NULL };
- vsi_status status = VSI_FAILURE;
- vsi_nn_activation_e gate_activation;
- vsi_nn_activation_e candidate_activation;
- vsi_bool use_cudnn_implementation;
- grucell_activation_input_layout_e input_layout = GRUCELL_ACTIVATION_INPUT_LAYOUT_ALL_NC;
- vsi_nn_kernel_tensor_t tensors[_IO_COUNT_SEPARATED] = { NULL };
- vsi_nn_kernel_tensor_attr_t* attr[_IO_COUNT_SEPARATED] = { NULL };
- float *i_r_base = NULL, *i_c_base = NULL, *i_u_base = NULL;
- float *r_r_base = NULL, *r_u_base = NULL, *r_c_base = NULL;
- float cond_reset = 0.f, cond_update = 0.f, cond_candidate = 0.f;
- float i_r = 0.f, i_u = 0.f, i_c = 0.f, r_r = 0.f, r_u = 0.f, r_c = 0.f;
- float bias_r = 0.f, bias_u = 0.f, bias_c = 0.f;
- float r = 0.f, u = 0.f, c = 0.f, state = 0.f;
-
- for(i = 0; i < _IO_COUNT_SEPARATED; i++)
- {
- tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
- attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );
- }
-
- /* z{t_} */
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input buffer fail.", final );
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], TRUE );
-
- buffer[4] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[4], attr[4], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[4], "Create input buffer fail.", final );
- buffer[5] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[5], attr[5], TRUE );
- buffer[6] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[6], attr[6], TRUE );
-
- buffer[7] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[7], attr[7], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[7], "Create input buffer fail.", final );
- buffer[8] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[8], attr[8], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[8], "Create input buffer fail.", final );
- buffer[9] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[9], attr[9], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[9], "Create input buffer fail.", final );
-
- buffer[10] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[10], attr[10], TRUE );
- buffer[11] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[11], attr[11], TRUE );
- buffer[12] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[12], attr[12], TRUE );
-
- buffer[13] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[13], attr[13], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[13], "Create input buffer fail.", final );
- buffer[14] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[14], attr[14], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[14], "Create input buffer fail.", final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[15], &gate_activation);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[16], &candidate_activation);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[17], &use_cudnn_implementation);
- CHECK_STATUS_FAIL_GOTO(status, final);
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[18], &input_layout);
- CHECK_STATUS_FAIL_GOTO(status, final);
-
- if(GRUCELL_ACTIVATION_INPUT_LAYOUT_ALL_NC == input_layout)
- {
- batch = attr[1]->shape->data[1];
- hidden_units = attr[1]->shape->data[0];
-
- if(buffer[2] == NULL)
- {
- hidden_units = hidden_units / 3;
- }
-
- for( i = 0; i < batch; i++ )
- {
- float* input_base = buffer[0] + i * hidden_units;
- float* output_base = buffer[13] + i * hidden_units;
-
- if(buffer[2] == NULL)
- {
- float* input_fc_base = buffer[1] + i * hidden_units * 3;
- float* recurrent_fc_base = buffer[4] + i * hidden_units * 3;
-
- i_r_base = input_fc_base + 0 * hidden_units;
- i_u_base = input_fc_base + 1 * hidden_units;
- i_c_base = input_fc_base + 2 * hidden_units;
-
- r_r_base = recurrent_fc_base + 0 * hidden_units;
- r_u_base = recurrent_fc_base + 1 * hidden_units;
- r_c_base = recurrent_fc_base + 2 * hidden_units;
- }
- else
- {
- i_r_base = buffer[1] + i * hidden_units;
- i_u_base = buffer[2] + i * hidden_units;
- i_c_base = buffer[3] + i * hidden_units;
- r_r_base = buffer[4] + i * hidden_units;
- r_u_base = buffer[5] + i * hidden_units;
- r_c_base = buffer[6] + i * hidden_units;
- }
-
- for( j = 0; j < hidden_units; j++ )
- {
- cond_reset = buffer[10] ? buffer[10][j] : cond_reset;
- cond_update = buffer[11] ? buffer[11][j] : cond_update;
- cond_candidate = buffer[12] ? buffer[12][j] : cond_candidate;
-
- bias_r = buffer[7][j];
- bias_u = buffer[8][j];
- bias_c = buffer[9][j];
-
- i_r = i_r_base[j];
- i_u = i_u_base[j];
- i_c = i_c_base[j];
-
- r_r = r_r_base[j];
- r_u = r_u_base[j];
- r_c = r_c_base[j];
-
- r = vsi_nn_activation(i_r + cond_reset + r_r + bias_r, gate_activation);
- u = vsi_nn_activation(i_u + cond_update + r_u + bias_u, gate_activation);
- c = vsi_nn_activation(i_c + cond_candidate + r * (r_c + bias_c), candidate_activation);
- state = u * (input_base[j] - c) + c;
-
- output_base[j] = state;
- }
- }
- }
- else
- {
- vsi_bool input_transposed = FALSE;
- float* input_base = buffer[0];
- float* output_base = buffer[13];
- float* curr_input = NULL;
- float* curr_output = NULL;
-
- batch = attr[1]->shape->data[0];
- hidden_units = attr[1]->shape->data[1];
-
- if(buffer[2] == NULL)
- {
- hidden_units = hidden_units / 3;
- i_r_base = buffer[1] + 0 * hidden_units * batch;
- i_u_base = buffer[1] + 1 * hidden_units * batch;
- i_c_base = buffer[1] + 2 * hidden_units * batch;
- r_r_base = buffer[4] + 0 * hidden_units * batch;
- r_u_base = buffer[4] + 1 * hidden_units * batch;
- r_c_base = buffer[4] + 2 * hidden_units * batch;
- }
- else
- {
- i_r_base = buffer[1];
- i_u_base = buffer[2];
- i_c_base = buffer[3];
- r_r_base = buffer[4];
- r_u_base = buffer[5];
- r_c_base = buffer[6];
- }
-
- if(GRUCELL_ACTIVATION_INPUT_LAYOUT_INPUT_NC_FC_CN == input_layout)
- {
- input_transposed = FALSE;
- }
- else
- {
- input_transposed = TRUE;
- }
-
- for( i = 0; i < hidden_units; i++ )
- {
- cond_reset = buffer[10] ? buffer[10][i] : cond_reset;
- cond_update = buffer[11] ? buffer[11][i] : cond_update;
- cond_candidate = buffer[12] ? buffer[12][i] : cond_candidate;
- bias_r = buffer[7][i];
- bias_u = buffer[8][i];
- bias_c = buffer[9][i];
-
- for( j = 0; j < batch; j++ )
- {
- if(input_transposed)
- {
- curr_input = &input_base[i * batch + j];
- curr_output = &output_base[i * batch + j];
- }
- else
- {
- curr_input = &input_base[j * hidden_units + i];
- curr_output = &output_base[j * hidden_units + i];
- }
-
- i_r = i_r_base[i * batch + j];
- i_u = i_u_base[i * batch + j];
- i_c = i_c_base[i * batch + j];
- r_r = r_r_base[i * batch + j];
- r_u = r_u_base[i * batch + j];
- r_c = r_c_base[i * batch + j];
-
- r = vsi_nn_activation(i_r + cond_reset + r_r + bias_r, gate_activation);
- u = vsi_nn_activation(i_u + cond_update + r_u + bias_u, gate_activation);
- c = vsi_nn_activation(i_c + cond_candidate + r * (r_c + bias_c), candidate_activation);
- state = u * (*curr_input - c) + c;
-
- *curr_output = state;
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[13], attr[13],
- buffer[13], batch * hidden_units );
- CHECK_STATUS_FAIL_GOTO( status, final );
- status = vsi_nn_kernel_tensor_write_from_float( tensors[14], attr[14],
- buffer[13], batch * hidden_units );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _IO_COUNT_SEPARATED; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _compute() */
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs,
- int32_t gate_activation,
- int32_t candidate_activation,
- int32_t input_category,
- vsi_bool use_cudnn_implementation,
- int32_t* param_count,
- int32_t* input_count,
- int32_t* output_count
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- if(input_category == 0)
- {
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _grucell_activation_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _grucell_activation_kernel_param_def );
- *param_count = _GRUCELL_ACTIVATION_PARAM_NUM;
- *input_count = 3;
- *output_count = 2;
- status = VSI_SUCCESS;
- }
- else
- {
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute_separated;
- kernel->info.parameters = _grucell_activation_separated_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _grucell_activation_separated_kernel_param_def );
- *param_count = _GRUCELL_ACTIVATION_SEPARATED_PARAM_NUM;
- *input_count = 13;
- *output_count = 2;
- status = VSI_SUCCESS;
- }
- return status;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t* node_params = NULL;
- vsi_nn_kernel_node_t node = NULL;
- int32_t i = 0;
- int32_t j = 0;
- int32_t param_count = 0;
- int32_t input_count = 0;
- int32_t output_count = 0;
- int32_t gate_activation = vsi_nn_kernel_param_get_int32( params, "gate_activation" );
- int32_t candidate_activation = vsi_nn_kernel_param_get_int32( params, "candidate_activation" );
- int32_t input_category = vsi_nn_kernel_param_get_int32( params, "input_category" );
- int32_t use_cudnn_implementation = vsi_nn_kernel_param_get_int32( params, "use_cudnn_implementation" );
- grucell_activation_input_layout_e input_layout = vsi_nn_kernel_param_get_int32( params, "input_layout" );
- vsi_nn_tensor_t** _inputs = NULL;
-
- status = _query_kernel( kernel, inputs, outputs, gate_activation, candidate_activation,
- input_category, use_cudnn_implementation, &param_count, &input_count, &output_count );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- _inputs = (vsi_nn_tensor_t**)malloc(input_count * sizeof(vsi_nn_tensor_t**));
- CHECK_PTR_FAIL_GOTO( _inputs, "Create buffer fail.", final );
- node_params = (vsi_nn_kernel_node_param_t *)malloc(sizeof(vsi_nn_kernel_node_param_t) * param_count);
- CHECK_PTR_FAIL_GOTO( node_params, "Create buffer fail.", final );
- for(i = 0; i < input_count; i++)
- {
- _inputs[i] = inputs[i];
- }
-
- j = input_count + output_count;
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, param_count,
- _inputs, input_count, outputs, output_count );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &gate_activation );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &candidate_activation );
- if(input_category != 0)
- {
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &use_cudnn_implementation );
- node_params[j++] = vsi_nn_kernel_scalar_create(graph, I32, &input_layout );
- }
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, param_count );
- if(input_category != 0)
- {
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- }
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- vsi_nn_kernel_scalar_release( &node_params[--j] );
- }
- }
-
-final:
- vsi_nn_safe_free(_inputs);
- vsi_nn_safe_free(node_params);
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( grucell_activation, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/grucell_activation_sma_cpu.c b/src/tim/vx/internal/src/kernel/cpu/grucell_activation_sma_cpu.c
deleted file mode 100644
index 783f779..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/grucell_activation_sma_cpu.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (3)
-#define _OUTPUT_NUM (2)
-#define _IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.grucell_activation_sma")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _grucell_activation_sma_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _GRUCELL_ACTIVATION_SMA_PARAM_NUM _cnt_of_array( _grucell_activation_sma_kernel_param_def )
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_size_t i = 0;
- vsi_size_t batch = 0;
- vsi_size_t hidden_units = 0;
- float * buffer[_IO_NUM] = { NULL };
- vsi_nn_kernel_tensor_t tensors[_IO_NUM] = { NULL };
- vsi_nn_kernel_tensor_attr_t* attr[_IO_NUM] = { NULL };
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
- tensors[4] = (vsi_nn_kernel_tensor_t)param[4];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- attr[4] = vsi_nn_kernel_tensor_attr_create( tensors[4] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input buffer fail.", final );
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input buffer fail.", final );
- buffer[3] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[3], attr[3], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create input buffer fail.", final );
-
- batch = attr[0]->shape->data[1];
- hidden_units = attr[0]->shape->data[0];
-
- for( i = 0; i < batch * hidden_units; i++ )
- {
- buffer[3][i] = (buffer[0][i] - buffer[1][i]) * buffer[2][i] + buffer[1][i];
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], batch * hidden_units );
- CHECK_STATUS_FAIL_GOTO( status, final );
- status = vsi_nn_kernel_tensor_write_from_float( tensors[4], attr[4],
- buffer[3], batch * hidden_units );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _compute() */
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _grucell_activation_sma_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _grucell_activation_sma_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_SMA_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _GRUCELL_ACTIVATION_SMA_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _GRUCELL_ACTIVATION_SMA_PARAM_NUM );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( grucell_activation_sma, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/grucell_activation_z_h_cpu.c b/src/tim/vx/internal/src/kernel/cpu/grucell_activation_z_h_cpu.c
deleted file mode 100644
index a5bd220..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/grucell_activation_z_h_cpu.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (GRUCELL_ACT_Z_H_IN_CNT)
-#define _OUTPUT_NUM (GRUCELL_ACT_Z_H_OUT_CNT)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.grucell_activation_z_h")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _grucell_activation_z_h_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED }, /*activation*/
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED }, /*recurrent_activation*/
-};
-#define _GRUCELL_ACTIVATION_Z_H_PARAM_NUM _cnt_of_array( _grucell_activation_z_h_kernel_param_def )
-#define SCALAR_ACTIVATION (7)
-#define SCALAR_R_ACTIVATION (8)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- vsi_size_t i, b;
- int32_t activation = 0;
- int32_t recurrent_activation = 0;
- vsi_size_t n_batch = 0;
- vsi_size_t n_cell = 0;
-
- /* prepare data */
- for ( i = 0; i < _INPUT_NUM; i++ )
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- if (input[i])
- {
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- }
-
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- if (output[i])
- {
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ACTIVATION], &activation );
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_R_ACTIVATION],
- &recurrent_activation );
- CHECK_STATUS_FAIL_GOTO(status, final );
- n_cell = in_attr[GRUCELL_ACT_Z_H_HSTATE]->shape->data[0];
- n_batch = in_attr[GRUCELL_ACT_Z_H_HSTATE]->shape->data[1];
-
- for (b = 0; b < n_batch; b ++)
- {
- for (i = 0; i < n_cell; i++)
- {
- vsi_size_t index = i + n_cell * b;
- float data_z_t = 0;
- float data_h_t = 0;
- float hstate_in = f32_in_buffer[GRUCELL_ACT_Z_H_HSTATE][index];
- float dst = 0;
-
- data_z_t = f32_in_buffer[GRUCELL_ACT_Z_H_I_FC_Z][index];
- data_z_t += f32_in_buffer[GRUCELL_ACT_Z_H_H_FC_Z][index];
- data_z_t = vsi_nn_activation(data_z_t, recurrent_activation);
-
- data_h_t = f32_in_buffer[GRUCELL_ACT_Z_H_I_FC_H][index];
- data_h_t += f32_in_buffer[GRUCELL_ACT_Z_H_H_FC_H][index];
- data_h_t = vsi_nn_activation(data_h_t, activation);
-
- dst = (1 - data_z_t ) * data_h_t + data_z_t * hstate_in;
-
- f32_out_buffer[GRUCELL_ACT_Z_H_OUT_OUTPUT][index] = dst;
- f32_out_buffer[GRUCELL_ACT_Z_H_OUT_HSTATE][index] = dst;
- }
- }
-
- /* save data */
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (output[i])
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _grucell_activation_z_h_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _grucell_activation_z_h_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_Z_H_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t activation = vsi_nn_kernel_param_get_int32( params, "activation" );
- int32_t recurrent_activation = vsi_nn_kernel_param_get_int32( params, "recurrent_activation" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _GRUCELL_ACTIVATION_Z_H_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- node_params[SCALAR_ACTIVATION] = vsi_nn_kernel_scalar_create(
- graph, I32, &activation );
- node_params[SCALAR_R_ACTIVATION] = vsi_nn_kernel_scalar_create(
- graph, I32, &recurrent_activation );
- status = vsi_nn_kernel_node_pass_param( node, node_params, _GRUCELL_ACTIVATION_Z_H_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ACTIVATION] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_R_ACTIVATION] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( grucell_activation_z_h, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/grucell_h_times_activation_r_cpu.c b/src/tim/vx/internal/src/kernel/cpu/grucell_h_times_activation_r_cpu.c
deleted file mode 100644
index b61f92e..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/grucell_h_times_activation_r_cpu.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (3)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.grucell_h_times_activation_r")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _grucell_h_times_activation_r_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED }, /*recurrent_activation*/
- // Add kererl parameters here
-};
-#define _GRUCELL_H_TIMES_ACTIVATION_R_PARAM_NUM _cnt_of_array( _grucell_h_times_activation_r_kernel_param_def )
-#define SCALAR_R_ACTIVATION (4)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- vsi_size_t i, b;
- int32_t recurrent_activation = 0;
- vsi_size_t n_batch = 0;
- vsi_size_t n_cell = 0;
-
- /* prepare data */
- for( i = 0; i < _INPUT_NUM; i++ )
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- if (input[i])
- {
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- }
-
- for( i = 0; i < _OUTPUT_NUM; i++ )
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- if (output[i])
- {
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_R_ACTIVATION],
- &recurrent_activation );
- CHECK_STATUS_FAIL_GOTO(status, final );
- n_cell = in_attr[0]->shape->data[0];
- n_batch = in_attr[0]->shape->data[1];
-
- for (b = 0; b < n_batch; b ++)
- {
- for (i = 0; i < n_cell; i++)
- {
- vsi_size_t index = i + n_cell * b;
- float data_r_t = 0;
- float r_times_h = 0;
- float hstate_in = f32_in_buffer[0][index];
-
- data_r_t = f32_in_buffer[1][index];
- data_r_t += f32_in_buffer[2][index];
-
- data_r_t = vsi_nn_activation(data_r_t, recurrent_activation);
-
- r_times_h = hstate_in * data_r_t;
-
- f32_out_buffer[0][index] = r_times_h;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (output[i])
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _grucell_h_times_activation_r_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _grucell_h_times_activation_r_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_GRUCELL_H_TIMES_ACTIVATION_R_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t recurrent_activation = vsi_nn_kernel_param_get_int32( params, "recurrent_activation" );
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _GRUCELL_H_TIMES_ACTIVATION_R_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_R_ACTIVATION] = vsi_nn_kernel_scalar_create(
- graph, I32, &recurrent_activation );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _GRUCELL_H_TIMES_ACTIVATION_R_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_R_ACTIVATION] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( grucell_h_times_activation_r, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/grucell_reset_after_activation_cpu.c b/src/tim/vx/internal/src/kernel/cpu/grucell_reset_after_activation_cpu.c
deleted file mode 100644
index cfd0eb1..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/grucell_reset_after_activation_cpu.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2021 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (GRUCELL_ACT_IN_CNT)
-#define _OUTPUT_NUM (GRUCELL_ACT_OUT_CNT)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.grucell_reset_after_activation")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _grucell_reset_after_activation_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED }, /*activation*/
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED }, /*recurrent_activation*/
- // Add kererl parameters here
-};
-#define _GRUCELL_RESET_AFTER_ACTIVATION_PARAM_NUM _cnt_of_array( _grucell_reset_after_activation_kernel_param_def )
-#define SCALAR_ACTIVATION (9)
-#define SCALAR_R_ACTIVATION (10)
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- vsi_size_t i, b;
- int32_t activation = 0;
- int32_t recurrent_activation = 0;
- vsi_size_t n_batch = 0;
- vsi_size_t n_cell = 0;
-
- /* prepare data */
- for ( i = 0; i < _INPUT_NUM; i++ )
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- if (input[i])
- {
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- }
-
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- if (output[i])
- {
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ACTIVATION], &activation );
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_R_ACTIVATION],
- &recurrent_activation );
- CHECK_STATUS_FAIL_GOTO(status, final );
- n_cell = in_attr[GRUCELL_ACT_H_STATE]->shape->data[0];
- n_batch = in_attr[GRUCELL_ACT_H_STATE]->shape->data[1];
-
- for (b = 0; b < n_batch; b ++)
- {
- for (i = 0; i < n_cell; i++)
- {
- vsi_size_t index = i + n_cell * b;
- float data_z_t = 0;
- float data_r_t = 0;
- float data_h_t = 0;
- float r_times_h = 0;
- float hstate_in = f32_in_buffer[GRUCELL_ACT_H_STATE][index];
- float dst = 0;
-
- data_z_t = f32_in_buffer[GRUCELL_ACT_I_FC_Z][index];
- data_r_t = f32_in_buffer[GRUCELL_ACT_I_FC_R][index];
- data_h_t = f32_in_buffer[GRUCELL_ACT_I_FC_H][index];
- data_z_t += f32_in_buffer[GRUCELL_ACT_H_FC_Z][index];
- data_r_t += f32_in_buffer[GRUCELL_ACT_H_FC_R][index];
-
- data_z_t = vsi_nn_activation(data_z_t, recurrent_activation);
- data_r_t = vsi_nn_activation(data_r_t, recurrent_activation);
-
- r_times_h = f32_in_buffer[GRUCELL_ACT_H_FC_H][index] * data_r_t;
- data_h_t += r_times_h;
-
- data_h_t = vsi_nn_activation(data_h_t, activation);
-
- dst = (1 - data_z_t ) * data_h_t + data_z_t * hstate_in;
-
- f32_out_buffer[GRUCELL_ACT_OUT_OUTPUT][index] = dst;
- f32_out_buffer[GRUCELL_ACT_OUT_H_STATE][index] = dst;
- }
- }
-
- /* save data */
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (output[i])
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _grucell_reset_after_activation_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _grucell_reset_after_activation_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_GRUCELL_RESET_AFTER_ACTIVATION_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t activation = vsi_nn_kernel_param_get_int32( params, "activation" );
- int32_t recurrent_activation = vsi_nn_kernel_param_get_int32( params, "recurrent_activation" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _GRUCELL_RESET_AFTER_ACTIVATION_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_ACTIVATION] = vsi_nn_kernel_scalar_create(
- graph, I32, &activation );
- node_params[SCALAR_R_ACTIVATION] = vsi_nn_kernel_scalar_create(
- graph, I32, &recurrent_activation );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _GRUCELL_RESET_AFTER_ACTIVATION_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ACTIVATION] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_R_ACTIVATION] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( grucell_reset_after_activation, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/heatmap_max_keypoint_cpu.c b/src/tim/vx/internal/src/kernel/cpu/heatmap_max_keypoint_cpu.c
deleted file mode 100644
index 61f6cd2..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/heatmap_max_keypoint_cpu.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (2)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.heatmap_max_keypoint")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _heatmap_max_keypoint_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _HEATMAP_MAX_KEYPOINT_PARAM_NUM _cnt_of_array( _heatmap_max_keypoint_kernel_param_def )
-
-// This function uses Taylor expansion up to the quatratic term to approximate bicubic
-// upscaling result.
-// 2nd order Taylor expansion: D(x) = D - b'x + 1/2 * x'Ax
-// where D = grid[1][1], Taylor expansion center, the original score,
-// x = delta, the correction on max keypoint position,
-// D(x) = deltaScore, the accuracy score after correction
-static void _solve_for_delta
- (
- const float grid[3][3],
- float* delta,
- float* deltaScore,
- float fpAtol,
- float fpRtol
- )
-{
- // b: negative 1st order derivative at center
- // A: Hessian matrix at center (2nd order derivative)
- float A[2][2], b[2];
- float crossProd1, crossProd2;
- float detA;
- b[0] = -(grid[1][2] - grid[1][0]) / 2.0f;
- b[1] = -(grid[2][1] - grid[0][1]) / 2.0f;
- A[0][0] = grid[1][0] - 2.0f * grid[1][1] + grid[1][2];
- A[0][1] = (grid[2][2] - grid[2][0] - grid[0][2] + grid[0][0]) / 4.0f;
- A[1][0] = A[0][1];
- A[1][1] = grid[0][1] - 2.0f * grid[1][1] + grid[2][1];
-
- // solve Ax=b, where x=delta -> delta = inv(A) * b
- crossProd1 = A[0][0] * A[1][1];
- crossProd2 = A[0][1] * A[1][0];
- detA = crossProd1 - crossProd2;
- // check if A is invertible
- if (fabs(detA) < (fpAtol + fpRtol * crossProd1)) return;
- delta[0] = (A[1][1] * b[0] - A[0][1] * b[1]) / detA;
- delta[1] = (A[0][0] * b[1] - A[1][0] * b[0]) / detA;
-
- // clip out of range delta, i.e. delta > 3/2
- if (fabs(delta[0]) > 1.5f || fabs(delta[1]) > 1.5f)
- {
- float scale = (float)(1.5f / vsi_nn_max(fabs(delta[0]), fabs(delta[1])));
- delta[0] *= scale;
- delta[1] *= scale;
- }
-
- *deltaScore = grid[1][1] - b[0] * delta[0] - b[1] * delta[1] +
- ((A[0][0] * delta[0] + A[0][1] * delta[1]) * delta[0] +
- (A[1][0] * delta[0] + A[1][1] * delta[1]) * delta[1]) /
- 2.0f;
-}
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i = 0;
- uint32_t j = 0;
- uint32_t k = 0;
- vsi_size_t numBoxes = 0;
- vsi_size_t heatmapSize = 0;
- vsi_size_t numKeypoints = 0;
- uint32_t boxInfoLength = 4;
- uint32_t output_score_index = 0;
- uint32_t output_keypoint_index = 0;
-
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
- }
-
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
-
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- numBoxes = in_attr[0]->shape->data[3];
- heatmapSize = in_attr[0]->shape->data[2];
- numKeypoints = in_attr[0]->shape->data[0];
-
- for(i = 0; i < numBoxes; i++)
- {
- for (j = 0; j < numKeypoints; j++)
- {
- uint32_t maxIndex = 0;
- float maxScore = -FLT_MAX;
- vsi_size_t maxIndexWidth;
- vsi_size_t maxIndexHeight;
- float localGrid[3][3] = {{0}};
- int32_t dh, dw;
- float delta[2] = {0.0f, 0.0f}, deltaScore;
- float wRoiStart = f32_in_buffer[1][i * boxInfoLength];
- float hRoiStart = f32_in_buffer[1][i * boxInfoLength + 1];
- float wRoiEnd = f32_in_buffer[1][i * boxInfoLength + 2];
- float hRoiEnd = f32_in_buffer[1][i * boxInfoLength + 3];
- float roiWidth = wRoiEnd - wRoiStart;
- float roiHeight = hRoiEnd - hRoiStart;
- float wRelativePos;
- float hRelativePos;
- for (k = 0; k < heatmapSize * heatmapSize; k++)
- {
- vsi_size_t index = i * heatmapSize * heatmapSize * numKeypoints
- + k * numKeypoints + j;
- float val = f32_in_buffer[0][index];
- if (maxScore < val)
- {
- maxScore = val;
- maxIndex = k;
- }
- }
- maxIndexWidth = maxIndex % heatmapSize;
- maxIndexHeight = maxIndex / heatmapSize;
-
- // get local 3x3 grid
- for (dh = -1; dh <= 1; dh++)
- {
- for (dw = -1; dw <= 1; dw++)
- {
- // cast uint32_t to int32_t
- vsi_ssize_t h = (vsi_ssize_t)(maxIndexHeight) + dh;
- vsi_ssize_t w = (vsi_ssize_t)(maxIndexWidth) + dw;
- vsi_size_t heatmapIndex;
-
- // use mirroring for out of bound indexing
- // need to ensure heatmapSize >= 2
- h = h < 0 ? 1 : (h >= (vsi_ssize_t)heatmapSize ? heatmapSize - 2 : h);
- w = w < 0 ? 1 : (w >= (vsi_ssize_t)heatmapSize ? heatmapSize - 2 : w);
-
- heatmapIndex = i * heatmapSize * heatmapSize * numKeypoints +
- (vsi_size_t)(h) * heatmapSize * numKeypoints +
- (vsi_size_t)(w) * numKeypoints + j;
- localGrid[dh + 1][dw + 1] = f32_in_buffer[0][heatmapIndex];
- }
- }
- deltaScore = maxScore;
- _solve_for_delta((const float (*)[3])localGrid, delta, &deltaScore, 1e-3f, 1e-3f);
-
- wRelativePos = ((float)(maxIndexWidth) + delta[0] + 0.5f) /
- (float)(heatmapSize);
- hRelativePos = ((float)(maxIndexHeight) + delta[1] + 0.5f) /
- (float)(heatmapSize);
- f32_out_buffer[0][output_score_index] = deltaScore;
- f32_out_buffer[1][output_keypoint_index] = wRelativePos * roiWidth + wRoiStart;
- f32_out_buffer[1][output_keypoint_index + 1] = hRelativePos * roiHeight + hRoiStart;
- output_score_index++;
- output_keypoint_index += 2;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- vsi_nn_safe_free(f32_in_buffer[i]);
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- vsi_nn_safe_free(f32_out_buffer[i]);
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _heatmap_max_keypoint_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _heatmap_max_keypoint_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_HEATMAP_MAX_KEYPOINT_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _HEATMAP_MAX_KEYPOINT_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _HEATMAP_MAX_KEYPOINT_PARAM_NUM );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( heatmap_max_keypoint, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/instance_normalization_cpu.c b/src/tim/vx/internal/src/kernel/cpu/instance_normalization_cpu.c
deleted file mode 100644
index 24a1db4..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/instance_normalization_cpu.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (3)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.instance_norm")
-
-DEF_KERNEL_EXECUTOR(_instance_norm_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t batch = 1;
- vsi_size_t depth = 1;
- vsi_size_t norm_size = 1;
- vsi_size_t b = 0;
- vsi_size_t c = 0;
- vsi_size_t i = 0;
- size_t rank = 1;
- float eps = .0f;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[4], &eps);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input1 buffer fail.", final );
-
- buffer[3] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
- memset( buffer[3], 0, out_elements * sizeof(float) );
-
- rank = attr[0]->shape->size;
-
- batch = attr[0]->shape->data[rank - 1];
- depth = attr[0]->shape->data[rank - 2];
-
- for ( i = 0; i < (vsi_size_t)rank - 2; i++)
- {
- norm_size *= attr[0]->shape->data[i];
- }
-
- for (b = 0; b < batch; b++)
- {
- for (c = 0; c < depth; c++)
- {
- vsi_size_t page = c * norm_size + b * norm_size * depth;
- float sum = .0f;
- float sumsq = .0f;
- float mean = .0f;
- float vari = .0f;
- float data = 0;
- float scaleVal = buffer[2][c];
- float biasVal = buffer[1][c];
-
- for (i = 0; i < norm_size; i++)
- {
- vsi_size_t index = page + i;
- sum += buffer[0][index];
- }
-
- mean = sum / (float)norm_size;
-
- for (i = 0; i < norm_size; i++)
- {
- vsi_size_t index = page + i;
- data = buffer[0][index] - mean;
- sumsq += data * data;
- }
-
- vari = sumsq / (float)norm_size;
- vari = (float)(1.0 / sqrtf(vari + eps));
-
- for (i = 0; i < norm_size; i++)
- {
- float normVal = 0;
- vsi_size_t index = page + i;
- data = buffer[0][index] - mean;
-
- normVal = data * vari * scaleVal + biasVal;
- buffer[3][index] = normVal;
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _instance_norm_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _instance_normalization_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _INSTANCE_NORMALIZATION_PARAM_NUM _cnt_of_array( _instance_normalization_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _instance_norm_exec;
- kernel->info.parameters = _instance_normalization_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _instance_normalization_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[4] = vsi_nn_kernel_scalar_create( graph, F32, &eps );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( instance_norm, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/l2normalizescale_cpu.c b/src/tim/vx/internal/src/kernel/cpu/l2normalizescale_cpu.c
deleted file mode 100644
index c220601..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/l2normalizescale_cpu.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.l2normalizescale")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _l2normalizescale_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-#define _L2NORMALIZESCALE_PARAM_NUM _cnt_of_array( _l2normalizescale_kernel_param_def )
-
-#define SCALAR_INPUT_AXIS (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- vsi_ssize_t index;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- int32_t outer = 0;
- float rsqrt = 0.0f, scaleValue = 0.0f;
- float epsilon = (float)10e-12;
- float l2Value = 0.0f, tmpValue = 0.0f;
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < (uint32_t)axis; i++)
- {
- innerSize *= in_attr[0]->shape->data[i];
- }
-
- axisSize = in_attr[0]->shape->data[axis];
-
- for (i = (uint32_t)axis + 1; i < in_attr[0]->shape->size; i++)
- {
- outerSize *= in_attr[0]->shape->data[i];
- }
-
- for (outer = 0; outer < outerSize; ++outer) {
- for (inner = 0; inner < innerSize; ++inner) {
- float sum = 0.0f;
-
- for (i = 0; i < (uint32_t)axisSize; ++i) {
- index = (outer * axisSize + i) * innerSize + inner;
- tmpValue = f32_in_buffer[0][index];
- sum += tmpValue * tmpValue;
- }
- rsqrt = 1.0f / sqrtf(vsi_nn_max(sum, epsilon));
- for (i = 0; i < (uint32_t)axisSize; ++i) {
- index = (outer * axisSize + i) * innerSize + inner;
- tmpValue = f32_in_buffer[0][index];;
- scaleValue = f32_in_buffer[1][i];;
- l2Value = tmpValue * rsqrt * scaleValue;
- f32_out_buffer[0][index] = l2Value;
- }
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _l2normalizescale_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _l2normalizescale_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_L2NORMALIZESCALE_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _L2NORMALIZESCALE_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _L2NORMALIZESCALE_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( l2normalizescale, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/layer_normalization_cpu.c b/src/tim/vx/internal/src/kernel/cpu/layer_normalization_cpu.c
deleted file mode 100644
index 1329ce3..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/layer_normalization_cpu.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (3)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.layer_norm")
-
-DEF_KERNEL_EXECUTOR(_layer_norm_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t i = 0;
- float eps = .0f;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[4], &eps);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input1 buffer fail.", final );
-
- buffer[3] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
- memset( buffer[3], 0, out_elements * sizeof(float) );
-
- {
- vsi_size_t axis_first = 0;
- vsi_size_t axis_num = 1;
- vsi_size_t outerSize = 1;
- vsi_size_t axisSize = 1;
- vsi_size_t innerSize = 1;
- vsi_size_t inner = 0;
- vsi_size_t outer = 0;
-
- for (i = 0; i < axis_first; i++)
- {
- innerSize *= attr[0]->shape->data[i];
- }
-
- for(i = 0; i < axis_num; i++)
- {
- axisSize *= attr[0]->shape->data[axis_first + i];
- }
-
- for (i = axis_first + axis_num; i < attr[0]->shape->size; i++)
- {
- outerSize *= attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- float sum = .0f;
- float sumsq = .0f;
- float mean = .0f;
- float vari = .0f;
-
- for (i = 0; i < axisSize; ++i)
- {
- float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
- sum += value;
- sumsq += (value * value);
- }
- mean = sum / (axisSize);
- vari = sumsq / (axisSize) - mean * mean;
- vari = (float)(1.0 / sqrtf(vari + eps));
-
- for (i = 0; i < axisSize; ++i)
- {
- vsi_ssize_t idx = (outer * axisSize + i) * innerSize + inner;
- float data = buffer[0][idx] - mean;
- float scaleVal = buffer[2][i];
- float biasVal = buffer[1][i];
- float normVal = data * vari * scaleVal + biasVal;
- buffer[3][idx] = normVal;
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _layer_norm_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _layer_normalization_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _LAYER_NORMALIZATION_PARAM_NUM _cnt_of_array( _layer_normalization_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _layer_norm_exec;
- kernel->info.parameters = _layer_normalization_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _layer_normalization_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[4] = vsi_nn_kernel_scalar_create( graph, F32, &eps );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( layer_norm, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/log_softmax_cpu.c b/src/tim/vx/internal/src/kernel/cpu/log_softmax_cpu.c
deleted file mode 100644
index 67e0d84..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/log_softmax_cpu.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "utils/vsi_nn_dtype_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_eltwise.h"
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (2)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("log_softmax_sw")
-
-DEF_KERNEL_EXECUTOR(_log_softmax_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t axis = 0;
- float beta = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t i = 0;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &beta);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- for (i = 0; i < axis; i++)
- {
- innerSize *= attr[0]->shape->data[i];
- }
-
- axisSize = attr[0]->shape->data[axis];
-
- for (i = axis + 1; i < (vsi_ssize_t)attr[0]->shape->size; i++)
- {
- outerSize *= attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- // We subtract the maximum value from each element to ensure
- // numerical stability, taking advantage of the following equality:
- // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
- float sum = 0;
- float logSum = 0;
- float maxValue = buffer[0][outer * axisSize * innerSize + inner];
- for (i = 1; i < axisSize; ++i)
- {
- maxValue = vsi_nn_max(maxValue, buffer[0][(outer * axisSize + i) * innerSize + inner]);
- }
-
- sum = 0;
- for (i = 0; i < axisSize; ++i)
- {
- sum += expf((buffer[0][(outer * axisSize + i) * innerSize + inner] - maxValue) * beta);
- }
-
- logSum = logf(sum);
- for (i = 0; i < axisSize; ++i)
- {
- buffer[1][(outer * axisSize + i) * innerSize + inner] =
- (buffer[0][(outer * axisSize + i) * innerSize + inner] - maxValue) * beta -
- logSum;
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _log_softmax_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _log_softmax_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-#define SCALAR_INPUT_AXIS (2)
-#define SCALAR_INPUT_BETA (3)
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
- float beta = 1.0f;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- beta = vsi_nn_kernel_param_get_float32(params, "beta");
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- backend_params[SCALAR_INPUT_BETA] = vsi_nn_kernel_scalar_create(
- graph, F32, &beta );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
- vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_BETA] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( log_softmax, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/logical_not_cpu.c b/src/tim/vx/internal/src/kernel/cpu/logical_not_cpu.c
deleted file mode 100644
index 9bcdcab..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/logical_not_cpu.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.logical_not")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _logical_not_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _LOGICAL_NOT_PARAM_NUM _cnt_of_array( _logical_not_kernel_param_def )
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- for (i = 0; i < out_elements[0]; i++)
- {
- f32_out_buffer[0][i] = (float)(!f32_in_buffer[0][i]);
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _logical_not_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _logical_not_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_LOGICAL_NOT_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs);
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_NOT_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_NOT_PARAM_NUM );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( logical_not, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/logical_ops_cpu.c b/src/tim/vx/internal/src/kernel/cpu/logical_ops_cpu.c
deleted file mode 100644
index 07deb44..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/logical_ops_cpu.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.logical_ops")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _logical_ops_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _LOGICAL_OPS_PARAM_NUM _cnt_of_array( _logical_ops_kernel_param_def )
-
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- uint32_t ops_type_int = 0;
- vsi_nn_logical_ops_type_t ops_type = VSI_NN_LOGICAL_OR;
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- vsi_nn_kernel_scalar_read_uint32((vsi_nn_kernel_scalar_t)param[_CPU_IO_NUM], &(ops_type_int));
- ops_type = (vsi_nn_logical_ops_type_t)ops_type_int;
- if (!(VSI_NN_LOGICAL_OR == ops_type || VSI_NN_LOGICAL_AND == ops_type || VSI_NN_LOGICAL_XOR == ops_type))
- {
- status = VSI_FAILURE;
- goto final;
- }
-
- for (i = 0; i < out_elements[0]; i++)
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- vsi_ssize_t in0 = 0;
- vsi_ssize_t in1 = 0;
-
-
- in0_offset = _expand_offset( i, in_attr[0]->shape->data, (vsi_size_t)in_attr[0]->shape->size,
- in_stride_size[0], out_attr[0]->shape->data );
- in1_offset = _expand_offset( i, in_attr[1]->shape->data, (vsi_size_t)in_attr[1]->shape->size,
- in_stride_size[1], out_attr[0]->shape->data );
- in0 = (!!(f32_in_buffer[0][in0_offset]));
- in1 = (!!(f32_in_buffer[1][in1_offset]));
- if (VSI_NN_LOGICAL_OR == ops_type)
- {
- f32_out_buffer[0][i] = (float)(in0 || in1);
- }
- else if (VSI_NN_LOGICAL_AND == ops_type)
- {
- f32_out_buffer[0][i] = (float)(in0 && in1);
- }
- else if (VSI_NN_LOGICAL_XOR == ops_type)
- {
- f32_out_buffer[0][i] = (float)(in0 ^ in1);
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _logical_ops_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _logical_ops_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_LOGICAL_OPS_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- uint32_t ops_type = vsi_nn_kernel_param_get_int32( params, "ops_type" );
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
-
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_OPS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[_CPU_IO_NUM] = vsi_nn_kernel_scalar_create( graph, U32, &ops_type );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_OPS_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[_CPU_IO_NUM] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( logical_ops, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/lppool_cpu.c b/src/tim/vx/internal/src/kernel/cpu/lppool_cpu.c
deleted file mode 100644
index 0f66636..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/lppool_cpu.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.lppool")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _lppool_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _LPPOOL_PARAM_NUM _cnt_of_array( _lppool_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_lppool_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float * buffer[_INPUT_NUM + _OUTPUT_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_INPUT_NUM + _OUTPUT_NUM] = { NULL };
- int32_t ksize_x = 0, ksize_y = 0, stride_x = 0, stride_y = 0;
- int32_t pad_left = 0, pad_right = 0, pad_top = 0, pad_bottom = 0;
- int32_t p = 0;
- int32_t i = 0;
- input[0] = (vsi_nn_kernel_tensor_t)param[0];
- output[0] = (vsi_nn_kernel_tensor_t)param[1];
- attr[0] = vsi_nn_kernel_tensor_attr_create( input[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( output[0] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &ksize_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &ksize_y);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &pad_left);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &pad_right);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[6], &pad_top);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[7], &pad_bottom);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[8], &stride_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[9], &stride_y);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[10], &p);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( input[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- {
- int32_t batch = (int32_t)attr[1]->shape->data[2];
- int32_t height_o = (int32_t)attr[1]->shape->data[1];
- int32_t width_o = (int32_t)attr[1]->shape->data[0];
- int32_t height = (int32_t)attr[0]->shape->data[1];
- int32_t width = (int32_t)attr[0]->shape->data[0];
- int32_t b = 0, j = 0;
- int32_t output_base = 0;
- int32_t input_base = 0;
- float data = 0;
- for (b = 0; b < batch; b++)
- {
- output_base = b * height_o * width_o;
- input_base = b * height * width;
- for (j = 0; j < height_o; j++)
- {
- for (i = 0; i < width_o; i++)
- {
- int32_t hstart = j * stride_y - pad_top;
- int32_t wstart = i * stride_x - pad_left;
- int32_t hend = vsi_nn_min(hstart + ksize_y, height);
- int32_t wend = vsi_nn_min(wstart + ksize_x, width);
- int32_t pool_index = output_base + j * width_o + i;
- int32_t h = 0, w = 0;
- float sum_of_pow = 0;
- float out_data = 0;
- hstart = vsi_nn_max(hstart, 0);
- wstart = vsi_nn_max(wstart, 0);
-
- for (h = hstart; h < hend; ++ h)
- {
- for (w = wstart; w < wend; ++ w)
- {
- int32_t index = input_base + h * width + w;
- data = buffer[0][index];
- sum_of_pow += (float)pow(fabs(data),p);
- }
- }
- out_data = (float)pow(sum_of_pow, 1.0f / p);
- buffer[1][pool_index] = out_data;
- }
- }
- }
-
- }
- status = vsi_nn_kernel_tensor_write_from_float( output[0], attr[1],
- buffer[1], out_elements );
-final:
- for ( i = 0; i < _INPUT_NUM + _OUTPUT_NUM; i ++ )
- {
- vsi_nn_safe_free( buffer[i] );
- if (attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- }
-
- return status;
-} /* _lppool_exec() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _lppool_exec;
- kernel->info.parameters = _lppool_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _lppool_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_LPPOOL_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
-
- int32_t ksize_x = vsi_nn_kernel_param_get_int32(params, "ksize_x");
- int32_t ksize_y = vsi_nn_kernel_param_get_int32(params, "ksize_y");
- int32_t stride_x = vsi_nn_kernel_param_get_int32(params, "stride_x");
- int32_t stride_y = vsi_nn_kernel_param_get_int32(params, "stride_y");
- int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
- int32_t pad_right = vsi_nn_kernel_param_get_int32(params, "pad_right");
- int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
- int32_t pad_bottom = vsi_nn_kernel_param_get_int32(params, "pad_bottom");
- int32_t p = vsi_nn_kernel_param_get_int32(params, "p");
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- int32_t index = 2;
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _LPPOOL_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_x );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_y );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_right );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_bottom );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_x );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_y );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &p );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _LPPOOL_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[2] );
- vsi_nn_kernel_scalar_release( &node_params[3] );
- vsi_nn_kernel_scalar_release( &node_params[4] );
- vsi_nn_kernel_scalar_release( &node_params[5] );
- vsi_nn_kernel_scalar_release( &node_params[6] );
- vsi_nn_kernel_scalar_release( &node_params[7] );
- vsi_nn_kernel_scalar_release( &node_params[8] );
- vsi_nn_kernel_scalar_release( &node_params[9] );
- vsi_nn_kernel_scalar_release( &node_params[10] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( lppool, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/lstmunit_activation_cpu.c b/src/tim/vx/internal/src/kernel/cpu/lstmunit_activation_cpu.c
deleted file mode 100644
index ade68ef..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/lstmunit_activation_cpu.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (LSTMUNIT_ACT_INPUTS_COUNT)
-#define _OUTPUT_NUM (LSTMUNIT_ACT_OUTUTS_COUNT)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.lstmunit_activation")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _lstmunit_activation_kernel_param_def[] =
-{
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*0 input_fc_i */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED }, /*1 input_fc_f */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED }, /*2 input_fc_c */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED }, /*3 input_fc_o */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED }, /*4 cs_in */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*5 hstate_fc_i */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*6 hstate_fc_f */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*7 hstate_fc_c */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*8 hstate_fc_o */
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*9 biases_i*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*10 biases_f*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*11 biases_c*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*12 biases_o*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*13 ln_w_i*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*14 ln_w_f*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*15 ln_w_c*/
- { VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*16 ln_w_o*/
- { VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED }, /*17 output*/
- { VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED }, /*18 cs_out*/
- { VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL }, /*19 hs_out*/
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_OPTIONAL }, /*20 _is_ln*/
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_OPTIONAL }, /*21 _is_cifg*/
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_OPTIONAL }, /*22 _is_proj*/
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_OPTIONAL }, /*23 _is_hybrid*/
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_OPTIONAL }, /*24 recurrent_activation*/
- { VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_OPTIONAL }, /*25 forget_bias*/
-};
-#define _LSTMUNIT_ACTIVATION_PARAM_NUM _cnt_of_array( _lstmunit_activation_kernel_param_def )
-
-#define SCALAR_IS_LN (20)
-#define SCALAR_IS_CIFG (21)
-#define SCALAR_IS_PROG (22)
-#define SCALAR_IS_HYBRID (23)
-#define SCALAR_ACTIVATION (24)
-#define SCALAR_FORGET_BIAS (25)
-
-static float activationFunctor(float a, vsi_nn_activation_e act_)
-{
- switch (act_)
- {
- case VSI_NN_ACT_NONE:
- return a;
- case VSI_NN_ACT_RELU:
- return a < 0.f ? 0.f : a;
- case VSI_NN_ACT_RELU6:
- return vsi_nn_max(0.f, vsi_nn_min(a, 6.f));
- case VSI_NN_ACT_TANH:
- return (float)tanh(a);
- case VSI_NN_ACT_SIGMOID:
- return (float)(1.0f / (1.0f + exp(-a)));
- case VSI_NN_ACT_HARD_SIGMOID:
- a = a * 0.2f + 0.5f;
- return vsi_nn_max(0.f, vsi_nn_min(a, 1.f));
- default:
- // TODO(aselle): More informative fatal error!
- exit(1);
- }
-}
-
-#define gcoMATH_Exp(X) (float)(expf((X)))
-#define gcoMATH_TangentH(X) (float)(tanhf((X)))
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- vsi_size_t i, b;
- int32_t _is_ln = 0;
- int32_t _is_cifg = 0;
- int32_t _is_proj = 0;
- int32_t _is_hybrid = 0;
- int32_t recurrent_activation;
- vsi_nn_activation_e activation_mode;
- vsi_size_t n_batch = 0;
- vsi_size_t n_cell = 0;
- float forget_bias;
- /* prepare data */
- for( i = 0; i < _INPUT_NUM; i++ )
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- if (input[i])
- {
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- }
-
- for( i = 0; i < _OUTPUT_NUM; i++ )
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- if (output[i])
- {
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
- }
-
- status = vsi_nn_kernel_scalar_read_int32( (vsi_nn_kernel_scalar_t)param[SCALAR_IS_LN], &_is_ln );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( (vsi_nn_kernel_scalar_t)param[SCALAR_IS_CIFG], &_is_cifg );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( (vsi_nn_kernel_scalar_t)param[SCALAR_IS_PROG], &_is_proj );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32( (vsi_nn_kernel_scalar_t)param[SCALAR_IS_HYBRID], &_is_hybrid );
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ACTIVATION], &recurrent_activation );
- CHECK_STATUS_FAIL_GOTO(status, final );
- activation_mode = (vsi_nn_activation_e)recurrent_activation;
- status = vsi_nn_kernel_scalar_read_float32( (vsi_nn_kernel_scalar_t)param[SCALAR_FORGET_BIAS], &forget_bias );
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- n_cell = in_attr[LSTMUNIT_ACT_CSTATE_IN]->shape->data[0];
- n_batch = in_attr[LSTMUNIT_ACT_CSTATE_IN]->shape->data[1];
-
- for (b = 0; b < n_batch; b ++)
- {
- for (i = 0; i < n_cell; i++)
- {
- vsi_size_t index = i + n_cell * b;
- float data_i_t = 0;
- float data_f_t = 0;
- float data_g_t = 0;
- float data_o_t = 0;
- float data_c_t = 0;
- float data_h_t = 0;
-
- data_i_t = _is_cifg ? 0 : f32_in_buffer[LSTMUNIT_ACT_INPUT_FC_I][index];
- data_f_t = f32_in_buffer[LSTMUNIT_ACT_INPUT_FC_F][index];
- data_g_t = f32_in_buffer[LSTMUNIT_ACT_INPUT_FC_C][index];
- data_o_t = f32_in_buffer[LSTMUNIT_ACT_INPUT_FC_O][index];
- data_c_t = f32_in_buffer[LSTMUNIT_ACT_CSTATE_IN][index];
-
- if (!_is_ln)
- {
- data_i_t += _is_cifg ? 0 : f32_in_buffer[LSTMUNIT_ACT_HSTATE_FC_I][index];
- data_f_t += f32_in_buffer[LSTMUNIT_ACT_HSTATE_FC_F][index];
- data_g_t += f32_in_buffer[LSTMUNIT_ACT_HSTATE_FC_C][index];
- data_o_t += f32_in_buffer[LSTMUNIT_ACT_HSTATE_FC_O][index];
- }
-
- if (!_is_cifg)
- {
- if (_is_ln)
- {
- data_i_t *= f32_in_buffer[LSTMUNIT_ACT_LN_WI][i];
- data_i_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BI][i];
- }
- else if (_is_hybrid)
- {
- data_i_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BI][i];
- }
- }
-
- if (_is_ln)
- {
- data_f_t *= f32_in_buffer[LSTMUNIT_ACT_LN_WF][i];
- data_f_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BF][i];
- data_g_t *= f32_in_buffer[LSTMUNIT_ACT_LN_WC][i];
- data_g_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BC][i];
- data_o_t *= f32_in_buffer[LSTMUNIT_ACT_LN_WO][i];
- data_o_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BO][i];
- }
- else if (_is_hybrid)
- {
- data_f_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BF][i];
- data_g_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BC][i];
- data_o_t += f32_in_buffer[LSTMUNIT_ACT_DATA_BO][i];
- }
-
- data_f_t += forget_bias;
- data_f_t = activationFunctor(data_f_t, activation_mode);
-
- if (_is_cifg)
- data_i_t = 1 - data_f_t;
- else
- data_i_t = activationFunctor(data_i_t, activation_mode);
- data_g_t = gcoMATH_TangentH(data_g_t);
- data_o_t = activationFunctor(data_o_t, activation_mode);
- data_c_t = data_f_t * data_c_t + data_i_t * data_g_t;
- data_h_t = data_o_t * gcoMATH_TangentH(data_c_t);
-
- f32_out_buffer[LSTMUNIT_ACT_CSTATE_OUT][index] = data_c_t;
- f32_out_buffer[LSTMUNIT_ACT_OUTPUT][index] = data_h_t;
-
- if (!_is_proj)
- {
- f32_out_buffer[LSTMUNIT_ACT_HSTATE_OUT][index] = data_h_t;
- }
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (output[i])
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
-
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _lstmunit_activation_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _lstmunit_activation_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_LSTMUNIT_ACTIVATION_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t _is_ln= 0;
- int32_t _is_cifg= 0;
- int32_t _is_proj= 0;
- int32_t _is_hybrid= 0;
- int32_t recurrent_activation;
- float forget_bias;
-
- _is_ln = vsi_nn_kernel_param_get_int32( params, "_is_ln" );
- _is_cifg = vsi_nn_kernel_param_get_int32( params, "_is_cifg" );
- _is_proj = vsi_nn_kernel_param_get_int32( params, "_is_proj" );
- _is_hybrid = vsi_nn_kernel_param_get_int32( params, "_is_hybrid" );
- recurrent_activation = vsi_nn_kernel_param_get_int32( params, "recurrent_activation" );
- forget_bias = vsi_nn_kernel_param_get_float32(params, "forget_bias");
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _LSTMUNIT_ACTIVATION_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_IS_LN] = vsi_nn_kernel_scalar_create(
- graph, I32, &_is_ln );
- node_params[SCALAR_IS_CIFG] = vsi_nn_kernel_scalar_create(
- graph, I32, &_is_cifg );
- node_params[SCALAR_IS_PROG] = vsi_nn_kernel_scalar_create(
- graph, I32, &_is_proj );
- node_params[SCALAR_IS_HYBRID] = vsi_nn_kernel_scalar_create(
- graph, I32, &_is_hybrid );
- node_params[SCALAR_ACTIVATION] = vsi_nn_kernel_scalar_create(
- graph, I32, &recurrent_activation );
- node_params[SCALAR_FORGET_BIAS] = vsi_nn_kernel_scalar_create(
- graph, F32, &forget_bias );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _LSTMUNIT_ACTIVATION_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_IS_LN] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_IS_CIFG] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_IS_PROG] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_IS_HYBRID] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ACTIVATION] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_FORGET_BIAS] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( lstmunit_activation, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/matrixmul_cpu.c b/src/tim/vx/internal/src/kernel/cpu/matrixmul_cpu.c
deleted file mode 100644
index 846df68..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/matrixmul_cpu.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (2)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.matrixmul")
-
-DEF_KERNEL_EXECUTOR(_matrixmul_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[3] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t i = 0;
- vsi_size_t M = 0, K = 0, N = 0;
- int32_t transposeA = 0, transposeB = 0;
- size_t strides0[2] = {0, 0}, strides1[2] = {0, 0};
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &transposeA);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &transposeB);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- K = attr[0]->shape->data[0];
- M = attr[2]->shape->data[1];
- N = attr[2]->shape->data[0];
-
- if(transposeA)
- {
- K = attr[0]->shape->data[1];
- }
-
- strides0[0] = transposeA? 1:K;
- strides0[1] = transposeA? M:1;
-
- strides1[0] = transposeB? 1:N;
- strides1[1] = transposeB? K:1;
-
- {
- vsi_size_t batch = attr[2]->shape->size > 3 ? attr[2]->shape->data[3] : 1;
- vsi_size_t depth = attr[2]->shape->size > 2 ? attr[2]->shape->data[2] : 1;
- vsi_size_t a_depth = attr[0]->shape->size > 2 ? attr[0]->shape->data[2] : 1;
- vsi_size_t b_depth = attr[1]->shape->size > 2 ? attr[1]->shape->data[2] : 1;
- vsi_size_t b = 0, c = 0, j = 0, y = 0;
- vsi_size_t offsetA = 0, offsetB = 0, offsetD = 0;
- vsi_size_t ac2zero = 1;
- vsi_size_t bc2zero = 1;
-
- if((attr[0]->shape->size > attr[1]->shape->size) ||
- (attr[0]->shape->data[2] > attr[1]->shape->data[2]
- && attr[0]->shape->size > 2 && attr[1]->shape->size > 2))
- {
- bc2zero = 0;
- }
- else if((attr[1]->shape->size > attr[0]->shape->size) ||
- (attr[1]->shape->data[2] > attr[0]->shape->data[2]
- && attr[0]->shape->size > 2 && attr[1]->shape->size > 2))
- {
- ac2zero = 0;
- }
-
- for(b = 0; b < batch; b++)
- {
- for(c = 0; c < depth; c++)
- {
- offsetA = c * M * K * ac2zero + b * M * K * a_depth;
- offsetB = c * N * K * bc2zero + b * N * K * b_depth;
- offsetD = c * M * N + b * M * N * depth;
- for(i = 0 ; i < M; i++)
- {
- for(j = 0; j < N; j++)
- {
- float sum = 0;
- for(y = 0; y < K; y++)
- {
- float dataA = buffer[0][i * strides0[0] + y * strides0[1] + offsetA];
- float dataB = buffer[1][y * strides1[0] + j * strides1[1] + offsetB];
-
- sum += dataA * dataB;
- }
- buffer[2][j + i * N + offsetD] = sum;
- }
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < 3; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_yuv420_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _matrixmul_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _MATIRXMUL_PARAM_NUM _cnt_of_array( _matrixmul_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _matrixmul_exec;
- kernel->info.parameters = _matrixmul_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _matrixmul_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t transposeA = vsi_nn_kernel_param_get_int32( params, "transposeA" );
- int32_t transposeB = vsi_nn_kernel_param_get_int32( params, "transposeB" );
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 3;
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &transposeA );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &transposeB );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( matrixmul, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/maximum_cpu.c b/src/tim/vx/internal/src/kernel/cpu/maximum_cpu.c
deleted file mode 100644
index 183fedc..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/maximum_cpu.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "utils/vsi_nn_dtype_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_eltwise.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (0)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("maximum_sw")
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for ( i = 0; i < rank && index; i ++ )
- {
- if (strides[0] == 0)
- {
- if (i == 0)
- {
- offset += (index % out_shape[0]);
- }
- else
- {
- offset += (vsi_ssize_t)strides[i] * 2 * ( index % out_shape[i] );
- }
- }
- else if ( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
-
- return offset;
-}
-
-DEF_KERNEL_EXECUTOR(_maximum_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_SUCCESS;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- vsi_size_t out_elements = 0;
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[0], stride_size[0] );
- vsi_nn_kernel_tensor_attr_get_stride( attr[1], stride_size[1] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- for (i = 0; i < out_elements; i++)
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float val1 = 0.f;
- float val2 = 0.f;
-
- in0_offset = _expand_offset( i, attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size,
- stride_size[0], attr[2]->shape->data );
- in1_offset = _expand_offset( i, attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size,
- stride_size[1], attr[2]->shape->data );
-
- val1 = buffer[0][in0_offset];
- val2 = buffer[1][in1_offset];
-
- buffer[2][i] = vsi_nn_max( val1, val2 );
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _maximum_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _maximum_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( maximum, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/maxpoolwithargmax_cpu.c b/src/tim/vx/internal/src/kernel/cpu/maxpoolwithargmax_cpu.c
deleted file mode 100644
index 900451a..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/maxpoolwithargmax_cpu.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (8)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (2)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.maxpoolwithargmax")
-
-#define FP32_MIN -3.4e38
-
-/*
- * Kernel params
- */
-static vx_param_description_t _maxpoolwithargmax_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
- // Add kererl parameters here
-};
-#define _MAXPOOLWITHARGMAX_PARAM_NUM _cnt_of_array( _maxpoolwithargmax_kernel_param_def )
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_maxpoolwithargmax_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- int32_t ksize_x = 0, ksize_y = 0, stride_x = 0, stride_y = 0;
- int32_t pad_left = 0, pad_right = 0, pad_top = 0, pad_bottom = 0;
- int32_t i = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &ksize_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &ksize_y);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &stride_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[6], &stride_y);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[7], &pad_left);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[8], &pad_right);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[9], &pad_top);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[10], &pad_bottom);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- {
- int32_t dims_num = (int32_t)attr[1]->shape->size;
- int32_t batch = dims_num > 3 ? (int32_t)attr[1]->shape->data[3] : 1;
- int32_t depth = dims_num > 2 ? (int32_t)attr[1]->shape->data[2] : 1;
- int32_t height_o = (int32_t)attr[1]->shape->data[1];
- int32_t width_o = (int32_t)attr[1]->shape->data[0];
- int32_t width = (int32_t)attr[0]->shape->data[0];
- int32_t height = (int32_t)attr[0]->shape->data[1];
- int32_t b = 0, d = 0, j = 0;
- int32_t output_base = 0;
- int32_t input_base = 0;
-
- for (b = 0; b < batch; b++)
- {
- for (d = 0; d < depth; d++)
- {
- output_base = b * depth * height_o * width_o + d * height_o * width_o;
- input_base = b * depth * height * width + d * height * width;
- for (j = 0; j < height_o; j++)
- {
- for (i = 0; i < width_o; i++)
- {
- int32_t hstart = j * stride_y - pad_top;
- int32_t wstart = i * stride_x - pad_left;
- int32_t hend = vsi_nn_min(hstart + ksize_y, height);
- int32_t wend = vsi_nn_min(wstart + ksize_x, width);
- int32_t pool_index = output_base + j * width_o + i;
- int32_t h = 0, w = 0;
- int32_t index_max = 0;
- float value_max = (float)FP32_MIN;
-
- hstart = vsi_nn_max(hstart, 0);
- wstart = vsi_nn_max(wstart, 0);
-
- for (h = hstart; h < hend; ++ h)
- {
- for (w = wstart; w < wend; ++ w)
- {
- int32_t index = input_base + h * width + w;
- float data = buffer[0][index];
-
- if (data > value_max)
- {
- value_max = data;
- index_max = index;
- }
- }
- }
- buffer[1][pool_index] = value_max;
- buffer[2][pool_index] = (float)index_max;
- }
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- status |= vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for ( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for ( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _maxpoolwithargmax_exec() */
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _maxpoolwithargmax_exec;
- kernel->info.parameters = _maxpoolwithargmax_kernel_param_def;
- kernel->info.numParams = _MAXPOOLWITHARGMAX_PARAM_NUM;
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_MAXPOOLWITHARGMAX_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
-
- int32_t ksize_x = vsi_nn_kernel_param_get_int32(params, "ksize_x");
- int32_t ksize_y = vsi_nn_kernel_param_get_int32(params, "ksize_y");
- int32_t stride_x = vsi_nn_kernel_param_get_int32(params, "stride_x");
- int32_t stride_y = vsi_nn_kernel_param_get_int32(params, "stride_y");
- int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
- int32_t pad_right = vsi_nn_kernel_param_get_int32(params, "pad_right");
- int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
- int32_t pad_bottom = vsi_nn_kernel_param_get_int32(params, "pad_bottom");
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- int32_t index = 3;
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _MAXPOOLWITHARGMAX_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_x );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_y );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_x );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_y );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_right );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_bottom );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _MAXPOOLWITHARGMAX_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[3] );
- vsi_nn_kernel_scalar_release( &node_params[4] );
- vsi_nn_kernel_scalar_release( &node_params[5] );
- vsi_nn_kernel_scalar_release( &node_params[6] );
- vsi_nn_kernel_scalar_release( &node_params[7] );
- vsi_nn_kernel_scalar_release( &node_params[8] );
- vsi_nn_kernel_scalar_release( &node_params[9] );
- vsi_nn_kernel_scalar_release( &node_params[10] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( maxpoolwithargmax, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/minimum_cpu.c b/src/tim/vx/internal/src/kernel/cpu/minimum_cpu.c
deleted file mode 100644
index 7cb6630..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/minimum_cpu.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (0)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("minimum_sw")
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for ( i = 0; i < rank && index; i ++ )
- {
- if (strides[0] == 0)
- {
- if (i == 0)
- {
- offset += (index % out_shape[0]);
- }
- else
- {
- offset += (vsi_ssize_t)strides[i] * 2 * ( index % out_shape[i] );
- }
- }
- else if ( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
-
- return offset;
-}
-
-DEF_KERNEL_EXECUTOR(_minimum_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_SUCCESS;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- vsi_size_t out_elements = 0;
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[0], stride_size[0] );
- vsi_nn_kernel_tensor_attr_get_stride( attr[1], stride_size[1] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- for( i = 0; i < out_elements; i ++ )
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float val1 = 0.f;
- float val2 = 0.f;
-
- in0_offset = _expand_offset( i, attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size,
- stride_size[0], attr[2]->shape->data );
- in1_offset = _expand_offset( i, attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size,
- stride_size[1], attr[2]->shape->data );
-
- val1 = buffer[0][in0_offset];
- val2 = buffer[1][in1_offset];
-
- buffer[2][i] = vsi_nn_min( val1, val2 );
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _minimum_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _minimum_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( minimum, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/mod_cpu.c b/src/tim/vx/internal/src/kernel/cpu/mod_cpu.c
deleted file mode 100644
index b391edd..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/mod_cpu.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.mod")
-
-/*
- * Kernel params
- */
-static vx_param_description_t _mod_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _MOD_PARAM_NUM _cnt_of_array( _mod_kernel_param_def )
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- int32_t isfmod = 0;
- vsi_nn_kernel_dtype_e input0_dtype = F16;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float* f32_in_buffer[_INPUT_NUM] = {NULL};
- float* f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t* in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t* out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
-
- /* prepare data */
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &isfmod);
- for (i = 0; i < _INPUT_NUM; i++) {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create(input[i]);
- vsi_nn_kernel_tensor_attr_get_stride(in_attr[i], in_stride_size[i]);
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer(input[i], in_attr[i], TRUE);
- CHECK_PTR_FAIL_GOTO(f32_in_buffer[i], "Create input0 buffer fail.", final);
- }
-
- input0_dtype = in_attr[0]->dtype;
- if (input0_dtype == F16 || input0_dtype == F32 || input0_dtype == BF16) {
- isfmod = 1;
- }
-
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create(output[i]);
- vsi_nn_kernel_tensor_attr_get_stride(out_attr[i], out_stride_size[i]);
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size(out_attr[i]);
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float*)malloc(out_bytes[i]);
- CHECK_PTR_FAIL_GOTO(f32_out_buffer[i], "Create output buffer fail.", final);
- memset(f32_out_buffer[i], 0, out_bytes[i]);
- }
-
- for (i = 0; i < out_elements[0]; i++)
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float in0 = 0;
- float in1 = 0;
-
- in0_offset = _expand_offset( i, in_attr[0]->shape->data, (vsi_size_t)in_attr[0]->shape->size,
- in_stride_size[0], out_attr[0]->shape->data );
- in1_offset = _expand_offset( i, in_attr[1]->shape->data, (vsi_size_t)in_attr[1]->shape->size,
- in_stride_size[1], out_attr[0]->shape->data );
- in0 = f32_in_buffer[0][in0_offset];
- in1 = f32_in_buffer[1][in1_offset];
- if (isfmod)
- {
- f32_out_buffer[0][i] = (float)fmod(in0,in1);
- }
- else
- {
- f32_out_buffer[0][i] = in0 - in1 * (float)floor(in0 / in1);
- }
- }
-
- /* save data */
- for (i = 0; i < _OUTPUT_NUM; i++) {
- status = vsi_nn_kernel_tensor_write_from_float(
- output[i], out_attr[i], f32_out_buffer[i], out_elements[i]);
- CHECK_STATUS_FAIL_GOTO(status, final);
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++) {
- if (f32_in_buffer[i]) {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
-
- if (in_attr[i]) {
- vsi_nn_kernel_tensor_attr_release(&in_attr[i]);
- }
- }
-
- for (i = 0; i < _OUTPUT_NUM; i++) {
- if (f32_out_buffer[i]) {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
-
- if (out_attr[i]) {
- vsi_nn_kernel_tensor_attr_release(&out_attr[i]);
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _mod_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _mod_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_MOD_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t isfmod = vsi_nn_kernel_param_get_int32(params, "isfmod");
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _MOD_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[3] = vsi_nn_kernel_scalar_create( graph, I32, &isfmod );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _MOD_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[3] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( mod, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c b/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c
deleted file mode 100644
index 431eee7..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (3)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (2)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.moments")
-
-DEF_KERNEL_EXECUTOR(_moments_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_size_t i = 0;
- int32_t axis_first = 0;
- int32_t axis_num = 0;
- uint32_t mask = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis_first);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[4], &axis_num);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_uint32((vsi_nn_kernel_scalar_t)param[5], &mask);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- if(mask == 0)
- {
- vsi_size_t outerSize = 1;
- vsi_size_t axisSize = 1;
- vsi_size_t innerSize = 1;
- vsi_size_t inner = 0;
- vsi_size_t outer = 0;
-
- for (i = 0; i < (vsi_size_t)axis_first; i++)
- {
- innerSize *= attr[0]->shape->data[i];
- }
-
- for(i = 0; i < (vsi_size_t)axis_num; i++)
- {
- axisSize *= attr[0]->shape->data[axis_first + i];
- }
-
- for (i = (vsi_size_t)axis_first + axis_num; i < attr[0]->shape->size; i++)
- {
- outerSize *= attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- float sum = .0f;
- float sumsq = .0f;
- float mean = .0f;
- float vari = .0f;
-
- for (i = 0; i < axisSize; ++i)
- {
- float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
- sum += value;
- sumsq += (value * value);
- }
- mean = sum / (axisSize);
- vari = sumsq / (axisSize) - mean * mean;
- buffer[1][outer * innerSize + inner] = (float)mean;
- buffer[2][outer * innerSize + inner] = (float)vari;
- }
- }
- }
- else
- {
- vsi_size_t width = attr[0]->shape->data[0];
- vsi_size_t height = attr[0]->shape->size > 1 ? attr[0]->shape->data[1] : 1;
- vsi_size_t channel = attr[0]->shape->size > 2 ? attr[0]->shape->data[2] : 1;
- vsi_size_t batch = attr[0]->shape->size > 3 ? attr[0]->shape->data[3] : 1;
- vsi_size_t width_o = attr[1]->shape->data[0];
- vsi_size_t height_o = attr[1]->shape->size > 1 ? attr[1]->shape->data[1] : 1;
- vsi_size_t channel_o = attr[1]->shape->size > 2 ? attr[1]->shape->data[2] : 1;
- vsi_size_t b = 0, c = 0, h = 0;
- vsi_size_t wh_offset = width * height;
- vsi_size_t axisSize = width * channel;
- vsi_size_t vol = width_o * height_o * channel_o;
-
- for(b = 0; b < batch; b++)
- {
- for(h = 0; h < height; h++)
- {
- float sum = .0f;
- float sumsq = .0f;
- float mean = .0f;
- float vari = .0f;
- vsi_size_t h_offset = h * width;
- for(c = 0; c < channel; c++)
- {
- vsi_size_t offset = h_offset + c * wh_offset;
- for(i = 0; i < width; i++)
- {
- float value = buffer[0][i + offset];
- sum += value;
- sumsq += (value * value);
- }
- }
- mean = sum / (axisSize);
- vari = sumsq / (axisSize) - mean * mean;
- buffer[1][b * vol + h] = (float)mean;
- buffer[2][b * vol + h] = (float)vari;
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- status |= vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_yuv420_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _moments_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _MOMENTS_PARAM_NUM _cnt_of_array( _moments_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _moments_exec;
- kernel->info.parameters = _moments_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _moments_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis_num = 0;
- size_t axis_num_temp = 0;
- int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
- vsi_bool is_continue_axis = TRUE;
- uint32_t mask = 0;
- int32_t i = 0;
-
- axis_num = (int32_t)axis_num_temp;
-
- for ( i = 1; i < axis_num; i++)
- {
- if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)
- {
- is_continue_axis = FALSE;
- break;
- }
- }
-
- if (is_continue_axis == FALSE)
- {
- for(i = 0; i < axis_num; i++)
- {
- mask |= (1 << axis[i]);
- }
- }
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- int32_t axis_first = axis[0];
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[3] = vsi_nn_kernel_scalar_create( graph, I32, &axis_first );
- backend_params[4] = vsi_nn_kernel_scalar_create( graph, I32, &axis_num );
- backend_params[5] = vsi_nn_kernel_scalar_create( graph, U32, &mask );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( moments, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/nms_cpu.c b/src/tim/vx/internal/src/kernel/cpu/nms_cpu.c
deleted file mode 100644
index f387d81..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/nms_cpu.c
+++ /dev/null
@@ -1,440 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (2)
-#define _OUTPUT_NUM (3)
- #define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.nms")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _nms_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define SCALAR_INPUT_MAX_SIZE (5)
-#define SCALAR_INPUT_IOU_THRES (6)
-#define SCALAR_INPUT_SCORE_THRES (7)
-#define SCALAR_INPUT_SOFT_NMS_SIGMA (8)
-#define _NMS_PARAM_NUM _cnt_of_array( _nms_kernel_param_def )
-
-typedef struct Candidate_s
-{
- int index;
- float score;
- int suppress_begin_index;
-}Candidate;
-static void _swap_element
- (
- Candidate* list,
- uint32_t first,
- uint32_t second
- )
-{
- Candidate temp;
- memcpy(&temp, &list[first], sizeof(Candidate));
- memcpy(&list[first], &list[second], sizeof(Candidate));
- memcpy(&list[second], &temp, sizeof(Candidate));
-}
-
-static uint32_t _max_element
- (
- Candidate* list,
- uint32_t len
- )
-{
- uint32_t i;
- uint32_t max_index = 0;
- float max_val = list[0].score;
- for ( i = 1; i < len; i++ )
- {
- float val = list[i].score;
- if ( max_val < val )
- {
- max_val = val;
- max_index = i;
- }
- }
-
- return max_index;
-}
-
-typedef struct box_corner_encoding_s
-{
- float y1;
- float x1;
- float y2;
- float x2;
-}box_corner_encoding;
-
-static float _computeIntersectionOverUnion
- (
- const float* boxes,
- const int32_t i,
- const int32_t j
- )
-{
- box_corner_encoding box_i = ((box_corner_encoding *)boxes)[i];
- box_corner_encoding box_j = ((box_corner_encoding *)boxes)[j];
- const float box_i_y_min = vsi_nn_min(box_i.y1, box_i.y2);
- const float box_i_y_max = vsi_nn_max(box_i.y1, box_i.y2);
- const float box_i_x_min = vsi_nn_min(box_i.x1, box_i.x2);
- const float box_i_x_max = vsi_nn_max(box_i.x1, box_i.x2);
- const float box_j_y_min = vsi_nn_min(box_j.y1, box_j.y2);
- const float box_j_y_max = vsi_nn_max(box_j.y1, box_j.y2);
- const float box_j_x_min = vsi_nn_min(box_j.x1, box_j.x2);
- const float box_j_x_max = vsi_nn_max(box_j.x1, box_j.x2);
-
- const float area_i =
- (box_i_y_max - box_i_y_min) * (box_i_x_max - box_i_x_min);
- const float area_j =
- (box_j_y_max - box_j_y_min) * (box_j_x_max - box_j_x_min);
- const float intersection_ymax = vsi_nn_min(box_i_y_max, box_j_y_max);
- const float intersection_xmax = vsi_nn_min(box_i_x_max, box_j_x_max);
- const float intersection_ymin = vsi_nn_max(box_i_y_min, box_j_y_min);
- const float intersection_xmin = vsi_nn_max(box_i_x_min, box_j_x_min);
- const float intersection_area =
- vsi_nn_max(intersection_ymax - intersection_ymin, 0.0f) *
- vsi_nn_max(intersection_xmax - intersection_xmin, 0.0f);
-
- if (area_i <= 0 || area_j <= 0)
- {
- return 0.0f;
- }
-
- return intersection_area / (area_i + area_j - intersection_area);
-}
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_SUCCESS;
- vsi_nn_kernel_tensor_t tensors[_INPUT_NUM] = { NULL };
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float * buffer[_INPUT_NUM] = { NULL };
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_size_t stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_nn_kernel_tensor_attr_t * attr[_INPUT_NUM] = { NULL };
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- int32_t i = 0;
- int32_t num_boxes = 0;
- float* boxes = NULL;
- float* scores = NULL;
- float* selected_indices = NULL;
- float* selected_scores = NULL;
- float* num_selected_indices = NULL;
- Candidate * candidate = NULL;
- int32_t select_size = 0;
- int32_t max_output_size = 0;
- int32_t select_start = 0;
- int32_t select_len = 0;
- float iou_threshold = 0.f;
- float score_threshold = 0.f;
- float soft_nms_sigma = 0.f;
- float scale = 0;
- int32_t num_outputs = 0;
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_MAX_SIZE],
- &max_output_size);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_IOU_THRES],
- &iou_threshold);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_SCORE_THRES],
- &score_threshold);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_SOFT_NMS_SIGMA],
- &soft_nms_sigma);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for ( i = 0; i < _INPUT_NUM; i++)
- {
- tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
- attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[i], stride_size[i] );
- buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[i], attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[i], "Create input buffer fail.", final );
- }
-
- for ( i = 0; i < _OUTPUT_NUM; i++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
-
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- f32_out_buffer[i] = (float *)malloc( out_elements[i] * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_elements[i] * sizeof(float) );
- }
-
- num_boxes = (int32_t)attr[0]->shape->data[1];
- boxes = buffer[0];
- scores = buffer[1];
- selected_indices = f32_out_buffer[0];
- selected_scores = f32_out_buffer[1];
- num_selected_indices = f32_out_buffer[2];
-
- candidate = (Candidate*)malloc(num_boxes * sizeof(Candidate));
- CHECK_PTR_FAIL_GOTO( candidate, "Create select buffer fail.", final );
- memset(candidate, 0, num_boxes * sizeof(Candidate));
-
- for (i = 0; i < num_boxes; ++i)
- {
- if (scores[i] > score_threshold)
- {
- candidate[select_size].index = i;
- candidate[select_size].score = scores[i];
- candidate[select_size].suppress_begin_index = 0;
- select_size++;
- }
- }
-
- num_outputs = vsi_nn_min(select_size, max_output_size);
-
- if (num_outputs == 0)
- {
- num_selected_indices[0] = 0;
- }
-
- if (soft_nms_sigma > 0.0f)
- {
- scale = -0.5f / soft_nms_sigma;
- }
-
- select_len = 0;
- while (select_len < num_outputs && select_start < select_size)
- {
- int32_t j = 0;
- float original_score = 0;
- vsi_bool should_hard_suppress = FALSE;
-
- // find max score and swap to the front.
- int32_t max_index = _max_element( &candidate[select_start], select_size - select_start);
-
- if (max_index != select_size - select_start - 1)
- {
- _swap_element(&(candidate[select_start]), max_index, 0);
- }
-
- original_score = candidate[select_start].score;
- // Calculate IoU of the rest, swap to the end (disgard) if needed.
- for ( j = select_len - 1; j >= candidate[select_start].suppress_begin_index; j-- )
- {
- int32_t idx = (int32_t)selected_indices[j];
- float iou = _computeIntersectionOverUnion(boxes, candidate[select_start].index, idx);
-
- // First decide whether to perform hard suppression.
- if (iou >= iou_threshold)
- {
- should_hard_suppress = TRUE;
- break;
- }
-
- // Suppress score if NMS sigma > 0.
- if (soft_nms_sigma > 0.0)
- {
- candidate[select_start].score =
- candidate[select_start].score * (float)exp(scale * iou * iou);
- }
-
- if (candidate[select_start].score <= score_threshold)
- break;
- }
-
- candidate[select_start].suppress_begin_index = select_len;
- if (!should_hard_suppress)
- {
- if (candidate[select_start].score == original_score)
- {
- // Suppression has not occurred, so select next_candidate.
- selected_indices[select_len] = (float)candidate[select_start].index;
- selected_scores[select_len] = candidate[select_start].score;
- ++ select_len;
- }
- if ( candidate[select_start].score > score_threshold)
- {
- // Soft suppression might have occurred and current score is still
- // greater than score_threshold; add next_candidate back onto priority
- // queue.
- candidate[select_start].suppress_begin_index = select_len;
- }
- }
-
- select_start ++;
- }
-
- num_selected_indices[0] = (float)select_len;
-
- for ( i = select_len; i < max_output_size; i++)
- {
- selected_indices[i] = 0;
- selected_scores[i] = 0;
- }
-
- /* save data */
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- vsi_nn_safe_free(candidate);
- for( i = 0; i < _INPUT_NUM; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
-
- for ( i = 0; i < _OUTPUT_NUM; i++ )
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _nms_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _nms_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_NMS_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t max_output_size = vsi_nn_kernel_param_get_int32(params, "max_output_size");
- float iou_threshold = vsi_nn_kernel_param_get_float32(params, "iou_threshold");
- float score_threshold = vsi_nn_kernel_param_get_float32(params, "score_threshold");
- float soft_nms_sigma = vsi_nn_kernel_param_get_float32(params, "soft_nms_sigma");
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _NMS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- node_params[SCALAR_INPUT_MAX_SIZE] = vsi_nn_kernel_scalar_create(
- graph, I32, &max_output_size );
- node_params[SCALAR_INPUT_IOU_THRES] = vsi_nn_kernel_scalar_create(
- graph, F32, &iou_threshold );
- node_params[SCALAR_INPUT_SCORE_THRES] = vsi_nn_kernel_scalar_create(
- graph, F32, &score_threshold );
- node_params[SCALAR_INPUT_SOFT_NMS_SIGMA] = vsi_nn_kernel_scalar_create(
- graph, F32, &soft_nms_sigma );
- status = vsi_nn_kernel_node_pass_param( node, node_params, _NMS_PARAM_NUM );
-
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_MAX_SIZE] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_IOU_THRES] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCORE_THRES] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SOFT_NMS_SIGMA] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( nms, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/one_hot_cpu.c b/src/tim/vx/internal/src/kernel/cpu/one_hot_cpu.c
deleted file mode 100644
index 5508499..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/one_hot_cpu.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
- #define _IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.one_hot")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _one_hot_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-#define INPUT_SCALAR_DEPTH (2)
-#define INPUT_SCALAR_ON_VALUE (3)
-#define INPUT_SCALAR_OFF_VALUE (4)
-#define INPUT_SCALAR_AXIS (5)
-#define _ONE_HOT_PARAM_NUM _cnt_of_array( _one_hot_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_IO_NUM] = { NULL };
- float * buffer[_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_IO_NUM] = { NULL };
- vsi_size_t i = 0;
- int32_t j = 0, m = 0;
- vsi_size_t k = 0;
- int32_t index = 0;
- int32_t depth = 0;
- float on_value = 0;
- float off_value = 0;
- int32_t axis = 0;
- vsi_size_t prefix_dim_size = 1;
- vsi_size_t suffix_dim_size = 0;
- int32_t num_elements = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &depth);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &on_value);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[4], &off_value);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- num_elements = (int32_t)vsi_nn_kernel_tensor_attr_get_size( attr[0] );
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- axis = axis == -1 ? (int32_t)attr[0]->shape->size : (int32_t)attr[0]->shape->size - axis;
-
- for (m = 0; m < axis; m++)
- {
- prefix_dim_size *= attr[0]->shape->data[m];
- }
-
- suffix_dim_size = num_elements / prefix_dim_size;
-
- for (i = 0; i < prefix_dim_size; i++)
- {
- for (j = 0; j < depth; j++)
- {
- for (k = 0; k < suffix_dim_size; k++)
- {
- int32_t value = (int32_t)buffer[0][i * suffix_dim_size + k];
- buffer[1][index ++] = value == j ? on_value : off_value;
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-final:
-#define SAFE_FREE_TENSOR_ATTR(_PTR) if ( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
- SAFE_FREE_TENSOR_ATTR(attr[0]);
- SAFE_FREE_TENSOR_ATTR(attr[1]);
-#undef SAFE_FREE_TENSOR_ATTR
- for ( i = 0; i < _IO_NUM; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- buffer[i] = NULL;
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _one_hot_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _one_hot_kernel_param_def );
- status = VSI_SUCCESS;
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_ONE_HOT_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t depth = vsi_nn_kernel_param_get_int32( params, "depth" );
- float on_value = vsi_nn_kernel_param_get_float32( params, "on_value" );
- float off_value = vsi_nn_kernel_param_get_float32( params, "off_value" );
- int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _ONE_HOT_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[INPUT_SCALAR_DEPTH] = vsi_nn_kernel_scalar_create(
- graph, I32, &depth );
- node_params[INPUT_SCALAR_ON_VALUE] = vsi_nn_kernel_scalar_create(
- graph, F32, &on_value );
- node_params[INPUT_SCALAR_OFF_VALUE] = vsi_nn_kernel_scalar_create(
- graph, F32, &off_value );
- node_params[INPUT_SCALAR_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _ONE_HOT_PARAM_NUM );
- CHECK_STATUS_FAIL_GOTO( status, OnError );
- }
- }
-OnError:
- if (node_params[INPUT_SCALAR_DEPTH])
- {
- vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_DEPTH] );
- }
-
- if (node_params[INPUT_SCALAR_ON_VALUE])
- {
- vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_ON_VALUE] );
- }
-
- if (node_params[INPUT_SCALAR_OFF_VALUE])
- {
- vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_OFF_VALUE] );
- }
-
- if (node_params[INPUT_SCALAR_AXIS])
- {
- vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_AXIS] );
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( one_hot, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/poolwithargmax_cpu.c b/src/tim/vx/internal/src/kernel/cpu/poolwithargmax_cpu.c
deleted file mode 100644
index 19a6e85..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/poolwithargmax_cpu.c
+++ /dev/null
@@ -1,335 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (2)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.poolwithargmax")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _poolwithargmax_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
-};
-#define _POOLWITHARGMAX_PARAM_NUM _cnt_of_array( _poolwithargmax_kernel_param_def )
-
-#define SCALAR_KSZIE_X (3)
-#define SCALAR_KSZIE_Y (4)
-#define SCALAR_STRIDE_X (5)
-#define SCALAR_STRIDE_Y (6)
-#define SCALAR_PAD_X (7)
-#define SCALAR_PAD_Y (8)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- vsi_size_t i, j, b, p;
- vsi_size_t batch, depth_v, height_o, width_o, height, width;
- int32_t ksize_x = 0;
- int32_t ksize_y = 0;
- int32_t stride_x = 0;
- int32_t stride_y = 0;
- int32_t pad_x = 0;
- int32_t pad_y = 0;
- vsi_size_t output_base = 0;
- vsi_size_t input_base = 0;
- vsi_ssize_t max_index = 0;
- vsi_nn_kernel_dtype_e out1_dtype;
- vsi_bool is_relative_coord = FALSE;
-
-
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_KSZIE_X], &ksize_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_KSZIE_Y], &ksize_y);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_STRIDE_X], &stride_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_STRIDE_Y], &stride_y);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_PAD_X], &pad_x);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_PAD_Y], &pad_y);
-
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- batch = out_attr[0]->shape->size > 3 ? out_attr[0]->shape->data[3] : 1;
- depth_v = out_attr[0]->shape->size > 2 ? out_attr[0]->shape->data[2] : 1;
- height_o = out_attr[0]->shape->data[1];
- width_o = out_attr[0]->shape->data[0];
- width = in_attr[0]->shape->data[0];
- height = in_attr[0]->shape->data[1];
-
- out1_dtype = out_attr[1]->dtype;
-
- if ((I8 == out1_dtype) || (U8 == out1_dtype) || (I16 == out1_dtype))
- {
- is_relative_coord = TRUE;
- }
-
- for(b = 0; b < batch; b++)
- {
- for (p = 0; p < depth_v; p ++)
- {
- output_base = b * depth_v * height_o * width_o + p * height_o * width_o;
- input_base = b * depth_v * height * width + p * height * width;
- for (j = 0; j < height_o; j ++)
- {
- for (i = 0; i < width_o; i ++)
- {
- vsi_ssize_t hstart = j * stride_y - pad_y;
- vsi_ssize_t wstart = i * stride_x - pad_x;
- vsi_size_t hoffset = 0;
- vsi_size_t woffset = 0;
- vsi_size_t hend = vsi_nn_min(hstart + ksize_y, (vsi_ssize_t)height);
- vsi_size_t wend = vsi_nn_min(wstart + ksize_x, (vsi_ssize_t)width);
- vsi_size_t pool_index = 0;
- vsi_size_t h, w = 0;
- vsi_size_t cur_index = 0;
- float d_f32 = 0.0f;
-
- if (hstart < 0)
- {
- hoffset = -hstart;
- }
-
- if (wstart < 0)
- {
- woffset = -wstart;
- }
-
- hstart = vsi_nn_max(hstart, 0);
- wstart = vsi_nn_max(wstart, 0);
-
- pool_index = output_base + j * width_o + i;
- max_index = is_relative_coord ? 0 : (input_base + hstart * width + wstart);
- d_f32 = f32_in_buffer[0][input_base + hstart * width + wstart];
- for (h = hstart; h < hend; ++ h)
- {
- cur_index = (h - hstart + hoffset) * ksize_x + woffset;
- for (w = wstart; w < wend; ++ w)
- {
- vsi_ssize_t index = input_base + h * width + w;
- float d;
-
- d = f32_in_buffer[0][index];
- if (d > d_f32)
- {
- d_f32 = d;
- max_index = is_relative_coord ? cur_index : index;
- }
- cur_index++;
- }
- }
- f32_out_buffer[0][pool_index] = d_f32;
- f32_out_buffer[1][pool_index] = (float)max_index;
- }
- }
- }
- }
- out_attr[1]->quant = VSI_NN_KERNEL_QUANT_NONE;
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _poolwithargmax_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _poolwithargmax_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_POOLWITHARGMAX_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t ksize_x = 0;
- int32_t ksize_y = 0;
- int32_t stride_x = 0;
- int32_t stride_y = 0;
- int32_t pad_x = 0;
- int32_t pad_y = 0;
-
- ksize_x = vsi_nn_kernel_param_get_int32(params, "ksize_x");
- ksize_y = vsi_nn_kernel_param_get_int32(params, "ksize_y");
- stride_x = vsi_nn_kernel_param_get_int32(params, "stride_x");
- stride_y = vsi_nn_kernel_param_get_int32(params, "stride_y");
- pad_x = vsi_nn_kernel_param_get_int32(params, "pad_x");
- pad_y = vsi_nn_kernel_param_get_int32(params, "pad_y");
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _POOLWITHARGMAX_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_KSZIE_X] = vsi_nn_kernel_scalar_create(
- graph, I32, &ksize_x );
- node_params[SCALAR_KSZIE_Y] = vsi_nn_kernel_scalar_create(
- graph, I32, &ksize_y );
- node_params[SCALAR_STRIDE_X] = vsi_nn_kernel_scalar_create(
- graph, I32, &stride_x );
- node_params[SCALAR_STRIDE_Y] = vsi_nn_kernel_scalar_create(
- graph, I32, &stride_y );
- node_params[SCALAR_PAD_X] = vsi_nn_kernel_scalar_create(
- graph, I32, &pad_x );
- node_params[SCALAR_PAD_Y] = vsi_nn_kernel_scalar_create(
- graph, I32, &pad_y );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _POOLWITHARGMAX_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_KSZIE_X] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_KSZIE_Y] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_STRIDE_X] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_STRIDE_Y] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_PAD_X] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_PAD_Y] );
- }
- }
-
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( poolwithargmax, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pow_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pow_cpu.c
deleted file mode 100644
index 39d53dd..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pow_cpu.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (0)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("pow_sw")
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i = 0;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-DEF_KERNEL_EXECUTOR(_pow_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_SUCCESS;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- vsi_size_t out_elements = 0;
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[0], stride_size[0] );
- vsi_nn_kernel_tensor_attr_get_stride( attr[1], stride_size[1] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- for( i = 0; i < out_elements; i ++ )
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float val1 = 0.f;
- float val2 = 0.f;
-
- in0_offset = _expand_offset( i, attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size,
- stride_size[0], attr[2]->shape->data );
- in1_offset = _expand_offset( i, attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size,
- stride_size[1], attr[2]->shape->data );
-
- val1 = buffer[0][in0_offset];
- val2 = buffer[1][in1_offset];
-
- buffer[2][i] = (float)pow( val1, val2 );
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pow_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pow_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pow, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_bgra_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_bgra_cpu.c
deleted file mode 100644
index bca6300..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_bgra_cpu.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (10)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_bgra_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-DEF_KERNEL_EXECUTOR(_pre_process_bgra_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- float * outBuffer = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float rMean = 0, gMean = 0, bMean = 0, var = 0;
- int32_t order = 0, trans = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- i = 2;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &rMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &gMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &bMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &var);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &order);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &trans);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- if(trans)
- {
- outBuffer = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( outBuffer, "Create output buffer fail.", final );
- memset( outBuffer, 0, out_elements * sizeof(float) );
- }
-
- {
- int32_t elementSize = 4;
- int32_t rline1[2], rline2[2];
- int32_t gline1[2], gline2[2];
- int32_t bline1[2], bline2[2];
- int32_t dx = 0, dy = 0, dz = 0;
- int32_t src_stride = (int32_t)attr[0]->shape->data[0];
- int32_t src_width = (int32_t)(src_stride / elementSize);
- int32_t src_height = (int32_t)attr[0]->shape->data[1];
- int32_t dst_width = (int32_t)(trans ? attr[1]->shape->data[1] : attr[1]->shape->data[0]);
- int32_t dst_height = (int32_t)(trans ? attr[1]->shape->data[2] : attr[1]->shape->data[1]);
- int32_t stride = (int32_t)(dst_width * dst_height);
- int32_t bOffset = 0;
- int32_t gOffset = 1 * stride;
- int32_t rOffset = 2 * stride;
- uint8_t R = 0, G = 0, B = 0;
-
- if(order)
- {
- bOffset = 2 * stride;
- rOffset = 0;
- }
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- int32_t source_index = 0;
- int32_t output_index = dx + dy * dst_width;
- int32_t dstR_idx = output_index + rOffset;
- int32_t dstG_idx = output_index + gOffset;
- int32_t dstB_idx = output_index + bOffset;
- float finalVal = 0;
-
- if(xRatio != (1 << 15) || yRatio != (1 << 15))
- {
- int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
- int32_t sx = fx & 0xffff8000; // Floor
- int32_t fy = 0, sy = 0;
- int32_t temp1 = 0, temp2 = 0;
-
- fx -= sx;
- sx = sx >> 15;
-
- sx = sx < 0 ? 0 : sx;
- sx = sx > src_width ? src_width - 1: sx;
-
- fx = (fx +(1 << 4)) >> 5;
-
- // for y
- fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
- sy = fy & 0xffff8000; // Floor
- fy -= sy;
- sy = sy >> 15;
-
- sy = sy < 0 ? 0 : sy;
- fy = fy < 0 ? 0 : fy;
-
- fy = (fy + (1<< 4)) >> 5;
-
- sx += xOffset;
- sy += yOffset;
- source_index = (sx + sy * src_width + dz * src_width * src_height) * elementSize;
-
- bline1[0] = (int32_t)buffer[0][source_index];
- bline1[1] = (int32_t)buffer[0][source_index + elementSize];
- bline2[0] = (int32_t)buffer[0][source_index + src_stride];
- bline2[1] = (int32_t)buffer[0][source_index + src_stride + elementSize];
-
- gline1[0] = (int32_t)buffer[0][source_index + 1];
- gline1[1] = (int32_t)buffer[0][source_index + elementSize + 1];
- gline2[0] = (int32_t)buffer[0][source_index + src_stride + 1];
- gline2[1] = (int32_t)buffer[0][source_index + src_stride + elementSize + 1];
-
- rline1[0] = (int32_t)buffer[0][source_index + 2];
- rline1[1] = (int32_t)buffer[0][source_index + elementSize + 2];
- rline2[0] = (int32_t)buffer[0][source_index + src_stride + 2];
- rline2[1] = (int32_t)buffer[0][source_index + src_stride + elementSize + 2];
-
- // B
- temp1 = fx * (bline1[1] - bline1[0]) + (bline1[0] << 10);
- temp2 = fx * (bline2[1] - bline2[0]) + (bline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- B = (uint8_t)(DESCALE(temp1));
- finalVal = (B - bMean) * var;
- buffer[1][dstB_idx] = finalVal;
-
- // R
- temp1 = fx * (rline1[1] - rline1[0]) + (rline1[0] << 10);
- temp2 = fx * (rline2[1] - rline2[0]) + (rline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- R = (uint8_t)(DESCALE(temp1));
- finalVal = (R - rMean) * var;
- buffer[1][dstR_idx] = finalVal;
-
- // G
- temp1 = fx * (gline1[1] - gline1[0]) + (gline1[0] << 10);
- temp2 = fx * (gline2[1] - gline2[0]) + (gline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- G = (uint8_t)(DESCALE(temp1));
- finalVal = (G - gMean) * var;
- buffer[1][dstG_idx] = finalVal;
- }
- else //copy
- {
- int32_t offset = xOffset + yOffset * src_width;
- source_index = (dx + dy * src_width + offset) * elementSize;
-
- finalVal = (buffer[0][source_index] - bMean) * var;
- buffer[1][dstB_idx] = finalVal;
-
- finalVal = (buffer[0][source_index + 1] - gMean) * var;
- buffer[1][dstG_idx] = finalVal;
-
- finalVal = (buffer[0][source_index + 2] - rMean) * var;
- buffer[1][dstR_idx] = finalVal;
- }
- }
- }
- }
- }
-
- if(trans)
- {
- vsi_size_t shape[] = {attr[1]->shape->data[0], attr[1]->shape->data[1], attr[1]->shape->data[2], 1};
- vsi_size_t perm[] = {1, 2, 0, 3};
- vsi_nn_Transpose((uint8_t*)outBuffer, (uint8_t*)buffer[1],
- shape, (uint32_t)attr[1]->shape->size, perm, VSI_NN_TYPE_FLOAT32);
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- outBuffer, out_elements );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- }
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- if(outBuffer)
- {
- free(outBuffer);
- }
-
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_bgra_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_bgra_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 2;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float bgra_scale = vsi_nn_kernel_param_get_float32( params, "rgb_scale" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
- int32_t trans = vsi_nn_kernel_param_get_int32( params, "enable_perm" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &bgra_scale );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &trans );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[2] );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- vsi_nn_kernel_scalar_release( &backend_params[7] );
- vsi_nn_kernel_scalar_release( &backend_params[8] );
- vsi_nn_kernel_scalar_release( &backend_params[9] );
- vsi_nn_kernel_scalar_release( &backend_params[10] );
- vsi_nn_kernel_scalar_release( &backend_params[11] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_bgra, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_gray_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_gray_cpu.c
deleted file mode 100644
index f7d4248..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_gray_cpu.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (6)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_gray_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-DEF_KERNEL_EXECUTOR(_pre_process_gray_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float mean = 0, scale = 1;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- i = 2;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &scale);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- {
- int32_t line1[2], line2[2];
- int32_t dx = 0, dy = 0, dz = 0;
- int32_t src_width = (int32_t)attr[0]->shape->data[0];
- int32_t src_height = (int32_t)attr[0]->shape->data[1];
- int32_t dst_width = (int32_t)attr[1]->shape->data[0];
- int32_t dst_height = (int32_t)attr[1]->shape->data[1];
- uint8_t result = 0;
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- int32_t source_index = 0;
- int32_t output_index = dx + dy * dst_width;
- float finalVal = 0.0f;
-
- if(xRatio != (1 << 15) || yRatio != (1 << 15))
- {
- int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
- int32_t sx = fx & 0xffff8000; // Floor
- int32_t fy = 0, sy = 0;
- int32_t temp1 = 0;
- int32_t temp2 = 0;
-
- fx -= sx;
- sx = sx >> 15;
-
- sx = sx < 0 ? 0 : sx;
- sx = sx > src_width ? src_width - 1: sx;
-
- fx = (fx +(1 << 4)) >> 5;
-
- // for y
- fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
- sy = fy & 0xffff8000; // Floor
- fy -= sy;
- sy = sy >> 15;
-
- sy = sy < 0 ? 0 : sy;
- fy = fy < 0 ? 0 : fy;
-
- fy = (fy + (1<< 4)) >> 5;
-
- sx += xOffset;
- sy += yOffset;
- source_index = (sx + sy * src_width + dz * src_width * src_height);
-
- line1[0] = (int32_t)buffer[0][source_index];
- line1[1] = (int32_t)buffer[0][source_index + 1];
- line2[0] = (int32_t)buffer[0][source_index + src_width];
- line2[1] = (int32_t)buffer[0][source_index + src_width + 1];
-
- temp1 = fx * (line1[1] - line1[0]) + (line1[0] << 10);
- temp2 = fx * (line2[1] - line2[0]) + (line2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- result = (uint8_t)(DESCALE(temp1));
- finalVal = (result - mean) * scale;
- buffer[1][output_index] = finalVal;
- }
- else
- {
- int32_t offset = xOffset + yOffset * src_width;
- source_index = dx + dy * src_width + offset;
- finalVal = (buffer[0][source_index] - mean) * scale;
- buffer[1][output_index] = finalVal;
- }
- }
- }
- }
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_gray_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_gray_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 2;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float mean = vsi_nn_kernel_param_get_float32( params, "mean" );
- float scale = vsi_nn_kernel_param_get_float32( params, "scale" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &scale );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[2] );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- vsi_nn_kernel_scalar_release( &backend_params[7] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_gray, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_nv12_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_nv12_cpu.c
deleted file mode 100644
index f9c47f9..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_nv12_cpu.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (10)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_nv12_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-DEF_KERNEL_EXECUTOR(_pre_process_nv12_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- float * outBuffer = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float rMean = 0, gMean = 0, bMean = 0, var = 0;
- int32_t order = 0, trans = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- i = 3;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &rMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &gMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &bMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &var);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &order);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &trans);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- if(trans)
- {
- outBuffer = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( outBuffer, "Create output buffer fail.", final );
- memset( outBuffer, 0, out_elements * sizeof(float) );
- }
-
- {
- int32_t dx, dy, dz;
- int32_t src_width = (int32_t)attr[0]->shape->data[0];
- int32_t dst_width = (int32_t)(trans ? attr[2]->shape->data[1] : attr[2]->shape->data[0]);
- int32_t dst_height = (int32_t)(trans ? attr[2]->shape->data[2] : attr[2]->shape->data[1]);
- int32_t stride = (int32_t)(dst_width * dst_height);
- int32_t rOffset = 0;
- int32_t gOffset = 1 * stride;
- int32_t bOffset = 2 * stride;
- float D, E;
- float R, G, B;
- float min = 0;
- float max = 255;
- float* src_y_slice = NULL;
- float* src_uv_yScanline = NULL;
-
- uint32_t roi_width = (xRatio * dst_width) >> 15;
- uint32_t roi_height = (yRatio * dst_height) >> 15;
- uint32_t xrIntFloat_16 = (roi_width << 16) / dst_width + 1;
- uint32_t yrIntFloat_16 = (roi_height << 16) / dst_height + 1;
- uint32_t srcy = 0, srcx = 0;
-
- if(attr[2]->dtype == I8)
- {
- min = -128;
- max = 127;
- }
- else if(attr[2]->dtype == I16 || attr[2]->dtype == F16)
- {
- min = -65536;
- max = 65535;
- }
-
- if(order)
- {
- rOffset = 2 * stride;
- bOffset = 0;
- }
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- srcy = (((uint32_t)dy * yrIntFloat_16) >> 16) + yOffset;
- src_y_slice = buffer[0] + (srcy) * src_width;
- src_uv_yScanline = buffer[1] + (srcy / 2) * src_width;
-
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- float finalVal = 0;
- int32_t output_index = 0;
- int32_t dstR_idx = 0, dstG_idx = 0, dstB_idx = 0;
- float tmpY = 0.0f;
- float tmpU = 0.0f;
- float tmpV = 0.0f;
-
- srcx = (((uint32_t)dx * xrIntFloat_16) >> 16) + xOffset;
- tmpY = src_y_slice[srcx];
- tmpU = src_uv_yScanline[(srcx / 2) * 2];
- tmpV = src_uv_yScanline[(srcx / 2) * 2 + 1];
-
- D = (tmpU - 128);
- E = (tmpV - 128);
-
- // B
- B = (float)vsi_clamp((tmpY + (1.7790 * D)), min, max);
- //G
- G = (float)vsi_clamp((tmpY - 0.3455 * D - 0.7169 * E), min, max);
- //R
- R = (float)vsi_clamp((tmpY + 1.4065 * E), min, max);
-
- output_index = dx + dy * dst_width;
-
- dstR_idx = output_index + rOffset;
- dstG_idx = output_index + gOffset;
- dstB_idx = output_index + bOffset;
-
- finalVal = (B - bMean) * var;
- buffer[2][dstB_idx] = finalVal;
-
- finalVal = (G - gMean) * var;
- buffer[2][dstG_idx] = finalVal;
-
- finalVal = (R - rMean) * var;
- buffer[2][dstR_idx] = finalVal;
- }
- }
- }
- }
-
- if(trans)
- {
- vsi_size_t shape[] = {attr[2]->shape->data[0], attr[2]->shape->data[1], attr[2]->shape->data[2], 1};
- vsi_size_t perm[] = {1, 2, 0, 3};
- vsi_nn_Transpose((uint8_t*)outBuffer, (uint8_t*)buffer[2],
- shape, (uint32_t)attr[2]->shape->size, perm, VSI_NN_TYPE_FLOAT32);
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- outBuffer, out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- if(outBuffer)
- {
- free(outBuffer);
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_nv12_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_nv12_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 3;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float rgb_scale = vsi_nn_kernel_param_get_float32( params, "rgb_scale" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
- int32_t trans = vsi_nn_kernel_param_get_int32( params, "enable_perm" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &rgb_scale );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &trans );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- vsi_nn_kernel_scalar_release( &backend_params[7] );
- vsi_nn_kernel_scalar_release( &backend_params[8] );
- vsi_nn_kernel_scalar_release( &backend_params[9] );
- vsi_nn_kernel_scalar_release( &backend_params[10] );
- vsi_nn_kernel_scalar_release( &backend_params[11] );
- vsi_nn_kernel_scalar_release( &backend_params[12] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_nv12, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_rgb888_planar_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_rgb888_planar_cpu.c
deleted file mode 100644
index 845c167..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_rgb888_planar_cpu.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (8)
-#define _CPU_INPUT_NUM (3)
-#define _CPU_OUTPUT_NUM (3)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_rgb888_planar")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-/*
- * Kernel params
- */
-static vx_param_description_t _pre_process_rgb888_planar_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_OPTIONAL},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _PRE_PROCESS_RGB888_PLANAR_PARAM_NUM _cnt_of_array( _pre_process_rgb888_planar_kernel_param_def )
-
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float mean[3] = {0}, scale = 1;
- vsi_bool is_rgb888 = tensors[1] == NULL;
-
- for (i = 0; i < _CPU_IO_NUM; i++)
- {
- tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
- if (tensors[i])
- {
- attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );
- CHECK_PTR_FAIL_GOTO( attr[i], "Create tensor attr buffer fail.", final );
- }
- }
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- i = 6;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean[0]);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean[1]);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean[2]);
- status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &scale);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < 3; i++)
- {
- if (tensors[i])
- {
- buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[i], attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[i], "Create input0 buffer fail.", final );
- }
-
- buffer[i + 3] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[i + 3], "Create output buffer fail.", final );
- memset( buffer[i + 3], 0, out_elements * sizeof(float) );
- }
-
- {
- int32_t line1[2], line2[2];
- int32_t dx = 0, dy = 0, idx = 0;
- int32_t src_width = (int32_t)attr[0]->shape->data[0];
- int32_t src_height = (int32_t)attr[0]->shape->data[1];
- int32_t dst_width = (int32_t)attr[3]->shape->data[0];
- int32_t dst_height = (int32_t)attr[3]->shape->data[1];
- uint8_t result = 0;
- int32_t offset = 0;
- int32_t index = 0;
-
- for ( idx = 0; idx < 3; idx ++)
- {
- offset = is_rgb888 ? idx * src_width * src_height : 0;
- index = is_rgb888 ? 0 : idx;
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- int32_t source_index = 0;
- int32_t output_index = dx + dy * dst_width;
- float finalVal = 0.0f;
-
- if(xRatio != (1 << 15) || yRatio != (1 << 15))
- {
- int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
- int32_t sx = fx & 0xffff8000; // Floor
- int32_t fy = 0, sy = 0;
- int32_t temp1 = 0;
- int32_t temp2 = 0;
-
- fx -= sx;
- sx = sx >> 15;
-
- sx = sx < 0 ? 0 : sx;
- sx = sx > src_width ? src_width - 1: sx;
-
- fx = (fx +(1 << 4)) >> 5;
-
- // for y
- fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
- sy = fy & 0xffff8000; // Floor
- fy -= sy;
- sy = sy >> 15;
-
- sy = sy < 0 ? 0 : sy;
- fy = fy < 0 ? 0 : fy;
-
- fy = (fy + (1<< 4)) >> 5;
-
- sx += xOffset;
- sy += yOffset;
- source_index = (sx + sy * src_width);
-
- line1[0] = (int32_t)buffer[index][source_index + offset];
- line1[1] = (int32_t)buffer[index][source_index + 1 + offset];
- line2[0] = (int32_t)buffer[index][source_index + src_width + offset];
- line2[1] = (int32_t)buffer[index][source_index + src_width + 1 + offset];
-
- temp1 = fx * (line1[1] - line1[0]) + (line1[0] << 10);
- temp2 = fx * (line2[1] - line2[0]) + (line2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- result = (uint8_t)(DESCALE(temp1));
- finalVal = (result - mean[idx]) * scale;
- buffer[idx + 3][output_index] = finalVal;
- }
- else
- {
- int32_t ofset = xOffset + yOffset * src_width;
- source_index = dx + dy * src_width + ofset + offset;
- finalVal = (buffer[index][source_index] - mean[idx]) * scale;
- buffer[idx + 3][output_index] = finalVal;
- }
- }
- }
- }
- }
- for (i = 3; i < _CPU_IO_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[i], attr[i],
- buffer[i], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for ( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if ( buffer[i] )
- {
- free( buffer[i] );
- }
- if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_SUCCESS;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _pre_process_rgb888_planar_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _pre_process_rgb888_planar_kernel_param_def );
-
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_PRE_PROCESS_RGB888_PLANAR_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if ( VSI_SUCCESS == status)
- {
- uint32_t index = 6;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float scale = vsi_nn_kernel_param_get_float32( params, "scale" );
-
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _PRE_PROCESS_RGB888_PLANAR_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &scale );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _PRE_PROCESS_RGB888_PLANAR_PARAM_NUM );
- vsi_nn_kernel_scalar_release( &node_params[6] );
- vsi_nn_kernel_scalar_release( &node_params[7] );
- vsi_nn_kernel_scalar_release( &node_params[8] );
- vsi_nn_kernel_scalar_release( &node_params[9] );
- vsi_nn_kernel_scalar_release( &node_params[10] );
- vsi_nn_kernel_scalar_release( &node_params[11] );
- vsi_nn_kernel_scalar_release( &node_params[12] );
- vsi_nn_kernel_scalar_release( &node_params[13] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_rgb888_planar, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_rgb_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_rgb_cpu.c
deleted file mode 100644
index 16068b6..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_rgb_cpu.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (10)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_rgb_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-DEF_KERNEL_EXECUTOR(_pre_process_rgb_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- float * outBuffer = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float rMean = 0, gMean = 0, bMean = 0, var = 0;
- int32_t order = 0, trans = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- i = 2;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &rMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &gMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &bMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &var);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &order);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &trans);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- if(trans)
- {
- outBuffer = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( outBuffer, "Create output buffer fail.", final );
- memset( outBuffer, 0, out_elements * sizeof(float) );
- }
-
- {
- int32_t rline1[2], rline2[2];
- int32_t gline1[2], gline2[2];
- int32_t bline1[2], bline2[2];
- int32_t dx = 0, dy = 0, dz = 0;
- int32_t src_stride = (int32_t)attr[0]->shape->data[0];
- int32_t src_width = (int32_t)(src_stride / 3);
- int32_t src_height = (int32_t)attr[0]->shape->data[1];
- int32_t dst_width = (int32_t)(trans ? attr[1]->shape->data[1] : attr[1]->shape->data[0]);
- int32_t dst_height = (int32_t)(trans ? attr[1]->shape->data[2] : attr[1]->shape->data[1]);
- int32_t stride = (int32_t)(dst_width * dst_height);
- int32_t rOffset = 0;
- int32_t gOffset = 1 * stride;
- int32_t bOffset = 2 * stride;
- uint8_t R = 0, G = 0, B = 0;
-
- if(order)
- {
- rOffset = 2 * stride;
- bOffset = 0;
- }
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- int32_t source_index = 0;
- int32_t output_index = dx + dy * dst_width;
- int32_t dstR_idx = output_index + rOffset;
- int32_t dstG_idx = output_index + gOffset;
- int32_t dstB_idx = output_index + bOffset;
- float finalVal = 0;
-
- if(xRatio != (1 << 15) || yRatio != (1 << 15))
- {
- int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
- int32_t sx = fx & 0xffff8000; // Floor
- int32_t fy = 0, sy = 0;
- int32_t temp1 = 0, temp2 = 0;
-
- fx -= sx;
- sx = sx >> 15;
-
- sx = sx < 0 ? 0 : sx;
- sx = sx > src_width ? src_width - 1: sx;
-
- fx = (fx +(1 << 4)) >> 5;
-
- // for y
- fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
- sy = fy & 0xffff8000; // Floor
- fy -= sy;
- sy = sy >> 15;
-
- sy = sy < 0 ? 0 : sy;
- fy = fy < 0 ? 0 : fy;
-
- fy = (fy + (1<< 4)) >> 5;
-
- sx += xOffset;
- sy += yOffset;
- source_index = (sx + sy * src_width + dz * src_width * src_height) * 3;
-
- rline1[0] = (int32_t)buffer[0][source_index];
- rline1[1] = (int32_t)buffer[0][source_index + 3];
- rline2[0] = (int32_t)buffer[0][source_index + src_stride];
- rline2[1] = (int32_t)buffer[0][source_index + src_stride + 3];
-
- gline1[0] = (int32_t)buffer[0][source_index + 1];
- gline1[1] = (int32_t)buffer[0][source_index + 4];
- gline2[0] = (int32_t)buffer[0][source_index + src_stride + 1];
- gline2[1] = (int32_t)buffer[0][source_index + src_stride + 4];
-
- bline1[0] = (int32_t)buffer[0][source_index + 2];
- bline1[1] = (int32_t)buffer[0][source_index + 5];
- bline2[0] = (int32_t)buffer[0][source_index + src_stride + 2];
- bline2[1] = (int32_t)buffer[0][source_index + src_stride + 5];
-
- // R
- temp1 = fx * (rline1[1] - rline1[0]) + (rline1[0] << 10);
- temp2 = fx * (rline2[1] - rline2[0]) + (rline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- R = (uint8_t)(DESCALE(temp1));
- finalVal = (R - rMean) * var;
- buffer[1][dstR_idx] = finalVal;
-
- //G
- temp1 = fx * (gline1[1] - gline1[0]) + (gline1[0] << 10);
- temp2 = fx * (gline2[1] - gline2[0]) + (gline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- G = (uint8_t)(DESCALE(temp1));
- finalVal = (G - gMean) * var;
- buffer[1][dstG_idx] = finalVal;
-
- //B
- temp1 = fx * (bline1[1] - bline1[0]) + (bline1[0] << 10);
- temp2 = fx * (bline2[1] - bline2[0]) + (bline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- B = (uint8_t)(DESCALE(temp1));
- finalVal = (B - bMean) * var;
- buffer[1][dstB_idx] = finalVal;
- }
- else //copy
- {
- int32_t offset = xOffset + yOffset * src_width;
- source_index = (dx + dy * src_width + offset) * 3;
-
- finalVal = (buffer[0][source_index] - rMean) * var;
- buffer[1][dstR_idx] = finalVal;
-
- finalVal = (buffer[0][source_index + 1] - gMean) * var;
- buffer[1][dstG_idx] = finalVal;
-
- finalVal = (buffer[0][source_index + 2] - bMean) * var;
- buffer[1][dstB_idx] = finalVal;
- }
- }
- }
- }
- }
-
- if(trans)
- {
- vsi_size_t shape[] = {attr[1]->shape->data[0], attr[1]->shape->data[1], attr[1]->shape->data[2], 1};
- vsi_size_t perm[] = {1, 2, 0, 3};
- vsi_nn_Transpose((uint8_t*)outBuffer, (uint8_t*)buffer[1],
- shape, (uint32_t)attr[1]->shape->size, perm, VSI_NN_TYPE_FLOAT32);
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- outBuffer, out_elements );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- }
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- if(outBuffer)
- {
- free(outBuffer);
- }
-
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_rgb_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_rgb_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 2;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float rgb_scale = vsi_nn_kernel_param_get_float32( params, "rgb_scale" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
- int32_t trans = vsi_nn_kernel_param_get_int32( params, "enable_perm" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &rgb_scale );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &trans );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[2] );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- vsi_nn_kernel_scalar_release( &backend_params[7] );
- vsi_nn_kernel_scalar_release( &backend_params[8] );
- vsi_nn_kernel_scalar_release( &backend_params[9] );
- vsi_nn_kernel_scalar_release( &backend_params[10] );
- vsi_nn_kernel_scalar_release( &backend_params[11] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_rgb, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv420_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv420_cpu.c
deleted file mode 100644
index aa814f2..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv420_cpu.c
+++ /dev/null
@@ -1,419 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (10)
-#define _CPU_INPUT_NUM (3)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_yuv420_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-DEF_KERNEL_EXECUTOR(_pre_process_yuv420_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- float * outBuffer = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float rMean = 0, gMean = 0, bMean = 0, var = 0;
- int32_t order = 0, trans = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- i = 4;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &rMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &gMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &bMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &var);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &order);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &trans);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input2 buffer fail.", final );
-
- buffer[3] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
- memset( buffer[3], 0, out_elements * sizeof(float) );
-
- if(trans)
- {
- outBuffer = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( outBuffer, "Create output buffer fail.", final );
- memset( outBuffer, 0, out_elements * sizeof(float) );
- }
-
- {
- uint8_t rline1[2], rline2[2];
- uint8_t gline1[2], gline2[2];
- uint8_t bline1[2], bline2[2];
- int32_t dx, dy, dz;
- int32_t src_width = (int32_t)attr[0]->shape->data[0];
- int32_t src_height = (int32_t)attr[0]->shape->data[1];
- int32_t subWidth = src_width >> 1;
- int32_t subHeight = src_height >> 1;
- int32_t dst_width = (int32_t)(trans ? attr[3]->shape->data[1] : attr[3]->shape->data[0]);
- int32_t dst_height = (int32_t)(trans ? attr[3]->shape->data[2] : attr[3]->shape->data[1]);
- int32_t stride = dst_width * dst_height;
- int32_t rOffset = 0;
- int32_t gOffset = 1 * stride;
- int32_t bOffset = 2 * stride;
- int32_t subIdx = 0;
- int32_t C, D, E;
- uint8_t R, G, B;
- int32_t min = 0;
- int32_t max = 255;
-
- if(order)
- {
- rOffset = 2 * stride;
- bOffset = 0;
- }
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- int32_t source_index = 0;
- int32_t output_index = dx + dy * dst_width;
- int32_t dstR_idx = output_index + rOffset;
- int32_t dstG_idx = output_index + gOffset;
- int32_t dstB_idx = output_index + bOffset;
- float finalVal = 0;
-
- if(xRatio != (1 << 15) || yRatio != (1 << 15))
- {
- int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
- int32_t sx = fx & 0xffff8000; // Floor
- int32_t fy = 0, sy = 0;
- int32_t temp1 = 0, temp2 = 0;
-
- fx -= sx;
- sx = sx >> 15;
-
- sx = sx < 0 ? 0 : sx;
- sx = sx > src_width ? src_width - 1: sx;
-
- fx = (fx +(1 << 4)) >> 5;
-
- // for y
- fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
- sy = fy & 0xffff8000; // Floor
- fy -= sy;
- sy = sy >> 15;
-
- sy = sy < 0 ? 0 : sy;
- fy = fy < 0 ? 0 : fy;
-
- fy = (fy + (1<< 4)) >> 5;
-
- sx += xOffset;
- sy += yOffset;
- source_index = (sx + sy * src_width + dz * src_width * src_height + 0);
- subIdx = ((sx >> 1) + (sy >> 1) * subWidth + dz * subWidth * subHeight + 0);
-
- /*C = ySrc[source_index] - 16;
- D = uSrc[subIdx] - 128;
- E = vSrc[subIdx] - 128;*/
- C = (int)buffer[0][source_index] - 16;
- D = (int)buffer[1][subIdx] - 128;
- E = (int)buffer[2][subIdx] - 128;
-
- rline1[0] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline1[0] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline1[0] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- // right
- subIdx = (((sx + 1) >> 1) + (sy >> 1) * subWidth + dz * subWidth * subHeight);
- C = (int)buffer[0][source_index + 1] - 16;
- D = (int)buffer[1][subIdx] - 128;
- E = (int)buffer[2][subIdx] - 128;
-
- rline1[1] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline1[1] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline1[1] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- // below
- subIdx = (((sx + 0) >> 1) + ((sy + 1) >> 1) * subWidth + dz * subWidth * subHeight);
- C = (int)buffer[0][source_index + src_width] - 16;
- D = (int)buffer[1][subIdx] - 128;
- E = (int)buffer[2][subIdx] - 128;
-
- rline2[0] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline2[0] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline2[0] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- // below right
- //C = ySrc[source_index + src_width + 1] - 16;
- subIdx = (((sx + 1) >> 1) + ((sy + 1) >> 1) * subWidth + dz * subWidth * subHeight);
- C = (int)buffer[0][source_index + src_width + 1] - 16;
- D = (int)buffer[1][subIdx] - 128;
- E = (int)buffer[2][subIdx] - 128;
-
- rline2[1] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline2[1] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline2[1] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- //B
- temp1 = fx * (bline1[1] - bline1[0]) + (bline1[0] << 10);
- temp2 = fx * (bline2[1] - bline2[0]) + (bline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- B = (uint8_t)(DESCALE(temp1));
- finalVal = (B - bMean) * var;
- buffer[3][dstB_idx] = finalVal;
-
- //G
- temp1 = fx * (gline1[1] - gline1[0]) + (gline1[0] << 10);
- temp2 = fx * (gline2[1] - gline2[0]) + (gline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
-
- G = (uint8_t)(DESCALE(temp1));
- finalVal = (G - gMean) * var;
- buffer[3][dstG_idx] = finalVal;
-
- // R
- temp1 = fx * (rline1[1] - rline1[0]) + (rline1[0] << 10);
- temp2 = fx * (rline2[1] - rline2[0]) + (rline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- R = (uint8_t)(DESCALE(temp1));
- finalVal = (R - rMean) * var;
- buffer[3][dstR_idx] = finalVal;
- }
- else
- {
- // do conversion
- C = (int)buffer[0][source_index] - 16;
- D = (int)buffer[1][subIdx] - 128;
- E = (int)buffer[2][subIdx] - 128;
-
- R = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- G = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- B = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- buffer[3][dstB_idx] = (B - bMean) * var;
- buffer[3][dstG_idx] = (G - gMean) * var;
- buffer[3][dstR_idx] = (R - rMean) * var;
- }
- }
- }
- }
- }
-
- if(trans)
- {
- vsi_size_t shape[] = {attr[3]->shape->data[0], attr[3]->shape->data[1], attr[3]->shape->data[2], 1};
- vsi_size_t perm[] = {1, 2, 0, 3};
- vsi_nn_Transpose((uint8_t*)outBuffer, (uint8_t*)buffer[3],
- shape, (uint32_t)attr[3]->shape->size, perm, VSI_NN_TYPE_FLOAT32);
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- outBuffer, out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- if(outBuffer)
- {
- free(outBuffer);
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_yuv420_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_yuv420_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 4;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float rgb_scale = vsi_nn_kernel_param_get_float32( params, "rgb_scale" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
- int32_t trans = vsi_nn_kernel_param_get_int32( params, "enable_perm" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &rgb_scale );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &trans );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- vsi_nn_kernel_scalar_release( &backend_params[7] );
- vsi_nn_kernel_scalar_release( &backend_params[8] );
- vsi_nn_kernel_scalar_release( &backend_params[9] );
- vsi_nn_kernel_scalar_release( &backend_params[10] );
- vsi_nn_kernel_scalar_release( &backend_params[11] );
- vsi_nn_kernel_scalar_release( &backend_params[12] );
- vsi_nn_kernel_scalar_release( &backend_params[13] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_yuv420, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv422_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv422_cpu.c
deleted file mode 100644
index 189ef8f..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv422_cpu.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (11)
-#define _CPU_INPUT_NUM (1)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_yuv422_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-
-DEF_KERNEL_EXECUTOR(_pre_process_yuv422_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- float * outBuffer = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float rMean = 0, gMean = 0, bMean = 0, var = 0;
- int32_t order = 0, trans = 0, yuv422_type = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
-
- i = 2;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &rMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &gMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &bMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &var);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &order);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &trans);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yuv422_type);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
- memset( buffer[1], 0, out_elements * sizeof(float) );
-
- if(trans)
- {
- outBuffer = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( outBuffer, "Create output buffer fail.", final );
- memset( outBuffer, 0, out_elements * sizeof(float) );
- }
-
- {
- int32_t dx, dy, dz;
- int32_t src_width = (int32_t)attr[0]->shape->data[0];
- int32_t dst_width = (int32_t)(trans ? attr[1]->shape->data[1] : attr[1]->shape->data[0]);
- int32_t dst_height = (int32_t)(trans ? attr[1]->shape->data[1] : attr[1]->shape->data[1]);
- int32_t stride = (int32_t)(dst_width * dst_height);
- int32_t rOffset = 0;
- int32_t gOffset = 1 * stride;
- int32_t bOffset = 2 * stride;
- float D0, D1, E0, E1;
- float R0, G0, B0, R1, G1, B1;
- float min = 0;
- float max = 255;
- float* src_y_slice = NULL;
-
- uint32_t roi_width = (xRatio * dst_width) >> 15;
- uint32_t roi_height = (yRatio * dst_height) >> 15;
- uint32_t xrIntFloat_16 = (roi_width << 16) / dst_width + 1;
- uint32_t yrIntFloat_16 = (roi_height << 16) / dst_height + 1;
- uint32_t srcy = 0, srcx = 0;
-
- if(attr[1]->dtype == I8)
- {
- min = -128;
- max = 127;
- }
- else if(attr[1]->dtype == I16 || attr[1]->dtype == F16)
- {
- min = -32768;
- max = 32767;
- }
-
- if(order)
- {
- rOffset = 2 * stride;
- bOffset = 0;
- }
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy++)
- {
- srcy = (((uint32_t)dy * yrIntFloat_16) >> 16) + yOffset;
- src_y_slice = buffer[0] + (srcy) * src_width;
- for ( dx = 0; dx < (int32_t)dst_width; dx += 2)
- {
- int32_t output_index = 0;
- int32_t dstR_idx = 0, dstG_idx = 0, dstB_idx = 0;
- float tmpY0 = 0.0f;
- float tmpY1 = 0.0f;
- float tmpU0 = 0.0f;
- float tmpU1 = 0.0f;
- float tmpV0 = 0.0f;
- float tmpV1 = 0.0f;
-
- srcx = ((((uint32_t)dx * xrIntFloat_16) >> 16) + xOffset) * 2;
-
- if (xrIntFloat_16 >> 16 == 1)
- {
- if (yuv422_type == 1)
- {
- tmpY0 = src_y_slice[srcx + 1];
- tmpU0 = src_y_slice[srcx];
- tmpY1 = src_y_slice[srcx + 3];
- tmpV0 = src_y_slice[srcx + 2];
- tmpU1 = tmpU0;
- tmpV1 = tmpV0;
- }
- else
- {
- tmpY0 = src_y_slice[srcx];
- tmpU0 = src_y_slice[srcx + 1];
- tmpY1 = src_y_slice[srcx + 2];
- tmpV0 = src_y_slice[srcx + 3];
- tmpU1 = tmpU0;
- tmpV1 = tmpV0;
- }
- }
- else
- {
- if (yuv422_type == 1)
- {
- tmpY0 = src_y_slice[srcx + 1];
- tmpU0 = src_y_slice[(srcx / 4) * 4];
- tmpV0 = src_y_slice[(srcx / 4) * 4 + 2];
- srcx = (((uint32_t)(dx + 1) * xrIntFloat_16) >> 16) + xOffset;
- srcx = srcx * 2;
- tmpY1 = src_y_slice[srcx + 1];
- tmpU1 = src_y_slice[(srcx / 4) * 4];
- tmpV1 = src_y_slice[(srcx / 4) * 4 + 2];
- }
- else
- {
- tmpY0 = src_y_slice[srcx];
- tmpU0 = src_y_slice[(srcx / 4) * 4 + 1];
- tmpV0 = src_y_slice[(srcx / 4) * 4 + 3];
- srcx = (((uint32_t)(dx + 1) * xrIntFloat_16) >> 16) + xOffset;
- srcx = srcx * 2;
- tmpY1 = src_y_slice[srcx];
- tmpU1 = src_y_slice[(srcx / 4) * 4 + 1];
- tmpV1 = src_y_slice[(srcx / 4) * 4 + 3];
- }
- }
-
- D0 = (tmpU0 - 128);
- E0 = (tmpV0 - 128);
- D1 = (tmpU1 - 128);
- E1 = (tmpV1 - 128);
-
- B0 = (float)vsi_clamp((tmpY0 + (1.7790 * D0)), min, max);
- G0 = (float)vsi_clamp((tmpY0 - 0.3455 * D0 - 0.7169 * E0), min, max);
- R0 = (float)vsi_clamp((tmpY0 + 1.4065 * E0), min, max);
-
- B1 = (float)vsi_clamp((tmpY1 + (1.7790 * D1)), min, max);
- G1 = (float)vsi_clamp((tmpY1 - 0.3455 * D1 - 0.7169 * E1), min, max);
- R1 = (float)vsi_clamp((tmpY1 + 1.4065 * E1), min, max);
-
- output_index = dx + dy * dst_width;
-
- dstR_idx = output_index + rOffset;
- dstG_idx = output_index + gOffset;
- dstB_idx = output_index + bOffset;
-
- buffer[1][dstB_idx] = (B0 - bMean) * var;
- buffer[1][dstG_idx] = (G0 - gMean) * var;
- buffer[1][dstR_idx] = (R0 - rMean) * var;
-
- dstR_idx += 1;
- dstG_idx += 1;
- dstB_idx += 1;
-
- buffer[1][dstB_idx] = (B1 - bMean) * var;
- buffer[1][dstG_idx] = (G1 - gMean) * var;
- buffer[1][dstR_idx] = (R1 - rMean) * var;
- }
- }
- }
- }
-
- if(trans)
- {
- vsi_size_t shape[] = {attr[1]->shape->data[0], attr[1]->shape->data[1], attr[1]->shape->data[2], 1};
- vsi_size_t perm[] = {1, 2, 0, 3};
- vsi_nn_Transpose((uint8_t*)outBuffer, (uint8_t*)buffer[1],
- shape, (uint32_t)attr[1]->shape->size, perm, VSI_NN_TYPE_FLOAT32);
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- outBuffer, out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
- buffer[1], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- if(outBuffer)
- {
- free(outBuffer);
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_yuv422_exec() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_yuv422_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_CPU_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- status = _query_kernel( kernel, inputs, outputs);
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- uint32_t index = 2;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float rgb_scale = vsi_nn_kernel_param_get_float32( params, "rgb_scale" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
- int32_t trans = vsi_nn_kernel_param_get_int32( params, "enable_perm" );
- int32_t yuv422_type = vsi_nn_kernel_param_get_int32( params, "yuv422_type" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &rgb_scale );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &trans );
- node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &yuv422_type );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &node_params[2] );
- vsi_nn_kernel_scalar_release( &node_params[3] );
- vsi_nn_kernel_scalar_release( &node_params[4] );
- vsi_nn_kernel_scalar_release( &node_params[5] );
- vsi_nn_kernel_scalar_release( &node_params[6] );
- vsi_nn_kernel_scalar_release( &node_params[7] );
- vsi_nn_kernel_scalar_release( &node_params[8] );
- vsi_nn_kernel_scalar_release( &node_params[9] );
- vsi_nn_kernel_scalar_release( &node_params[10] );
- vsi_nn_kernel_scalar_release( &node_params[11] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_yuv422, _setup )
-
diff --git a/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv444_cpu.c b/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv444_cpu.c
deleted file mode 100644
index 007d9c8..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/pre_process_yuv444_cpu.c
+++ /dev/null
@@ -1,413 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (10)
-#define _CPU_INPUT_NUM (3)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.pre_process_yuv444_sw")
-
-#define DESCALE(x) (((x) + (1<<19)) >> 20)
-
-DEF_KERNEL_EXECUTOR(_pre_process_yuv444_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- float * outBuffer = NULL;
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i = 0;
- int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
- float rMean = 0, gMean = 0, bMean = 0, var = 0;
- int32_t order = 0, trans = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
- tensors[3] = (vsi_nn_kernel_tensor_t)param[3];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
- attr[3] = vsi_nn_kernel_tensor_attr_create( tensors[3] );
- CHECK_PTR_FAIL_GOTO( attr[3], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
-
- i = 4;
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &rMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &gMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &bMean);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &var);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &order);
- CHECK_STATUS_FAIL_GOTO(status, final );
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &trans);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[2], attr[2], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create input2 buffer fail.", final );
-
- buffer[3] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[3], "Create output buffer fail.", final );
- memset( buffer[3], 0, out_elements * sizeof(float) );
-
- if(trans)
- {
- outBuffer = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( outBuffer, "Create output buffer fail.", final );
- memset( outBuffer, 0, out_elements * sizeof(float) );
- }
-
- {
- uint8_t rline1[2], rline2[2];
- uint8_t gline1[2], gline2[2];
- uint8_t bline1[2], bline2[2];
- int32_t dx, dy, dz;
- int32_t src_width = (int32_t)attr[0]->shape->data[0];
- int32_t src_height = (int32_t)attr[0]->shape->data[1];
- int32_t dst_width = (int32_t)(trans ? attr[3]->shape->data[1] : attr[3]->shape->data[0]);
- int32_t dst_height = (int32_t)(trans ? attr[3]->shape->data[2] : attr[3]->shape->data[1]);
- int32_t stride = dst_width * dst_height;
- int32_t rOffset = 0;
- int32_t gOffset = 1 * stride;
- int32_t bOffset = 2 * stride;
- int32_t C, D, E;
- uint8_t R, G, B;
- int32_t min = 0;
- int32_t max = 255;
-
- if(order)
- {
- rOffset = 2 * stride;
- bOffset = 0;
- }
-
- for ( dz = 0; dz < 1; dz ++)
- {
- for ( dy = 0; dy < (int32_t)dst_height; dy ++)
- {
- for ( dx = 0; dx < (int32_t)dst_width; dx ++)
- {
- int32_t source_index = 0;
- int32_t output_index = dx + dy * dst_width;
- int32_t dstR_idx = output_index + rOffset;
- int32_t dstG_idx = output_index + gOffset;
- int32_t dstB_idx = output_index + bOffset;
- float finalVal = 0;
-
- if(xRatio != (1 << 15) || yRatio != (1 << 15))
- {
- int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
- int32_t sx = fx & 0xffff8000; // Floor
- int32_t fy = 0, sy = 0;
- int32_t temp1 = 0, temp2 = 0;
-
- fx -= sx;
- sx = sx >> 15;
-
- sx = sx < 0 ? 0 : sx;
- sx = sx > src_width ? src_width - 1: sx;
-
- fx = (fx +(1 << 4)) >> 5;
-
- // for y
- fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
- sy = fy & 0xffff8000; // Floor
- fy -= sy;
- sy = sy >> 15;
-
- sy = sy < 0 ? 0 : sy;
- fy = fy < 0 ? 0 : fy;
-
- fy = (fy + (1<< 4)) >> 5;
-
- sx += xOffset;
- sy += yOffset;
- source_index = (sx + sy * src_width + dz * src_width * src_height + 0);
-
- /*C = ySrc[source_index] - 16;
- D = uSrc[subIdx] - 128;
- E = vSrc[subIdx] - 128;*/
- C = (int)buffer[0][source_index] - 16;
- D = (int)buffer[1][source_index] - 128;
- E = (int)buffer[2][source_index] - 128;
-
- rline1[0] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline1[0] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline1[0] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- // right
- C = (int)buffer[0][source_index + 1] - 16;
- D = (int)buffer[1][source_index + 1] - 128;
- E = (int)buffer[2][source_index + 1] - 128;
-
- rline1[1] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline1[1] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline1[1] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- // below
- C = (int)buffer[0][source_index + src_width] - 16;
- D = (int)buffer[1][source_index + src_width] - 128;
- E = (int)buffer[2][source_index + src_width] - 128;
-
- rline2[0] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline2[0] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline2[0] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- // below right
- //C = ySrc[source_index + src_width + 1] - 16;
- C = (int)buffer[0][source_index + src_width + 1] - 16;
- D = (int)buffer[1][source_index + src_width + 1] - 128;
- E = (int)buffer[2][source_index + src_width + 1] - 128;
-
- rline2[1] = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- gline2[1] = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- bline2[1] = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- //B
- temp1 = fx * (bline1[1] - bline1[0]) + (bline1[0] << 10);
- temp2 = fx * (bline2[1] - bline2[0]) + (bline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- B = (uint8_t)(DESCALE(temp1));
- finalVal = (B - bMean) * var;
- buffer[3][dstB_idx] = finalVal;
-
- //G
- temp1 = fx * (gline1[1] - gline1[0]) + (gline1[0] << 10);
- temp2 = fx * (gline2[1] - gline2[0]) + (gline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
-
- G = (uint8_t)(DESCALE(temp1));
- finalVal = (G - gMean) * var;
- buffer[3][dstG_idx] = finalVal;
-
- // R
- temp1 = fx * (rline1[1] - rline1[0]) + (rline1[0] << 10);
- temp2 = fx * (rline2[1] - rline2[0]) + (rline2[0] << 10);
- temp1 = fy * (temp2 - temp1) + (temp1 << 10);
- R = (uint8_t)(DESCALE(temp1));
- finalVal = (R - rMean) * var;
- buffer[3][dstR_idx] = finalVal;
- }
- else
- {
- // do conversion
- C = (int)buffer[0][source_index] - 16;
- D = (int)buffer[1][source_index] - 128;
- E = (int)buffer[2][source_index] - 128;
-
- R = (uint8_t)vsi_clamp((298 * C + 409 * E + 128) >> 8, min, max);
- G = (uint8_t)vsi_clamp((298 * C - 100* D - 208 * E + 128) >> 8, min, max);
- B = (uint8_t)vsi_clamp((298 * C + 516 * D + 128) >> 8, min, max);
-
- buffer[3][dstB_idx] = (B - bMean) * var;
- buffer[3][dstG_idx] = (G - gMean) * var;
- buffer[3][dstR_idx] = (R - rMean) * var;
- }
- }
- }
- }
- }
-
- if(trans)
- {
- vsi_size_t shape[] = {attr[3]->shape->data[0], attr[3]->shape->data[1], attr[3]->shape->data[2], 1};
- vsi_size_t perm[] = {1, 2, 0, 3};
- vsi_nn_Transpose((uint8_t*)outBuffer, (uint8_t*)buffer[3],
- shape, (uint32_t)attr[3]->shape->size, perm, VSI_NN_TYPE_FLOAT32);
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- outBuffer, out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
- else
- {
- status = vsi_nn_kernel_tensor_write_from_float( tensors[3], attr[3],
- buffer[3], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- if(outBuffer)
- {
- free(outBuffer);
- }
-
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _pre_process_yuv444_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _pre_process_yuv444_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VX_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- uint32_t index = 4;
- int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
- int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
- int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
- int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
- float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
- float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
- float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
- float rgb_scale = vsi_nn_kernel_param_get_float32( params, "rgb_scale" );
- int32_t reverse = vsi_nn_kernel_param_get_int32( params, "reverse" );
- int32_t trans = vsi_nn_kernel_param_get_int32( params, "enable_perm" );
-
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
-
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &rgb_scale );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &reverse );
- backend_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &trans );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[4] );
- vsi_nn_kernel_scalar_release( &backend_params[5] );
- vsi_nn_kernel_scalar_release( &backend_params[6] );
- vsi_nn_kernel_scalar_release( &backend_params[7] );
- vsi_nn_kernel_scalar_release( &backend_params[8] );
- vsi_nn_kernel_scalar_release( &backend_params[9] );
- vsi_nn_kernel_scalar_release( &backend_params[10] );
- vsi_nn_kernel_scalar_release( &backend_params[11] );
- vsi_nn_kernel_scalar_release( &backend_params[12] );
- vsi_nn_kernel_scalar_release( &backend_params[13] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( pre_process_yuv444, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/prelu_cpu.c b/src/tim/vx/internal/src/kernel/cpu/prelu_cpu.c
deleted file mode 100644
index 7209c9a..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/prelu_cpu.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-#define _CPU_ARG_NUM (0)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("prelu_sw")
-
-static vsi_ssize_t _expand_offset
- (
- vsi_ssize_t index,
- vsi_size_t * shape, vsi_size_t rank,
- vsi_size_t * strides, vsi_size_t * out_shape
- )
-{
- vsi_size_t i;
- vsi_ssize_t offset = 0;
-
- for( i = 0; i < rank && index; i ++ )
- {
- if( shape[i] == out_shape[i] )
- {
- offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
- }
- index /= out_shape[i];
- }
- return offset;
-}
-
-DEF_KERNEL_EXECUTOR(_prelu_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VX_SUCCESS;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- vsi_size_t out_elements = 0;
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t i;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
-
- vsi_nn_shape_get_stride( attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size, stride_size[0] );
- vsi_nn_shape_get_stride( attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size, stride_size[1] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- for( i = 0; i < out_elements; i ++ )
- {
- vsi_ssize_t in0_offset = 0;
- vsi_ssize_t in1_offset = 0;
- float val1 = 0.f;
- float val2 = 0.f;
-
- in0_offset = _expand_offset( i, attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size,
- stride_size[0], attr[2]->shape->data );
- in1_offset = _expand_offset( i, attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size,
- stride_size[1], attr[2]->shape->data );
-
- val1 = buffer[0][in0_offset];
- val2 = buffer[1][in1_offset];
-
-
- buffer[2][i] = val1 >= 0 ? val1 : val1 * val2;
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
- return status;
-} /* _prelu_exec() */
-
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
-};
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _prelu_exec;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_SUCCESS;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t is_per_channel_alpha = 0;
-
- is_per_channel_alpha = vsi_nn_kernel_param_get_int32(params, "is_per_channel_alpha");
-
- if (is_per_channel_alpha)
- {
- return NULL;
- }
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( prelu, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/random_multinomial_cpu.c b/src/tim/vx/internal/src/kernel/cpu/random_multinomial_cpu.c
deleted file mode 100644
index 3bd40d6..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/random_multinomial_cpu.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "utils/vsi_nn_dtype_util.h"
-#include "kernel/vsi_nn_kernel.h"
-#include "kernel/vsi_nn_kernel_eltwise.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (0)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("random_multinomial_sw")
-
-/*
- * Kernel params
- */
-static vx_param_description_t kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _PARAM_NUM _cnt_of_array( kernel_param_def )
-
-/*
- * Kernel function
- */
-static int upper_bound(float* a, int n, float x) {
- int l = 0;
- int h = n;
- while (l < h) {
- int mid = (l + h) / 2;
- if (x >= a[mid]) {
- l = mid + 1;
- } else {
- h = mid;
- }
- }
- return l;
-} /* upper_bound() */
-
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- vsi_size_t out_elements = 0;
- vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- uint32_t *random_integer = NULL;
- float *random_float = NULL;
- float *cdf = NULL;
- uint32_t i = 0;
- uint32_t n = 0;
- uint32_t batch = 0;
- uint32_t class_size = 0;
- int32_t sample_num = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- sample_num = (int32_t)attr[2]->shape->data[0];
- batch = (int32_t)attr[0]->shape->data[1];
- class_size = (int32_t)attr[0]->shape->data[0];
-
- vsi_nn_kernel_tensor_attr_get_stride( attr[0], stride_size[0] );
- vsi_nn_kernel_tensor_attr_get_stride( attr[1], stride_size[1] );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input1 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- random_integer = (uint32_t *)malloc(out_elements * sizeof(uint32_t));
- CHECK_PTR_FAIL_GOTO( random_integer, "Create buffer fail.", final );
- random_float = (float *)malloc(out_elements * sizeof(float));
- CHECK_PTR_FAIL_GOTO( random_float, "Create buffer fail.", final );
- cdf = (float *)malloc(class_size * sizeof(float));
- CHECK_PTR_FAIL_GOTO( cdf, "Create buffer fail.", final );
-
- vsi_nn_random_init_for_philox_4x32_10((uint32_t)(buffer[1][0]),
- (uint32_t)(buffer[1][1]));
- vsi_nn_random_generate_by_philox_4x32_10(random_integer, (uint32_t)out_elements);
- vsi_nn_random_uniform_transform(random_integer,
- random_float, (uint32_t)out_elements);
-
- for (n = 0; n < batch; n++)
- {
- uint32_t c = 0;
- float batch_max = -FLT_MAX;
- float total = 0;
- for(c = 0; c < class_size; c++)
- {
- uint32_t index = n * class_size + c;
- batch_max = vsi_nn_max(batch_max, buffer[0][index]);
- }
-
- for(c = 0; c < class_size; c++)
- {
- uint32_t index = n * class_size + c;
- total += (float)(exp(buffer[0][index] - batch_max));
- cdf[c] = total;
- }
-
- for(c = 0; c < (uint32_t)sample_num; c++)
- {
- uint32_t index = n * sample_num + c;
- float target = random_float[index] * total;
- uint32_t out_class = upper_bound(cdf, class_size, target);
- buffer[2][index] = (float)out_class;
- }
- }
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- vsi_nn_kernel_tensor_attr_release( &attr[i] );
- }
-
- if (cdf)
- {
- free(cdf);
- cdf = NULL;
- }
- if (random_integer)
- {
- free(random_integer);
- random_integer = NULL;
- }
- if (random_float)
- {
- free(random_float);
- random_float = NULL;
- }
-
- return status;
-} /* _compute() */
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = kernel_param_def;
- kernel->info.numParams = _cnt_of_array( kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
-
- status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _PARAM_NUM,
- inputs, input_num, outputs, output_num );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _PARAM_NUM );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( random_multinomial, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/reduceall_internal_cpu.c b/src/tim/vx/internal/src/kernel/cpu/reduceall_internal_cpu.c
deleted file mode 100644
index 5999b8c..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/reduceall_internal_cpu.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.reduceall_internal")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _reduceall_internal_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _REDUCEALL_INTERNAL_PARAM_NUM _cnt_of_array( _reduceall_internal_kernel_param_def )
-
-#define SCALAR_INPUT_AXIS (2)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
- int32_t all_result = 0;
-
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < (uint32_t)axis; i++)
- {
- innerSize *= in_attr[0]->shape->data[i];
- }
-
- axisSize = in_attr[0]->shape->data[axis];
-
- for (i = (uint32_t)axis + 1; i < in_attr[0]->shape->size; i++)
- {
- outerSize *= in_attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- all_result = (!!(f32_in_buffer[0][outer * axisSize * innerSize + inner]));
- for (i = 1; i < (uint32_t)axisSize; ++i)
- {
- int32_t value = (!!(f32_in_buffer[0][(outer * axisSize + i) * innerSize + inner]));
- all_result = all_result && value;
- }
- f32_out_buffer[0][outer * innerSize + inner] = (float)all_result;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _reduceall_internal_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _reduceall_internal_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_REDUCEALL_INTERNAL_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _REDUCEALL_INTERNAL_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _REDUCEALL_INTERNAL_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( reduceall_internal, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/reduceany_internal_cpu.c b/src/tim/vx/internal/src/kernel/cpu/reduceany_internal_cpu.c
deleted file mode 100644
index 39a2ff4..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/reduceany_internal_cpu.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.reduceany_internal")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _reduceany_internal_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _REDUCEANY_INTERNAL_PARAM_NUM _cnt_of_array( _reduceany_internal_kernel_param_def )
-
-#define SCALAR_INPUT_AXIS (2)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
- int32_t any_result = 0;
-
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < (uint32_t)axis; i++)
- {
- innerSize *= in_attr[0]->shape->data[i];
- }
-
- axisSize = in_attr[0]->shape->data[axis];
-
- for (i = (uint32_t)axis + 1; i < in_attr[0]->shape->size; i++)
- {
- outerSize *= in_attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- any_result = (!!(f32_in_buffer[0][outer * axisSize * innerSize + inner]));
- for (i = 1; i < (uint32_t)axisSize; ++i)
- {
- int32_t value = (!!(f32_in_buffer[0][(outer * axisSize + i) * innerSize + inner]));
- any_result = any_result || value;
- }
- f32_out_buffer[0][outer * innerSize + inner] = (float)any_result;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _reduceany_internal_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _reduceany_internal_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_REDUCEANY_INTERNAL_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _REDUCEANY_INTERNAL_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _REDUCEANY_INTERNAL_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( reduceany_internal, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/reducemax_internal_cpu.c b/src/tim/vx/internal/src/kernel/cpu/reducemax_internal_cpu.c
deleted file mode 100644
index c1f688c..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/reducemax_internal_cpu.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.reducemax_internal")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _reducemax_internal_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
-};
-#define _REDUCEMAX_INTERNAL_PARAM_NUM _cnt_of_array( _reducemax_internal_kernel_param_def )
-
-#define SCALAR_INPUT_AXIS (2)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
- float maxValue = 0.0f;
-
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < (uint32_t)axis; i++)
- {
- innerSize *= in_attr[0]->shape->data[i];
- }
-
- axisSize = in_attr[0]->shape->data[axis];
-
- for (i = (uint32_t)axis + 1; i < in_attr[0]->shape->size; i++)
- {
- outerSize *= in_attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- maxValue = f32_in_buffer[0][outer * axisSize * innerSize + inner];
- for (i = 1; i < (uint32_t)axisSize; ++i)
- {
- float value = f32_in_buffer[0][(outer * axisSize + i) * innerSize + inner];
- maxValue = vsi_nn_max(maxValue, value);
- }
- f32_out_buffer[0][outer * innerSize + inner] = (float)maxValue;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _reducemax_internal_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _reducemax_internal_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_REDUCEMAX_INTERNAL_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _REDUCEMAX_INTERNAL_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _REDUCEMAX_INTERNAL_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( reducemax_internal, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/reducemin_internal_cpu.c b/src/tim/vx/internal/src/kernel/cpu/reducemin_internal_cpu.c
deleted file mode 100644
index 3151853..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/reducemin_internal_cpu.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.reducemin_internal")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _reducemin_internal_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
-};
-#define _REDUCEMIN_INTERNAL_PARAM_NUM _cnt_of_array( _reducemin_internal_kernel_param_def )
-
-#define SCALAR_INPUT_AXIS (2)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
- float minValue = 0.0f;
-
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < (uint32_t)axis; i++)
- {
- innerSize *= in_attr[0]->shape->data[i];
- }
-
- axisSize = in_attr[0]->shape->data[axis];
-
- for (i = (uint32_t)axis + 1; i < in_attr[0]->shape->size; i++)
- {
- outerSize *= in_attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- minValue = f32_in_buffer[0][outer * axisSize * innerSize + inner];
- for (i = 1; i < (uint32_t)axisSize; ++i)
- {
- float value = f32_in_buffer[0][(outer * axisSize + i) * innerSize + inner];
- minValue = vsi_nn_min(minValue, value);
- }
- f32_out_buffer[0][outer * innerSize + inner] = (float)minValue;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _reducemin_internal_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _reducemin_internal_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_REDUCEMIN_INTERNAL_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _REDUCEMIN_INTERNAL_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _REDUCEMIN_INTERNAL_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( reducemin_internal, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/reduceprod_internal_cpu.c b/src/tim/vx/internal/src/kernel/cpu/reduceprod_internal_cpu.c
deleted file mode 100644
index 64b87c8..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/reduceprod_internal_cpu.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.reduceprod_internal")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _reduceprod_internal_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _REDUCEPROD_INTERNAL_PARAM_NUM _cnt_of_array( _reduceprod_internal_kernel_param_def )
-
-#define SCALAR_INPUT_AXIS (2)
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t axisSize = 1;
- vsi_ssize_t innerSize = 1;
- vsi_ssize_t inner = 0;
- vsi_ssize_t outer = 0;
- float prodValue = 0.0f;
-
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_AXIS], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- for (i = 0; i < (uint32_t)axis; i++)
- {
- innerSize *= in_attr[0]->shape->data[i];
- }
-
- axisSize = in_attr[0]->shape->data[axis];
-
- for (i = (uint32_t)axis + 1; i < in_attr[0]->shape->size; i++)
- {
- outerSize *= in_attr[0]->shape->data[i];
- }
-
- for ( outer = 0; outer < outerSize; ++outer)
- {
- for ( inner = 0; inner < innerSize; ++inner)
- {
- prodValue = f32_in_buffer[0][outer * axisSize * innerSize + inner];
- for (i = 1; i < (uint32_t)axisSize; ++i)
- {
- float value = f32_in_buffer[0][(outer * axisSize + i) * innerSize + inner];
- prodValue = prodValue * value;
- }
- f32_out_buffer[0][outer * innerSize + inner] = (float)prodValue;
- }
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _reduceprod_internal_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _reduceprod_internal_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_REDUCEPROD_INTERNAL_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = 0;
-
- axis = vsi_nn_kernel_param_get_int32(params, "axis");
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _REDUCEPROD_INTERNAL_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
- graph, I32, &axis );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _REDUCEPROD_INTERNAL_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( reduceprod_internal, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/relu_keras_cpu.c b/src/tim/vx/internal/src/kernel/cpu/relu_keras_cpu.c
deleted file mode 100644
index 3c4630d..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/relu_keras_cpu.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.relu_keras")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _relu_keras_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _RELU_KERAS_PARAM_NUM _cnt_of_array( _relu_keras_kernel_param_def )
-
-#define SCALAR_ALPHA (2)
-#define SCALAR_MAX_VALUE (3)
-#define SCALAR_THRESHOLD (4)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- float alpha = 0.0f;
- float max_value = 0.0f;
- float threshold = 0.0f;
-
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_ALPHA], &(alpha));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_VALUE], &(max_value));
- vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_THRESHOLD], &(threshold));
-
- for (i = 0; i < out_elements[0]; i++)
- {
- float data = f32_in_buffer[0][i];
-
- data = data >= max_value ? max_value : data;
- data = data < threshold ? alpha * (data - threshold) : data;
- f32_out_buffer[0][i] = data;
- }
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _relu_keras_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _relu_keras_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_RELU_KERAS_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
- float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
- float threshold = vsi_nn_kernel_param_get_float32( params, "threshold" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _RELU_KERAS_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_ALPHA] = vsi_nn_kernel_scalar_create( graph, F32, &alpha );
- node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
- node_params[SCALAR_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &threshold );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _RELU_KERAS_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALPHA] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_VALUE] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_THRESHOLD] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( relu_keras, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/repeat_cpu.c b/src/tim/vx/internal/src/kernel/cpu/repeat_cpu.c
deleted file mode 100644
index 3021604..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/repeat_cpu.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2019 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "vsi_nn_error.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _CPU_ARG_NUM (1)
-#define _CPU_INPUT_NUM (2)
-#define _CPU_OUTPUT_NUM (1)
-#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
-#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.repeat")
-
-DEF_KERNEL_EXECUTOR(_repeat_exec)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
- float * buffer[_CPU_IO_NUM] = { NULL };
- size_t out_elements = 0;
- vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
- vsi_ssize_t i = 0, j = 0, b = 0, c = 0;
- int32_t axis = 0;
- vsi_ssize_t outerSize = 1;
- vsi_ssize_t outIdx = 0;
- vsi_ssize_t width = 0, height = 0, channel = 0, batch = 0;
- vsi_ssize_t spatial = 0, vol = 0;
-
- tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
- tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
- tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
-
- attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
- CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
- attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
- CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
- attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
- CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
-
- out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
-
- status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
- CHECK_STATUS_FAIL_GOTO(status, final );
-
- buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
-
- buffer[1] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], TRUE );
- CHECK_PTR_FAIL_GOTO( buffer[1], "Create input0 buffer fail.", final );
-
- buffer[2] = (float *)malloc( out_elements * sizeof(float) );
- CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
- memset( buffer[2], 0, out_elements * sizeof(float) );
-
- width = attr[0]->shape->data[0];
- height = attr[0]->shape->data[1];
- channel = attr[0]->shape->size > 2 ? attr[0]->shape->data[2] : 1;
- batch = attr[0]->shape->size > 3 ? attr[0]->shape->data[3] : 1;
- spatial = width * height;
- vol = spatial * channel;
-
- for(i = 1; i < (int32_t)attr[0]->shape->size; i++)
- {
- outerSize *= attr[0]->shape->data[i];
- }
-
- if (axis == 0 && outerSize == 1)
- {
- for(i = 0; i < width; i++)
- {
- float data = buffer[0][i];
- int32_t len = (int32_t)buffer[1][i];
- for(j = 0; j < len; j++)
- {
- buffer[2][outIdx] = data;
- }
- }
- }
- else if (axis == 0)
- {
- for(b = 0; b < batch; b++)
- {
- for(c = 0; c < channel; c++)
- {
- for(i = 0; i < height; i++)
- {
- vsi_ssize_t len = (int32_t)buffer[1][i];
- vsi_ssize_t offset = i * width + c * spatial + b * vol;
- for(j = 0; j < len; j++)
- {
- memcpy(buffer[2] + outIdx, buffer[0] + offset, sizeof(float) * width);
- outIdx += width;
- }
- }
- }
- }
- }
- else if (axis == 1)
- {
- for(b = 0; b < batch; b++)
- {
- for(c = 0; c < channel; c++)
- {
- for(i = 0; i < height; i++)
- {
- vsi_ssize_t offset = i * width + c * spatial + b * vol;
- for(j = 0; j < width; j++)
- {
- vsi_ssize_t len = (vsi_ssize_t)buffer[1][j];
- float data = buffer[0][offset + j];
- vsi_ssize_t k = 0;
- for(k = 0; k < len; k++)
- {
- buffer[2][outIdx++] = data;
- }
- }
- }
- }
- }
- }
- else if (axis == 2)
- {
- for(b = 0; b < batch; b++)
- {
- for(c = 0; c < channel; c++)
- {
- vsi_ssize_t len = (vsi_ssize_t)buffer[1][c];
- vsi_ssize_t offset = c * spatial + b * vol;
-
- for(j = 0; j < len; j++)
- {
- memcpy(buffer[2] + outIdx, buffer[0] + offset, sizeof(float) * spatial);
- outIdx += spatial;
- }
- }
- }
- }
- else
- {
- VSILOGE("axis is not support");
- status = VSI_FAILURE;
- goto final;
- }
-
- status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
- buffer[2], out_elements );
- CHECK_STATUS_FAIL_GOTO( status, final );
-
-final:
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if( buffer[i] )
- {
- free( buffer[i] );
- }
- }
- for( i = 0; i < _CPU_IO_NUM; i ++ )
- {
- if(attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
- }
- return status;
-} /* _repeat_exec() */
-/*
- * Kernel params
- */
-static vx_param_description_t _repeat_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- // Add kererl parameters here
-};
-#define _REPEAT_PARAM_NUM _cnt_of_array( _repeat_kernel_param_def )
-
-static vsi_status _query_kernel
- (
- vsi_nn_tensor_t* const* const inputs,
- vsi_nn_tensor_t* const* const outputs,
- vsi_nn_kernel_t* kernel
- )
-{
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _repeat_exec;
- kernel->info.parameters = _repeat_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _repeat_kernel_param_def );
-
- return VSI_SUCCESS;
-} /* _query_kernel() */
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );
-
- status = _query_kernel( inputs, outputs, kernel );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
- inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
- backend_params[3] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
-
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
- CHECK_STATUS( status );
- vsi_nn_kernel_scalar_release( &backend_params[3] );
- }
- else
- {
- status = VSI_FAILURE;
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( repeat, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_1d_bilinear_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_1d_bilinear_cpu.c
deleted file mode 100644
index ed1eff5..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/resize_1d_bilinear_cpu.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_1d_bilinear")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )
-
-#define SCALAR_ALIGN_CORNERS (2)
-#define SCALAR_HALF_PIXEL (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i = 0;
- int32_t align_corners = 0;
- int32_t half_pixel_centers = 0;
- float width_scale = 1.0f;
- vsi_size_t input_width = 0, output_width = 0;
- uint32_t w = 0, out = 0;
- vsi_size_t output_dims = 0;
- float data00 = .0f, data01 = .0f, interpolation = .0f;
- vsi_size_t index = 0;
- vsi_size_t outer = 0;
-
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
- input_width = in_attr[0]->shape->data[0];
- output_width = out_attr[0]->shape->data[0];
- output_dims = (vsi_size_t)out_attr[0]->shape->size;
-
- if (align_corners && output_width > 1)
- {
- width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
- }
- else
- {
- width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
- }
-
- outer = 1;
-
- for (i = 1; i < output_dims; i++)
- {
- outer = outer * out_attr[0]->shape->data[i];
- }
-
- for (out = 0; out < outer; out++)
- {
- vsi_ssize_t input_base = out * input_width;
- vsi_ssize_t output_base = out * output_width;
- for (w = 0; w < output_width; w ++)
- {
- vx_float32 input_w;
- vsi_ssize_t w0;
- vsi_ssize_t w1;
- if (half_pixel_centers)
- {
- input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f;
- }
- else
- {
- input_w = w * width_scale;
- }
- w0 = (vsi_ssize_t)input_w;
- w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vsi_ssize_t)(input_width - 1));
- index = input_base + w0;
- data00 = f32_in_buffer[0][index];
- index = input_base + w1;
- data01 = f32_in_buffer[0][index];
-
- interpolation = data00 * (1 - (input_w - w0)) +
- data01 * (input_w - w0);
- index = output_base + w;
- f32_out_buffer[0][index] = interpolation;
- }
- }
-
-
- /* save data */
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _resize_1d_bilinear_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_BILINEAR_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
- int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_BILINEAR_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
- node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_BILINEAR_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( resize_1d_bilinear, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_1d_nearest_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_1d_nearest_cpu.c
deleted file mode 100644
index 195353d..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/resize_1d_nearest_cpu.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_1d_nearest")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _RESIZE_1D_NEAREST_PARAM_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def )
-
-#define SCALAR_ALIGN_CORNERS (2)
-#define SCALAR_HALF_PIXEL (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i = 0;
- int32_t align_corners = 0;
- int32_t half_pixel_centers = 0;
- float width_scale = 1.0f;
- vsi_size_t input_width = 0, output_width = 0;
- vsi_size_t w = 0, out = 0;
- vsi_size_t output_dims = 0;
- vsi_size_t outer = 0;
- /* prepare data */
- for (i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for (i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
- input_width = in_attr[0]->shape->data[0];
- output_width = out_attr[0]->shape->data[0];
- output_dims = (uint32_t)out_attr[0]->shape->size;
-
- if (align_corners && output_width > 1)
- {
- width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
- }
- else
- {
- width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
- }
-
- outer = 1;
-
- for (i = 1; i < output_dims; i++)
- {
- outer = outer * out_attr[0]->shape->data[i];
- }
-
- for (out = 0; out < outer; out++)
- {
- vsi_ssize_t input_base = out * input_width;
- vsi_ssize_t output_base = out * output_width;
-
- for (w = 0; w < output_width; w ++)
- {
- float input_w;
- vsi_size_t in_x;
- vsi_ssize_t in_index;
- vsi_ssize_t out_index;
-
- if (half_pixel_centers)
- {
- input_w = ((float)w + 0.5f) * width_scale;
- }
- else
- {
- input_w = w * width_scale;
- }
- if (align_corners)
- {
- in_x = vsi_nn_min((vsi_size_t)simple_round(input_w), input_width - 1);
- }
- else
- {
- in_x = vsi_nn_min((vsi_size_t)floorf(input_w), input_width - 1);
- }
- in_index = in_x + input_base;
- out_index = w + output_base;
- f32_out_buffer[0][out_index] = f32_in_buffer[0][in_index];
- }
- }
-
- /* save data */
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
-
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- /* Add extra params */
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _resize_1d_nearest_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM];
- vsi_nn_kernel_node_t node = NULL;
- int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
- int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if ( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if ( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
- node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( resize_1d_nearest, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_bilinear_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_bilinear_cpu.c
deleted file mode 100644
index 6b7a3d9..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/resize_bilinear_cpu.c
+++ /dev/null
@@ -1,311 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_bilinear")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _resize_bilinear_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _RESIZE_BILINEAR_PARAM_NUM _cnt_of_array( _resize_bilinear_kernel_param_def )
-
-#define SCALAR_ALIGN_CORNERS (2)
-#define SCALAR_HALF_PIXEL (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t align_corners;
- int32_t half_pixel_centers;
- float width_scale;
- float height_scale;
- vsi_size_t input_width, output_width, input_height, output_height;
- vsi_size_t b = 0, d = 0, w = 0, h = 0;
- vsi_size_t output_depth, input_depth;
- vsi_size_t output_batch;
- vsi_size_t output_dims, input_dims;
- float data00 = .0f, data01 = .0f, data10 = .0f, data11 = .0f, interpolation = .0f;
- vsi_size_t input_width_orig;
- vsi_size_t output_width_orig;
- vsi_size_t index;
-
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
- input_width = in_attr[0]->shape->data[0];
- input_height = in_attr[0]->shape->data[1];
- output_width = out_attr[0]->shape->data[0];
- output_height = out_attr[0]->shape->data[1];
- output_dims = (vsi_size_t)out_attr[0]->shape->size;
- output_depth = output_dims > 2 ? out_attr[0]->shape->data[2] : 1;
- output_batch = output_dims > 3 ? out_attr[0]->shape->data[3] : 1;
- input_dims = (vsi_size_t)in_attr[0]->shape->size;
- input_depth = input_dims > 2 ? in_attr[0]->shape->data[2] : 1;
- input_width_orig = input_width;
- output_width_orig = output_width;
-
- if (align_corners && output_width > 1)
- {
- width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
- }
- else
- {
- width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
- }
-
- if (align_corners && output_height > 1)
- {
- height_scale = ((vx_float32)(input_height - 1) * 1.0f) / (vx_float32)(output_height - 1);
- }
- else
- {
- height_scale = ((vx_float32)input_height * 1.0f) / (vx_float32)output_height;
- }
-
- for (b = 0; b < output_batch; b ++)
- {
- for (d = 0; d < output_depth; d ++)
- {
- vsi_ssize_t input_base = b * input_depth * input_width_orig * input_height \
- + d * input_width_orig * input_height;
- vsi_ssize_t output_base = b * output_depth * output_width_orig * output_height \
- + d * output_width_orig * output_height;
-
- for (h = 0; h < output_height; h ++)
- {
- vx_float32 input_h = h * height_scale;
- vsi_size_t h0;
- vsi_size_t h1;
-
- if (half_pixel_centers)
- {
- input_h = ((vx_float32)h + 0.5f) * height_scale - 0.5f;
- }
- else
- {
- input_h = h * height_scale;
- }
- h0 = (vsi_size_t)input_h;
- h1 = input_h < 0 ? 0 : vsi_nn_min(h0 + 1, input_height - 1);
- for (w = 0; w < output_width; w ++)
- {
- vx_float32 input_w;
- vsi_ssize_t w0;
- vsi_ssize_t w1;
- if (half_pixel_centers)
- {
- input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f;
- }
- else
- {
- input_w = w * width_scale;
- }
- w0 = (vsi_ssize_t)input_w;
- w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vsi_ssize_t)(input_width - 1));
- index = input_base + h0 * input_width_orig + w0;
- data00 = f32_in_buffer[0][index];
- index = input_base + h0 * input_width_orig + w1;
- data01 = f32_in_buffer[0][index];
- index = input_base + h1 * input_width_orig + w0;
- data10 = f32_in_buffer[0][index];
- index = input_base + h1 * input_width_orig + w1;
- data11 = f32_in_buffer[0][index];
-
- interpolation = data00 * (1 - (input_h - h0)) * (1 - (input_w - w0)) +
- data10 * (input_h - h0) * (1 - (input_w - w0)) +
- data01 * (1 - (input_h - h0)) * (input_w - w0) +
- data11 * (input_h - h0) * (input_w - w0);
- index = output_base + h * output_width_orig + w;
- f32_out_buffer[0][index] = interpolation;
- }
- }
- }
- }
-
-
-
- /* save data */
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for(i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _resize_bilinear_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _resize_bilinear_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_RESIZE_BILINEAR_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
- int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _RESIZE_BILINEAR_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
- node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_BILINEAR_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( resize_bilinear, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_bilinear_nhwc_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_bilinear_nhwc_cpu.c
deleted file mode 100644
index 61690c3..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/resize_bilinear_nhwc_cpu.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2021 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include
-#include "vsi_nn_types.h"
-#include "vsi_nn_tensor.h"
-#include "vsi_nn_graph.h"
-#include "vsi_nn_log.h"
-#include "vsi_nn_error.h"
-#include "vsi_nn_prv.h"
-#include "vsi_nn_tensor_util.h"
-#include "utils/vsi_nn_util.h"
-#include "kernel/vsi_nn_kernel.h"
-
-__BEGIN_DECLS
-
-/*
- * Define kernel meta.
- */
-#define _INPUT_NUM (1)
-#define _OUTPUT_NUM (1)
-#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_bilinear_nhwc")
-
-
-/*
- * Kernel params
- */
-static vx_param_description_t _resize_bilinear_nhwc_kernel_param_def[] =
-{
- {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
- {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
-};
-#define _RESIZE_BILINEAR_NHWC_PARAM_NUM _cnt_of_array( _resize_bilinear_nhwc_kernel_param_def )
-
-#define SCALAR_ALIGN_CORNERS (2)
-#define SCALAR_HALF_PIXEL (3)
-
-/*
- * Kernel function
- */
-DEF_KERNEL_EXECUTOR(_compute)
- (
- vsi_nn_kernel_node_t node,
- const vsi_nn_kernel_node_param_t * param,
- size_t param_size
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
- float *f32_in_buffer[_INPUT_NUM] = {NULL};
- float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
- vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
- vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
- vsi_size_t out_elements[_OUTPUT_NUM] = {0};
- vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
- uint32_t i;
- int32_t align_corners;
- int32_t half_pixel_centers;
- float width_scale;
- float height_scale;
- vsi_size_t input_width, output_width, input_height, output_height;
- vsi_size_t b = 0, d = 0, w = 0, h = 0;
- vsi_size_t output_depth, input_depth;
- vsi_size_t output_batch;
- vsi_size_t output_dims;
- float data00 = .0f, data01 = .0f, data10 = .0f, data11 = .0f, interpolation = .0f;
- vsi_size_t input_width_orig;
- vsi_size_t output_width_orig;
- vsi_size_t index;
-
- /* prepare data */
- for(i = 0; i < _INPUT_NUM; i ++)
- {
- input[i] = (vsi_nn_kernel_tensor_t)param[i];
- in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
- f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
- CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
- }
- for(i = 0; i < _OUTPUT_NUM; i ++)
- {
- output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
- out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
- vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
- out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
- out_bytes[i] = out_elements[i] * sizeof(float);
- f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
- CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
- memset( f32_out_buffer[i], 0, out_bytes[i] );
- }
-
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
- vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
- input_width = in_attr[0]->shape->data[1];
- input_height = in_attr[0]->shape->data[2];
- output_width = out_attr[0]->shape->data[1];
- output_height = out_attr[0]->shape->data[2];
- output_dims = (vsi_size_t)out_attr[0]->shape->size;
- output_depth = out_attr[0]->shape->data[0];
- output_batch = output_dims > 3 ? out_attr[0]->shape->data[3] : 1;
- input_depth = in_attr[0]->shape->data[0];
- input_width_orig = input_width;
- output_width_orig = output_width;
-
- if (align_corners && output_width > 1)
- {
- width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
- }
- else
- {
- width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
- }
-
- if (align_corners && output_height > 1)
- {
- height_scale = ((vx_float32)(input_height - 1) * 1.0f) / (vx_float32)(output_height - 1);
- }
- else
- {
- height_scale = ((vx_float32)input_height * 1.0f) / (vx_float32)output_height;
- }
-
- for (b = 0; b < output_batch; b ++)
- {
- vsi_ssize_t input_base = b * input_depth * input_width_orig * input_height;
- vsi_ssize_t output_base = b * output_depth * output_width_orig * output_height;
-
- for (h = 0; h < output_height; h++)
- {
- vx_float32 input_h = h * height_scale;
- vsi_size_t h0;
- vsi_size_t h1;
-
- if (half_pixel_centers)
- {
- input_h = ((vx_float32)h + 0.5f) * height_scale - 0.5f;
- }
- else
- {
- input_h = h * height_scale;
- }
- h0 = (vsi_size_t)input_h;
- h1 = input_h < 0 ? 0 : vsi_nn_min(h0 + 1, input_height - 1);
- for (w = 0; w < output_width; w++)
- {
- vx_float32 input_w;
- vsi_ssize_t w0;
- vsi_ssize_t w1;
- if (half_pixel_centers)
- {
- input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f;
- }
- else
- {
- input_w = w * width_scale;
- }
- w0 = (vsi_ssize_t)input_w;
- w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vsi_ssize_t)(input_width - 1));
-
- for (d = 0; d < output_depth; d++)
- {
- index = input_base + h0 * input_width_orig * input_depth + w0 * input_depth + d;
- data00 = f32_in_buffer[0][index];
- index = input_base + h0 * input_width_orig * input_depth + w1 * input_depth + d;
- data01 = f32_in_buffer[0][index];
- index = input_base + h1 * input_width_orig * input_depth + w0 * input_depth + d;
- data10 = f32_in_buffer[0][index];
- index = input_base + h1 * input_width_orig * input_depth + w1 * input_depth + d;
- data11 = f32_in_buffer[0][index];
-
- interpolation = data00 * (1 - (input_h - h0)) * (1 - (input_w - w0)) +
- data10 * (input_h - h0) * (1 - (input_w - w0)) +
- data01 * (1 - (input_h - h0)) * (input_w - w0) +
- data11 * (input_h - h0) * (input_w - w0);
- index = output_base + h * output_width_orig * output_depth + w * output_depth + d;
- f32_out_buffer[0][index] = interpolation;
- }
- }
- }
- }
-
- /* save data */
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
- f32_out_buffer[i], out_elements[i] );
- CHECK_STATUS_FAIL_GOTO( status, final );
- }
-
-final:
- for (i = 0; i < _INPUT_NUM; i++)
- {
- if (f32_in_buffer[i])
- {
- free(f32_in_buffer[i]);
- f32_in_buffer[i] = NULL;
- }
- if (in_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
- }
- }
- for (i = 0; i < _OUTPUT_NUM; i++)
- {
- if (f32_out_buffer[i])
- {
- free(f32_out_buffer[i]);
- f32_out_buffer[i] = NULL;
- }
- if (out_attr[i])
- {
- vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
- }
- }
-
- return status;
-} /* _compute() */
-
-
-/*
- * Query kernel
- */
-static vsi_status _query_kernel
- (
- vsi_nn_kernel_t * kernel,
- vsi_nn_tensor_t * const * const inputs,
- vsi_nn_tensor_t * const * const outputs
- )
-{
- vsi_status status = VSI_FAILURE;
- snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
- kernel->info.function = _compute;
- kernel->info.parameters = _resize_bilinear_nhwc_kernel_param_def;
- kernel->info.numParams = _cnt_of_array( _resize_bilinear_nhwc_kernel_param_def );
- status = VSI_SUCCESS;
- return status;
-} /* _query_kernel() */
-
-
-static vsi_nn_kernel_node_t _setup
- (
- vsi_nn_graph_t * graph,
- vsi_nn_tensor_t ** inputs,
- size_t input_num,
- vsi_nn_tensor_t ** outputs,
- size_t output_num,
- const vsi_nn_kernel_param_t * params,
- vsi_nn_kernel_t * kernel
- )
-{
- vsi_status status = VSI_FAILURE;
- vsi_nn_kernel_node_param_t node_params[_RESIZE_BILINEAR_NHWC_PARAM_NUM] = {NULL};
- vsi_nn_kernel_node_t node = NULL;
- int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
- int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-
- status = _query_kernel( kernel, inputs, outputs );
- if( VSI_SUCCESS == status)
- {
- node = vsi_nn_kernel_create_node( graph, kernel );
- if( node )
- {
- /* Set inputs and outputs */
- vsi_nn_kernel_node_pack_io( node_params, _RESIZE_BILINEAR_NHWC_PARAM_NUM,
- inputs, input_num, outputs, output_num );
- node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
- node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
- /* Pass parameters to node. */
- status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_BILINEAR_NHWC_PARAM_NUM );
- VSI_ASSERT( status == VSI_SUCCESS );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
- vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
- }
- }
- return node;
-} /* _setup() */
-
-__END_DECLS
-
-REGISTER_BACKEND_CPU( resize_bilinear_nhwc, _setup )
diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_nearest_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_nearest_cpu.c
deleted file mode 100644
index d74f6cb..0000000
--- a/src/tim/vx/internal/src/kernel/cpu/resize_nearest_cpu.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/****************************************************************************
-*
-* Copyright (c) 2020 Vivante Corporation
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in
-* all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-* DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
-
-
-#include
-#include
-#include