Update internal & prebuilt-sdk for 23Q1 release (#573)
Update internal to 0e9393dbb4f653b9dfceaeaaa920d4deb8b27077 Update prebuilt-sdk to 6.4.14 release Update cmakefiles to support above updates Type: New Feature Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
This commit is contained in:
parent
a32f255d7f
commit
6e38e64a1a
|
|
@ -1 +1 @@
|
|||
6.4.12_CL562241A_D561555_A558512_R558399_T558462_Oeb44e5c
|
||||
6.4.14_CL650117A_D650117_A648302_R647402_T648811_O646970
|
||||
|
|
@ -501,6 +501,8 @@ enum vx_kernel_e {
|
|||
|
||||
VX_KERNEL_STREAM_PROCESSOR = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x32,
|
||||
|
||||
VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x33,
|
||||
|
||||
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -173,7 +173,7 @@ VX_DECONV_3D_API_SUPPORT is used to declare that vsi openvx driver can support d
|
|||
1: support
|
||||
*/
|
||||
#ifndef VX_DECONV_3D_API_SUPPORT
|
||||
#define VX_DECONV_3D_API_SUPPORT 0
|
||||
#define VX_DECONV_3D_API_SUPPORT 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
@ -237,4 +237,26 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
|
|||
#define VX_SCALE_EXTRA_PARAMETER_SUPPORT 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
VX_INVALIDATE_HANDLE_SUPPORT is used to declare that we refined vxSwapTensorHandle API to follow KHR OpenVX 1.3 spec: tensor don't maintain handle internally if new_ptr is NULL.
|
||||
[value]
|
||||
0: not support
|
||||
1: support
|
||||
*/
|
||||
#ifndef VX_INVALIDATE_HANDLE_SUPPORT
|
||||
#define VX_INVALIDATE_HANDLE_SUPPORT 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
VX_ACTIVATION_EXT2_SUPPORT is used to declare that ACTIVATION can support sign, hard_sigmoid, neg, clip, exp, sin, cos,
|
||||
log, mish, gelu, hgelu, elu, selu, celu, rcp, softsign, atan, atanh, acosh, inverse sigmoid, round and erf.
|
||||
[value]
|
||||
0: not support
|
||||
1: support
|
||||
*/
|
||||
#ifndef VX_ACTIVATION_EXT2_SUPPORT
|
||||
#define VX_ACTIVATION_EXT2_SUPPORT 1
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __VX_KHR_COMPATIBLE_H__ */
|
||||
|
|
|
|||
|
|
@ -219,6 +219,28 @@ enum vx_nn_activation_function_e
|
|||
VX_NN_ACTIVATION_HSWISH = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x6,
|
||||
VX_NN_ACTIVATION_CUSTOM = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x7,
|
||||
VX_NN_ACTIVATION_NONE = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x8,
|
||||
VX_NN_ACTIVATION_SIGN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x9,
|
||||
VX_NN_ACTIVATION_HSIGMOID_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xa,
|
||||
VX_NN_ACTIVATION_NEG_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xb,
|
||||
VX_NN_ACTIVATION_CLIP_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xc,
|
||||
VX_NN_ACTIVATION_EXP_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xd,
|
||||
VX_NN_ACTIVATION_SIN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xe,
|
||||
VX_NN_ACTIVATION_COS_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xf,
|
||||
VX_NN_ACTIVATION_LOG_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x10,
|
||||
VX_NN_ACTIVATION_MISH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x11,
|
||||
VX_NN_ACTIVATION_GELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x12,
|
||||
VX_NN_ACTIVATION_HGELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x13,
|
||||
VX_NN_ACTIVATION_ELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x14,
|
||||
VX_NN_ACTIVATION_SELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x15,
|
||||
VX_NN_ACTIVATION_CELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x16,
|
||||
VX_NN_ACTIVATION_RECIPROCAL_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x17,
|
||||
VX_NN_ACTIVATION_SOFTSIGN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x18,
|
||||
VX_NN_ACTIVATION_ATAN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x19,
|
||||
VX_NN_ACTIVATION_ATANH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1a,
|
||||
VX_NN_ACTIVATION_ACOSH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1b,
|
||||
VX_NN_ACTIVATION_INVERSE_SIGMOID_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1c,
|
||||
VX_NN_ACTIVATION_ROUND_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1d,
|
||||
VX_NN_ACTIVATION_ERF_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1e,
|
||||
};
|
||||
|
||||
/*! \brief The Convolutional network type
|
||||
|
|
@ -623,6 +645,19 @@ VX_API_ENTRY vx_tensor VX_API_CALL vxCreateTensorFromHandle2(
|
|||
* \retval VX_ERROR_INVALID_REFERENCE tensor is not a valid <tt>\ref vx_tensor</tt> <tt>\ref vx_image</tt>reference created from Handle.
|
||||
*/
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxFlushHandle(vx_reference ref);
|
||||
/* !\brief Same as vxFlushHandle() also added by Verisilicon as extension API.
|
||||
*/
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxFlushHandleVSI(vx_reference ref);
|
||||
|
||||
#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
|
||||
/*! \brief Invalidate the memory referenced by reference's handle when it is ready.
|
||||
* added by Versilicon as extension API.
|
||||
* \param [in] ref The reference(image or tensor) which created from handle.
|
||||
* \return A <tt>\ref vx_status_e</tt> enumeration.;
|
||||
* \retval VX_ERROR_INVALID_REFERENCE tensor is not a valid <tt>\ref vx_tensor</tt> <tt>\ref vx_image</tt>reference created from Handle.
|
||||
*/
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxInvalidateHandleVSI(vx_reference ref);
|
||||
#endif
|
||||
|
||||
#if VX_VA40_EXT_SUPPORT
|
||||
/*! \brief Return a new tensor referencing the same memory location but with different shape.
|
||||
|
|
@ -776,6 +811,14 @@ typedef struct _vx_nn_convolution_params_ext2_t
|
|||
|
||||
vx_int32 depth_multiplier; /*!< \brief depthwise multiplier value, if 0, means convolution, elsewise(>=1), the convolution is depthwiseconvolution. */
|
||||
} vx_nn_convolution_params_ext2_t;
|
||||
|
||||
typedef struct _vx_nn_convolution_params_ext3_t
|
||||
{
|
||||
vx_nn_convolution_params_ext2_t ext2; /*!< \brief Convolution extension structure head */
|
||||
|
||||
vx_bool isPPU; /*!< \brief merge convolution and relu for PPU. */
|
||||
} vx_nn_convolution_params_ext3_t;
|
||||
|
||||
/*==============================================================================
|
||||
NN Nodes
|
||||
=============================================================================*/
|
||||
|
|
@ -2142,7 +2185,8 @@ typedef struct _vx_hardware_caps_params_ext_t
|
|||
typedef struct _vx_hardware_caps_params_ext2_t
|
||||
{
|
||||
vx_hardware_caps_params_ext_t base;
|
||||
vx_uint32 streamProcessorExecCount; /*!< \brief streamprocess execution count. */
|
||||
vx_uint32 streamProcessorExecCount; /*!< \brief stream processor execution count. */
|
||||
vx_uint32 streamProcessorVectorSize; /*!< \brief stream processor vector size. */
|
||||
} vx_hardware_caps_params_ext2_t;
|
||||
|
||||
/*! \brief Queries hardware caps information.
|
||||
|
|
|
|||
|
|
@ -236,6 +236,12 @@ typedef struct _vx_nn_convolution_relu_pooling_params_ext6_t
|
|||
|
||||
} vx_nn_convolution_relu_pooling_params_ext6_t, * vx_nn_convolution_relu_pooling_params_ext6;;
|
||||
|
||||
typedef struct _vx_nn_convolution_relu_pooling_params_ext7_t
|
||||
{
|
||||
vx_nn_convolution_relu_pooling_params_ext6_t ext6; /*!< \brief convolution relu pooling params <tt>\ref vx_nn_convolution_relu_pooling_params_ext_t</tt> */
|
||||
vx_bool isSub;
|
||||
} vx_nn_convolution_relu_pooling_params_ext7_t, * vx_nn_convolution_relu_pooling_params_ext7;
|
||||
|
||||
/*! \brief [Graph] Creates a Convolutional Network Convolution and Activation(Relu) and Pooling Layer Node, this fucntion match kronos NN Extension 1.2 verion.
|
||||
* \details This function implement Convolutional Network Convolution and Activation(Relu) and Pooling layer.
|
||||
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of the accumulator bits are implementation defined,
|
||||
|
|
@ -1081,6 +1087,48 @@ VX_API_ENTRY vx_node VX_API_CALL vxTensorTableLookupLayer(
|
|||
vx_lut OutLut,
|
||||
vx_tensor output);
|
||||
|
||||
typedef struct _vx_nn_gemm_relu_pooling_params_t
|
||||
{
|
||||
vx_bool enable_relu; /*!< \brief Enable Relu layer function or not. */
|
||||
vx_bool enable_leaky_relu; /*!< \brief Enable LeakyRelu layer function or not. */
|
||||
vx_float32 alpha; /*!< \brief Alpha value for Activation */
|
||||
vx_float32 beta; /*!< \brief Beta value for Activation */
|
||||
vx_uint32 node_count; /*!< \brief node count to merge */
|
||||
vx_float32 merged_scale[MERGED_NODE_COUNT_MAX]; /*!< \brief scale of merged node output */
|
||||
vx_int32 merged_zero_point[MERGED_NODE_COUNT_MAX]; /*!< \brief zero point of merged node output */
|
||||
vx_enum merged_data_type[MERGED_NODE_COUNT_MAX]; /*!< \brief data type of merged node output */
|
||||
vx_enum act_func; /*!< \brief nn activation function */
|
||||
vx_lut lut_in; /*!< \brief LUT in */
|
||||
vx_lut lut_out; /*!< \brief LUT out */
|
||||
vx_bool enbale_const_multiplier; /*!< \brief tensor mul with one of inputs as a single pixel const tensor */
|
||||
vx_float32 const_multiplier; /*!< \brief const multiplier */
|
||||
} vx_nn_gemm_relu_pooling_params_t, * vx_nn_gemm_relu_pooling_params;
|
||||
|
||||
/*! \brief Create a batch gemm node, the calcution formula is output = matrix_a * matrix_b + matrix_c.
|
||||
* \param [in] graph The reference to the graph.
|
||||
* \param [in] matrix_a The first input tensor.
|
||||
* \param [in] matrix_b The second input tensor. Must be in the same data type and batch count as first input tensor.
|
||||
* \param [in] matrix_c The third input tensor. Must be in the same data type and batch count as first input tensor. [optional]
|
||||
* \param [in] trans_a If true, the matrix_a has been transposed before calcution.
|
||||
* \param [in] trans_b If true, the matrix_b has been transposed before calcution.
|
||||
* \param [in] trans_c If true, the matrix_c has been transposed before calcution. [optional]
|
||||
* \param [in] merge_param the parameters for gemm + op merging
|
||||
* \param [out] output The output tensor. Output dimension must agree the formula in the description.
|
||||
* \return <tt>\ref vx_node</tt>.
|
||||
* \retval vx_node A node reference. Any possible errors preventing a successful creation
|
||||
* should be checked using <tt>\ref vxGetStatus</tt>
|
||||
* \ingroup group_vision_function_gemm
|
||||
*/
|
||||
VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer(vx_graph graph,
|
||||
vx_tensor matrix_a,
|
||||
vx_tensor matrix_b,
|
||||
vx_tensor matrix_c,
|
||||
vx_scalar trans_a,
|
||||
vx_scalar trans_b,
|
||||
vx_scalar trans_c,
|
||||
const vx_nn_gemm_relu_pooling_params merge_param,
|
||||
vx_tensor output);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@ typedef enum _vx_sp_attribute_e
|
|||
VX_SP_ATTRIBUTE_SUM_ENGINE_CONTROL,
|
||||
VX_SP_ATTRIBUTE_SUM_ENGINE_NUM_CH_MINUS_ONE,
|
||||
VX_SP_ATTRIBUTE_SUM_ENGINE_2D_ACCUM_STORAGE,
|
||||
VX_SP_ATTRIBUTE_SUM_ENGINE_OP_SELECT,
|
||||
|
||||
VX_SP_ATTRIBUTE_NUM_OF_ELEMENTS_PER_LOOP_PER_INPUT,
|
||||
|
||||
|
|
@ -181,6 +182,18 @@ typedef enum _vx_sp_attribute_e
|
|||
VX_SP_ATTRIBUTE_CONST3, /* NN clamp max */
|
||||
VX_SP_ATTRIBUTE_CONST4, /* NN clmap min */
|
||||
|
||||
VX_SP_ATTRIBUTE_CONST_COUNT,
|
||||
|
||||
VX_SP_ATTRIBUTE_SPLIT_AXIS,
|
||||
VX_SP_ATTRIBUTE_SPLIT_MAX_SIZE,
|
||||
VX_SP_ATTRIBUTE_SPLIT_TILEX_EQUAL_INIMAGEX,
|
||||
|
||||
VX_SP_ATTRIBUTE_NOT_MERGE_CONVSP,
|
||||
VX_SP_ATTRIBUTE_UPDATE_CONST0_TO_PCQ_COEF_TENSOR,
|
||||
VX_SP_ATTRIBUTE_RESHAPE_ARRAY, /* bit layout | output:24-29 | input3:18-23 | input2:12-17 | input1:6-11 | input0:0-5 | */
|
||||
VX_SP_ATTRIBUTE_ALIGN_SP_CORE_AXIS,
|
||||
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE,
|
||||
|
||||
VX_SP_ATTRIBUTE_TOTAL_COUNT,
|
||||
}
|
||||
vx_sp_attribute_e;
|
||||
|
|
@ -274,9 +287,55 @@ typedef enum _vx_sp_attribute_sum_engine_2d_accum_storage_e
|
|||
}
|
||||
vx_sp_attribute_sum_engine_2d_accum_storage_e;
|
||||
|
||||
typedef enum _vx_sp_attribute_sum_engine_op_select_e
|
||||
{
|
||||
VX_SP_ATTRIBUTE_SUM_ENGINE_SUM_OP,
|
||||
VX_SP_ATTRIBUTE_SUM_ENGINE_MAX_OP
|
||||
} vx_sp_attribute_sum_engine_op_select_e;
|
||||
|
||||
typedef enum _vx_sp_attribute_reshape_e
|
||||
{
|
||||
VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW = 0x00,
|
||||
VX_SP_ATTRIBUTE_RESHAPE_CHW2WHC = 0x06,
|
||||
VX_SP_ATTRIBUTE_RESHAPE_CHW2WCH = 0x09,
|
||||
VX_SP_ATTRIBUTE_RESHAPE_CHW2HWC = 0x12,
|
||||
VX_SP_ATTRIBUTE_RESHAPE_CHW2HCW = 0x18,
|
||||
VX_SP_ATTRIBUTE_RESHAPE_CHW2CWH = 0x21,
|
||||
}
|
||||
vx_sp_attribute_reshape_e;
|
||||
|
||||
typedef enum _vx_sp_attribute_split_axis_e
|
||||
{
|
||||
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_X,
|
||||
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_Y,
|
||||
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_Z,
|
||||
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_XY,
|
||||
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_YZ,
|
||||
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_XYZ,
|
||||
}
|
||||
vx_sp_attribute_split_axis_e;
|
||||
|
||||
typedef enum _vx_sp_attribute_tile_align_sp_core_e
|
||||
{
|
||||
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_NONE = 0,
|
||||
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_X,
|
||||
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_Y,
|
||||
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_XY,
|
||||
}
|
||||
vx_sp_attribute_tile_align_sp_core_e;
|
||||
|
||||
typedef enum _vx_sp_attribute_keep_tile_size_e
|
||||
{
|
||||
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_NONE = 0,
|
||||
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_X,
|
||||
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_Y,
|
||||
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_XY,
|
||||
}
|
||||
vx_sp_attribute_keep_tile_size_e;
|
||||
|
||||
/**********************************************************************************************/
|
||||
|
||||
/*! \brief Creates an opaque reference to a spinst data.
|
||||
/*! \brief Creates an external reference to a spinst data.
|
||||
* \param [in] context The reference to the implementation context.
|
||||
* \return A spinst data reference.
|
||||
* \Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||
|
|
@ -286,7 +345,17 @@ VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINST(
|
|||
vx_context context
|
||||
);
|
||||
|
||||
/*! \brief Releases a reference to a spinst object.
|
||||
/*! \brief Creates an internal reference to a spinst data.
|
||||
* \param [in] context The reference to the implementation context.
|
||||
* \return A spinst data reference.
|
||||
* \Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||
* \ingroup group_object_spinst
|
||||
*/
|
||||
VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINSTInternal(
|
||||
vx_context context
|
||||
);
|
||||
|
||||
/*! \brief Releases a reference to a external spinst object.
|
||||
* The object may not be garbage collected until its total reference count is zero.
|
||||
* \param [in] spinst_obj The pointer to the spinst data to release.
|
||||
* \post After returning from this function the reference is zeroed.
|
||||
|
|
@ -299,6 +368,19 @@ VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINST(
|
|||
vx_spinst *spinst_obj
|
||||
);
|
||||
|
||||
/*! \brief Releases a reference to a internal spinst object.
|
||||
* The object may not be garbage collected until its total reference count is zero.
|
||||
* \param [in] spinst_obj The pointer to the spinst data to release.
|
||||
* \post After returning from this function the reference is zeroed.
|
||||
* \return A <tt>\ref vx_status_e</tt> enumeration.
|
||||
* \retval VX_SUCCESS No errors; all other values indicate failure
|
||||
* \retval * An error occurred. See <tt>\ref vx_status_e</tt>.
|
||||
* \ingroup group_object_spinst
|
||||
*/
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINSTInternal(
|
||||
vx_spinst *spinst_obj
|
||||
);
|
||||
|
||||
/*! \brief Add a instruction to spinst object.
|
||||
* \param [in] spinst_obj The reference to the spinst object.
|
||||
* \param [in] inst_unit_array The units of one instruction. Use a <tt>\ref vx_spinst_unit_param</tt>.
|
||||
|
|
@ -332,6 +414,12 @@ VX_API_ENTRY vx_status VX_API_CALL vxSetAttributeToSPINST(
|
|||
vx_uint32 value
|
||||
);
|
||||
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxGetAttributeToSPINST(
|
||||
vx_spinst spinst_obj,
|
||||
vx_enum attribute,
|
||||
vx_uint32* value
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -539,6 +539,15 @@ typedef vx_enum vx_action;
|
|||
*/
|
||||
typedef vx_action (VX_CALLBACK *vx_nodecomplete_f)(vx_node node);
|
||||
|
||||
/*! \brief A callback to the client for querying information of a node.
|
||||
* \see vx_action
|
||||
* \see vxAssignNodeCallback
|
||||
* \param [in] node The node to which the callback was attached.
|
||||
* \return An action code from <tt>\ref vx_action_e</tt>.
|
||||
* \ingroup group_node_callback
|
||||
*/
|
||||
typedef vx_status (VX_CALLBACK *vx_nodequery_f)(vx_node node);
|
||||
|
||||
/*! \brief Vendor IDs are 2 nibbles in size and are located in the upper byte of
|
||||
* the 4 bytes of an enumeration.
|
||||
* \ingroup group_basic_features
|
||||
|
|
@ -1028,6 +1037,11 @@ enum vx_node_attribute_e {
|
|||
|
||||
VX_NODE_ATTRIBUTE_FOR_HW_QUALITY = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0xA,
|
||||
|
||||
VX_NODE_SWTILING_TILE_XY = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x10,
|
||||
VX_NODE_SPINST_INDEX = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x11,
|
||||
VX_NODE_SPCONV_PCQ_REPLACE_SPINST = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x12,
|
||||
VX_NODE_SP_NAME = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x13,
|
||||
VX_NODE_SPINST = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x14,
|
||||
};
|
||||
|
||||
/*! \brief The parameter attributes list
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -31,6 +31,7 @@ if(${TIM_VX_USE_EXTERNAL_OVXLIB})
|
|||
set(OVXLIB_INCLUDE_DIR ${OVXLIB_INC})
|
||||
else()
|
||||
set(OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/include")
|
||||
list(APPEND OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/src")
|
||||
endif()
|
||||
message(STATUS "OVXLIB include directory: ${OVXLIB_INCLUDE_DIR}")
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,6 @@ filegroup(
|
|||
"src/custom/ops/*.c",
|
||||
"src/custom/ops/kernel/evis/*.c",
|
||||
"src/custom/ops/kernel/cl/*.c",
|
||||
"src/custom/ops/kernel/cpu/*.c",
|
||||
])
|
||||
)
|
||||
|
||||
|
|
@ -84,6 +83,7 @@ cc_library(
|
|||
linkstatic = True,
|
||||
includes = [
|
||||
"include",
|
||||
"src",
|
||||
],
|
||||
hdrs = [
|
||||
"include/vsi_nn_pub.h",
|
||||
|
|
@ -104,6 +104,7 @@ cc_library(
|
|||
"include/vsi_nn_compatibility.h",
|
||||
"include/vsi_nn_assert.h",
|
||||
"include/vsi_nn_feature.h",
|
||||
"include/vsi_nn_post.h",
|
||||
"include/vsi_nn_rnn.h",
|
||||
"include/vsi_nn_rnn_helper.h",
|
||||
"include/vsi_nn_rnn_prv.h",
|
||||
|
|
@ -121,13 +122,15 @@ cc_library(
|
|||
"include/utils/vsi_nn_limits.h",
|
||||
"include/utils/vsi_nn_dtype_util.h",
|
||||
"include/utils/vsi_nn_dtype_util_prv.h",
|
||||
"include/utils/vsi_nn_vdata.h",
|
||||
"include/utils/vsi_nn_tensor_op.h",
|
||||
"include/utils/vsi_nn_dlfcn.h",
|
||||
"include/utils/vsi_nn_shape_util.h",
|
||||
"include/utils/vsi_nn_constraint_check.h",
|
||||
"include/quantization/vsi_nn_asymmetric_affine.h",
|
||||
"include/quantization/vsi_nn_dynamic_fixed_point.h",
|
||||
"include/quantization/vsi_nn_perchannel_symmetric_affine.h",
|
||||
"include/post/vsi_nn_post_fasterrcnn.h",
|
||||
"include/post/vsi_nn_post_cmupose.h",
|
||||
"include/interface/ops.def",
|
||||
"include/kernel/vsi_nn_kernel.h",
|
||||
"include/kernel/vsi_nn_gpu.h",
|
||||
|
|
@ -168,6 +171,9 @@ cc_library(
|
|||
"src/vsi_nn_daemon.c",
|
||||
"src/vsi_nn_graph_optimization.c",
|
||||
"src/vsi_nn_pre_post_process.c",
|
||||
"src/vsi_nn_tensor_util_prv.h",
|
||||
"src/vsi_nn_types_prv.h",
|
||||
"src/vsi_nn_kernel_prv.h",
|
||||
"src/utils/vsi_nn_link_list.c",
|
||||
"src/utils/vsi_nn_util.c",
|
||||
"src/utils/vsi_nn_math.c",
|
||||
|
|
@ -177,14 +183,16 @@ cc_library(
|
|||
"src/utils/vsi_nn_hashmap.c",
|
||||
"src/utils/vsi_nn_limits.c",
|
||||
"src/utils/vsi_nn_dtype_util.c",
|
||||
"src/utils/vsi_nn_vdata.c",
|
||||
"src/utils/vsi_nn_tensor_op.c",
|
||||
"src/utils/vsi_nn_dlfcn.c",
|
||||
"src/utils/vsi_nn_shape_util.c",
|
||||
"src/utils/vsi_nn_dtype.c",
|
||||
"src/utils/vsi_nn_constraint_check.c",
|
||||
"src/quantization/vsi_nn_asymmetric_affine.c",
|
||||
"src/quantization/vsi_nn_dynamic_fixed_point.c",
|
||||
"src/quantization/vsi_nn_perchannel_symmetric_affine.c",
|
||||
"src/post/vsi_nn_post_fasterrcnn.c",
|
||||
"src/post/vsi_nn_post_cmupose.c",
|
||||
"src/kernel/vsi_nn_kernel.c",
|
||||
"src/kernel/vsi_nn_kernel_util.c",
|
||||
"src/kernel/vsi_nn_kernel_backend.c",
|
||||
|
|
@ -202,4 +210,3 @@ cc_library(
|
|||
+ [":custom_srcs"],
|
||||
deps = ["//prebuilt-sdk:VIV_SDK_LIB"]
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ DEF_NODE_TYPE(custom_softmax)
|
|||
DEF_NODE_TYPE(custom_ainr_denoise_postprocess)
|
||||
DEF_NODE_TYPE(custom_warp_affine)
|
||||
DEF_NODE_TYPE(custom_warp_perspective)
|
||||
DEF_NODE_TYPE(custom_sample)
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ DEF_OP(CUSTOM_SOFTMAX)
|
|||
DEF_OP(CUSTOM_AINR_DENOISE_POSTPROCESS)
|
||||
DEF_OP(CUSTOM_WARP_AFFINE)
|
||||
DEF_OP(CUSTOM_WARP_PERSPECTIVE)
|
||||
DEF_OP(CUSTOM_SAMPLE)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#ifndef _VSI_NN_OP_CUSTOM_SAMPLE_H
|
||||
#define _VSI_NN_OP_CUSTOM_SAMPLE_H
|
||||
|
||||
#include "vsi_nn_platform.h"
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
typedef struct _vsi_nn_custom_sample_param
|
||||
{
|
||||
int32_t axis;
|
||||
} vsi_nn_custom_sample_param;
|
||||
|
||||
#endif
|
||||
|
|
@ -30,5 +30,6 @@
|
|||
#include "custom/ops/vsi_nn_op_custom_ainr_denoise_postprocess.h"
|
||||
#include "custom/ops/vsi_nn_op_custom_warp_affine.h"
|
||||
#include "custom/ops/vsi_nn_op_custom_warp_perspective.h"
|
||||
#include "custom/ops/vsi_nn_op_custom_sample.h"
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -183,3 +183,13 @@ DEF_OP(LPPOOL)
|
|||
DEF_OP(SCATTER_ELEMENTS)
|
||||
DEF_OP(PRE_PROCESS_YUV422)
|
||||
DEF_OP(BUCKETIZE)
|
||||
DEF_OP(GLOBALLPPOOL)
|
||||
DEF_OP(AVG_POOL3D)
|
||||
DEF_OP(ATAN)
|
||||
DEF_OP(ATANH)
|
||||
DEF_OP(ACOSH)
|
||||
DEF_OP(MAXUNPOOL)
|
||||
DEF_OP(REVERSESEQUENCE)
|
||||
DEF_OP(INVERSE_SIGMOID)
|
||||
DEF_OP(GRID_SAMPLE)
|
||||
DEF_OP(LPNORM)
|
||||
|
|
|
|||
|
|
@ -20,3 +20,4 @@ DEF_OP(SPACE2DEPTH_INTERNAL)
|
|||
DEF_OP(GRUCELL_H_TIMES_ACTIVATION_R)
|
||||
DEF_OP(GRUCELL_ACTIVATION_Z_H)
|
||||
DEF_OP(REDUCE_MEAN_INTERNAL)
|
||||
DEF_OP(BILINEAR_GRID_SAMPLE)
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ typedef struct
|
|||
gpu_dp_type_e type;
|
||||
} gpu_dp_inst_t;
|
||||
|
||||
typedef struct
|
||||
typedef struct VSI_PUBLIC_TYPE
|
||||
{
|
||||
uint32_t dim;
|
||||
size_t global_offset[GPU_MAX_DIMENSION_SIZE];
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ typedef enum
|
|||
VSI_NN_KERNEL_TYPE_SP,
|
||||
VSI_NN_KERNEL_TYPE_NUM,
|
||||
VSI_NN_KERNEL_TYPE_NONE = VSI_NN_KERNEL_TYPE_NUM
|
||||
} vsi_nn_kernel_type_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_kernel_type_e;
|
||||
|
||||
/** Kernel pirority */
|
||||
enum
|
||||
|
|
@ -79,7 +79,7 @@ typedef enum
|
|||
BOOL8,
|
||||
I4,
|
||||
U4,
|
||||
} vsi_nn_kernel_dtype_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_kernel_dtype_e;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
|
@ -98,7 +98,7 @@ typedef enum
|
|||
VSI_NN_GPU_SOURCE_FMT_CODE = 0,
|
||||
VSI_NN_GPU_SOURCE_FMT_EXECUTABLE = 1,
|
||||
VSI_NN_GPU_SOURCE_FMT_NUM
|
||||
} vsi_nn_gpu_source_fmt_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_gpu_source_fmt_e;
|
||||
|
||||
typedef char * vsi_nn_kernel_source_t;
|
||||
typedef uint32_t vsi_nn_kernel_unique_id_t;
|
||||
|
|
@ -125,7 +125,7 @@ typedef struct
|
|||
vsi_nn_kernel_source_info_t sources[VSI_NN_GPU_SOURCE_FMT_NUM];
|
||||
vsi_nn_gpu_source_fmt_e active_source_fmt;
|
||||
} gpu;
|
||||
} vsi_nn_kernel_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_kernel_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
|
@ -172,15 +172,15 @@ typedef struct
|
|||
int32_t allow_kernel_num;
|
||||
} vsi_nn_kernel_selector_t;
|
||||
|
||||
typedef void * vsi_nn_kernel_node_param_t;
|
||||
typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_node_param_t;
|
||||
|
||||
typedef void * vsi_nn_kernel_tensor_t;
|
||||
|
||||
typedef void * vsi_nn_kernel_node_t;
|
||||
typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_node_t;
|
||||
|
||||
typedef void * vsi_nn_kernel_graph_t;
|
||||
|
||||
typedef void * vsi_nn_kernel_scalar_t;
|
||||
typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_scalar_t;
|
||||
|
||||
typedef vsi_nn_hashmap_t vsi_nn_kernel_param_t;
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,10 @@ typedef int32_t vsi_nn_kernel_lut_act_e; enum
|
|||
VSI_NN_KERNEL_LUT_LINEAR_EXP = 17,
|
||||
VSI_NN_KERNEL_LUT_LINEAR_RSQRT = 18,
|
||||
VSI_NN_KERNEL_LUT_LINEAR_SIGMOID = 19,
|
||||
VSI_NN_KERNEL_LUT_ATAN = 20,
|
||||
VSI_NN_KERNEL_LUT_ATANH = 21,
|
||||
VSI_NN_KERNEL_LUT_ACOSH = 22,
|
||||
VSI_NN_KERNEL_LUT_INVERSE_SIGMOID = 23,
|
||||
|
||||
};
|
||||
|
||||
|
|
@ -67,6 +71,8 @@ typedef struct _vsi_nn_kernel_lut_
|
|||
typedef struct _vsi_nn_kernel_lut_params
|
||||
{
|
||||
vsi_enum act_type;
|
||||
vsi_bool pwl_sign_remove_support;
|
||||
float clamp_min;
|
||||
float params[16];
|
||||
} vsi_nn_kernel_lut_params;
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ typedef struct vsi_nn_kernel_info
|
|||
vx_kernel_description_t ** kernel;
|
||||
uint8_t kernel_index;
|
||||
uint8_t init_index;
|
||||
} vsi_nn_kernel_info_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_kernel_info_t;
|
||||
|
||||
uint8_t * vsi_nn_LoadBinarySource
|
||||
(
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ typedef struct _vsi_nn_argmax_param
|
|||
/* argmax layer local data structure */
|
||||
vsi_nn_argmax_lcl_data local;
|
||||
int32_t axis;
|
||||
vsi_bool keep_dims;
|
||||
} vsi_nn_argmax_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ typedef struct _vsi_nn_argmin_param
|
|||
/* argmin layer local data structure */
|
||||
vsi_nn_argmin_lcl_data local;
|
||||
int32_t axis;
|
||||
vsi_bool keep_dims;
|
||||
} vsi_nn_argmin_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_AVG_POOL3D_H
|
||||
#define _VSI_NN_OP_AVG_POOL3D_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_avg_pool3d_param
|
||||
{
|
||||
/* round_type is used to calculate the output shape */
|
||||
vsi_nn_round_type_e round_type;
|
||||
uint32_t ksize[3];
|
||||
uint32_t stride[3];
|
||||
/* Pad left, right, top, bottom, front, end */
|
||||
uint32_t pad[6];
|
||||
/* Pad type default value shall be AUTO */
|
||||
vsi_nn_pad_e pad_type;
|
||||
/* Whether include pad pixels when calculating value for the edges */
|
||||
int32_t count_include_pad;
|
||||
} vsi_nn_avg_pool3d_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -21,36 +21,31 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#ifndef _VSI_NN_VDATA_H
|
||||
#define _VSI_NN_VDATA_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#ifndef _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
|
||||
#define _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
|
||||
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_node.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
OVXLIB_API uint8_t * vsi_nn_VdataCreate
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_node_t * node,
|
||||
uint32_t * p_stream_size
|
||||
);
|
||||
|
||||
OVXLIB_API vsi_nn_tensor_t * vsi_nn_CreateVDataTensor
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
uint8_t * stream,
|
||||
vsi_nn_tensor_attr_t * attr
|
||||
);
|
||||
typedef struct _vsi_nn_bilinear_grid_sample_param
|
||||
{
|
||||
struct _bilinear_grid_sample_local_data_t* local;
|
||||
vsi_bool align_corners;
|
||||
vsi_nn_pad_mode_e padding_mode;
|
||||
int32_t const_val;
|
||||
} vsi_nn_bilinear_grid_sample_param;
|
||||
|
||||
_compiler_assert(offsetof(vsi_nn_bilinear_grid_sample_param, local) == 0, \
|
||||
vsi_nn_bilinear_grid_sample_h );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -21,34 +21,23 @@
|
|||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#ifndef _VSI_NN_OP_GATHER_ND_H
|
||||
#define _VSI_NN_OP_GATHER_ND_H
|
||||
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_node.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_ops.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
uint8_t * vsi_nn_VdataCreate
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_node_t * node,
|
||||
uint32_t * p_stream_size
|
||||
)
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_gather_nd_param
|
||||
{
|
||||
return NULL;
|
||||
} /* vsi_nn_VdataCreate() */
|
||||
int32_t batch_dims;
|
||||
} vsi_nn_gather_nd_param;
|
||||
|
||||
vsi_nn_tensor_t * vsi_nn_CreateVDataTensor
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
uint8_t * stream,
|
||||
vsi_nn_tensor_attr_t * attr
|
||||
)
|
||||
{
|
||||
return NULL;
|
||||
} /* vsi_nn_CreateVDataTensor() */
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_GLOBALLPPOOL_H
|
||||
#define _VSI_NN_OP_GLOBALLPPOOL_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_globallppool_param
|
||||
{
|
||||
int32_t p;
|
||||
} vsi_nn_globallppool_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_GRID_SAMPLE_H
|
||||
#define _VSI_NN_OP_GRID_SAMPLE_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//typedef uint32_t vsi_nn_grid_sample_mode_t;
|
||||
//enum { bilinear = 0, nearest };
|
||||
//
|
||||
//typedef uint32_t vsi_nn_grid_sample_padding_mode_t;
|
||||
//enum { zeros = 0, CONST };
|
||||
|
||||
typedef struct _grid_sample_local_data_t {
|
||||
int32_t placeholder;
|
||||
} grid_sample_local_data_t;
|
||||
|
||||
typedef struct _vsi_nn_grid_sample_param
|
||||
{
|
||||
grid_sample_local_data_t* local;
|
||||
vsi_enum mode;
|
||||
vsi_bool align_corners;
|
||||
vsi_nn_pad_mode_e padding_mode;
|
||||
int32_t const_val;
|
||||
} vsi_nn_grid_sample_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -67,7 +67,7 @@ typedef struct _vsi_nn_imageprocess_param
|
|||
int32_t mean_value_size;
|
||||
float* mean_value;
|
||||
} mean;
|
||||
} vsi_nn_imageprocess_param;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_imageprocess_param;
|
||||
|
||||
/**
|
||||
* Insert imageprocess op for image pre process
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_INVERSE_SIGMOID_H
|
||||
#define _VSI_NN_OP_INVERSE_SIGMOID_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_inverse_sigmoid_param
|
||||
{
|
||||
// Add parameters here
|
||||
float eps;
|
||||
} vsi_nn_inverse_sigmoid_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_LPNORM_H
|
||||
#define _VSI_NN_OP_LPNORM_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_lpnorm_param
|
||||
{
|
||||
int axis;
|
||||
int p;
|
||||
} vsi_nn_lpnorm_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_MAXUNPOOL_H
|
||||
#define _VSI_NN_OP_MAXUNPOOL_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_maxunpool_param
|
||||
{
|
||||
// Add parameters here
|
||||
uint32_t ksize[2];
|
||||
uint32_t pad[4];
|
||||
uint32_t stride[2];
|
||||
const uint32_t *output_size;
|
||||
} vsi_nn_maxunpool_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -68,6 +68,8 @@ typedef struct _vsi_nn_pre_process_nv12_param
|
|||
vsi_bool reverse_channel;
|
||||
|
||||
vsi_nn_pre_process_nv12_lcl_data* local;
|
||||
|
||||
vsi_nn_nv_type nv_type;
|
||||
} vsi_nn_pre_process_nv12_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ typedef struct _vsi_nn_reduce_mean_internal_param
|
|||
vx_int32 *axis;
|
||||
vx_uint32 axis_num;
|
||||
float scale;
|
||||
vsi_enum type;
|
||||
} vsi_nn_reduce_mean_internal_param;
|
||||
_compiler_assert(offsetof(vsi_nn_reduce_mean_internal_param, local) == 0, \
|
||||
vsi_nn_reduce_mean_internal_h );
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_REVERSESEQUENCE_H
|
||||
#define _VSI_NN_OP_REVERSESEQUENCE_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_reversesequence_param
|
||||
{
|
||||
int32_t batch_axis;
|
||||
int32_t time_axis;
|
||||
} vsi_nn_reversesequence_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -38,6 +38,7 @@ typedef struct _vsi_nn_roi_align_param
|
|||
float width_ratio;
|
||||
int32_t height_sample_num;
|
||||
int32_t width_sample_num;
|
||||
vsi_nn_roi_align_type_e platform_type;
|
||||
} vsi_nn_roi_align_param;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@ typedef struct _vsi_nn_strided_slice_lcl_data2
|
|||
|
||||
vsi_bool is_dataconvert_op;
|
||||
vsi_bool is_optimized;
|
||||
vsi_bool is_same_shape;
|
||||
|
||||
strided_slice_param params;
|
||||
} vsi_nn_strided_slice_lcl_data2;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,163 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#ifndef _VSI_NN_POST_CMUPOSE_H_
|
||||
#define _VSI_NN_POST_CMUPOSE_H_
|
||||
|
||||
#include "utils/vsi_nn_link_list.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_subset_data_t
|
||||
{
|
||||
float idx[20];
|
||||
}vsi_nn_subset_data_t;
|
||||
|
||||
typedef struct _vsi_nn_subset_t
|
||||
{
|
||||
vsi_nn_link_list_t link_list;
|
||||
vsi_nn_subset_data_t data;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_subset_t;
|
||||
|
||||
typedef struct _vsi_nn_peaks_data_t
|
||||
{
|
||||
uint32_t location[2];
|
||||
float score;
|
||||
uint32_t id;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_peaks_data_t;
|
||||
|
||||
typedef struct _vsi_nn_peaks_t
|
||||
{
|
||||
vsi_nn_link_list_t link_list;
|
||||
vsi_nn_peaks_data_t peak;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_peaks_t;
|
||||
|
||||
typedef struct _vsi_nn_conncection_data_t
|
||||
{
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
float score;
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
}vsi_nn_connection_data_t;
|
||||
|
||||
typedef struct _vsi_nn_connection_t
|
||||
{
|
||||
vsi_nn_link_list_t link_list;
|
||||
vsi_nn_connection_data_t data;
|
||||
}vsi_nn_connection_t;
|
||||
|
||||
typedef struct _vsi_nn_con_candidate_data_t
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
float score;
|
||||
float candAB;
|
||||
}vsi_nn_con_candidate_data_t;
|
||||
|
||||
typedef struct _vsi_nn_con_candidate_t
|
||||
{
|
||||
vsi_nn_link_list_t link_list;
|
||||
vsi_nn_con_candidate_data_t data;
|
||||
}vsi_nn_con_candidate_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_multiplier_t
|
||||
{
|
||||
float *size;
|
||||
uint32_t num;
|
||||
}vsi_nn_cmupose_multiplier_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_image_t
|
||||
{
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t channel;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_cmupose_image_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_scale_search_t
|
||||
{
|
||||
float *size;
|
||||
uint32_t num;
|
||||
}vsi_nn_cmupose_scale_search_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_model_t
|
||||
{
|
||||
uint32_t boxsize;
|
||||
uint32_t stride;
|
||||
uint32_t padValue;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_cmupose_model_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_param_t
|
||||
{
|
||||
float thre1;
|
||||
float thre2;
|
||||
float thre3;
|
||||
uint32_t mid_num;
|
||||
vsi_nn_cmupose_scale_search_t scale_search;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_cmupose_param_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_inputs_t
|
||||
{
|
||||
vsi_nn_tensor_t *net_out;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_cmupose_inputs_t;
|
||||
|
||||
typedef struct _vsi_nn_cmupose_config_t
|
||||
{
|
||||
vsi_nn_cmupose_inputs_t inputs;
|
||||
vsi_nn_cmupose_param_t param;
|
||||
vsi_nn_cmupose_model_t model;
|
||||
vsi_nn_cmupose_image_t image;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_cmupose_config_t;
|
||||
|
||||
OVXLIB_API vsi_status vsi_nn_CMUPose_Post_Process
|
||||
(
|
||||
float *net_out,
|
||||
vsi_nn_cmupose_config_t *config,
|
||||
vsi_nn_peaks_t ***all_peaks_out,
|
||||
uint32_t *all_peaks_num_out,
|
||||
vsi_nn_subset_t **subset_list_out,
|
||||
vsi_nn_peaks_data_t **peak_candidate_out,
|
||||
uint32_t *peak_candidate_num_out
|
||||
);
|
||||
|
||||
OVXLIB_API vsi_status vsi_nn_CMUPose_PostProcess
|
||||
(
|
||||
vsi_nn_graph_t *graph,
|
||||
vsi_nn_cmupose_inputs_t *inputs,
|
||||
vsi_nn_cmupose_image_t *image,
|
||||
vsi_nn_cmupose_param_t *param,
|
||||
vsi_nn_cmupose_model_t *model,
|
||||
vsi_nn_peaks_t ***all_peaks,
|
||||
uint32_t *all_peaks_num,
|
||||
vsi_nn_peaks_data_t **candidate,
|
||||
uint32_t *candidate_num,
|
||||
vsi_nn_subset_t **subset
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#ifndef _VSI_NN_POST_FASTERRCNN_H_
|
||||
#define _VSI_NN_POST_FASTERRCNN_H_
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_node_type.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "utils/vsi_nn_link_list.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct _vsi_nn_fasterrcnn_box_t
|
||||
{
|
||||
vsi_nn_link_list_t link_list;
|
||||
|
||||
/* upper-left coordinate(x1,y1) */
|
||||
float x1;
|
||||
float y1;
|
||||
/* lower-right coordinate(x2,y2) */
|
||||
float x2;
|
||||
float y2;
|
||||
float score;
|
||||
uint32_t class_id;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_box_t;
|
||||
|
||||
typedef struct _vsi_nn_fasterrcnn_param_t
|
||||
{
|
||||
float conf_thresh;
|
||||
float nms_thresh;
|
||||
const char **classes;
|
||||
uint32_t classes_num;
|
||||
uint32_t rois_num;
|
||||
vsi_nn_proposal_im_info iminfo;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_param_t;
|
||||
|
||||
typedef struct _vsi_nn_fasterrcnn_inputs_t
|
||||
{
|
||||
vsi_nn_tensor_t *rois;
|
||||
vsi_nn_tensor_t *cls;
|
||||
vsi_nn_tensor_t *bbox;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_inputs_t;
|
||||
|
||||
OVXLIB_API vsi_status vsi_nn_FasterRCNN_PostProcess
|
||||
(
|
||||
vsi_nn_graph_t *graph,
|
||||
vsi_nn_fasterrcnn_inputs_t *inputs,
|
||||
vsi_nn_fasterrcnn_param_t *param,
|
||||
vsi_nn_fasterrcnn_box_t **dets_box
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -29,8 +29,9 @@ extern "C"{
|
|||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include "vsi_nn_feature_config.h"
|
||||
|
||||
typedef int64_t vsi_nn_binary_tree_key_t;
|
||||
typedef int64_t VSI_PUBLIC_TYPE vsi_nn_binary_tree_key_t;
|
||||
|
||||
#define vsi_nn_BinaryTreeInitRoot(n) do{n = NULL;} while (0);
|
||||
|
||||
|
|
@ -40,7 +41,7 @@ typedef struct _vsi_nn_binary_tree
|
|||
struct _vsi_nn_binary_tree * right;
|
||||
vsi_nn_binary_tree_key_t key;
|
||||
void * data_ptr;
|
||||
} vsi_nn_binary_tree_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_binary_tree_t;
|
||||
|
||||
OVXLIB_API void vsi_nn_BinaryTreeRemoveNode
|
||||
(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
#ifndef __VSI_NN_DLFCN_H
|
||||
#define __VSI_NN_DLFCN_H
|
||||
|
||||
#if (defined(_MSC_VER) || defined(_WIN32) || defined(__MINGW32))
|
||||
#define RTLD_LAZY 0
|
||||
#define RTLD_NOW 0
|
||||
|
||||
#define RTLD_GLOBAL (1 << 1)
|
||||
#define RTLD_LOCAL (1 << 2)
|
||||
|
||||
#define RTLD_DEFAULT ((void *)0)
|
||||
#define RTLD_NEXT ((void *)-1)
|
||||
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Opend a shared library
|
||||
*
|
||||
* @param[in] Library path
|
||||
* @param[in] Opend mode.
|
||||
*
|
||||
* @return Library handle on success, or NULL otherwise.
|
||||
*/
|
||||
void* vsi_nn_dlopen
|
||||
(
|
||||
const char *file,
|
||||
int mode
|
||||
);
|
||||
|
||||
/**
|
||||
* Close the opened library
|
||||
*
|
||||
* @param[in] Library handler
|
||||
*
|
||||
* @return TRUE on success
|
||||
*/
|
||||
int vsi_nn_dlclose
|
||||
(
|
||||
void *handle
|
||||
);
|
||||
|
||||
/**
|
||||
* Find symbol from opened library
|
||||
*
|
||||
* @param[in] Library handler
|
||||
* @param[in] Symbol name to find.
|
||||
*
|
||||
* @return Symbol
|
||||
*/
|
||||
void* vsi_nn_dlsym
|
||||
(
|
||||
void *handle,
|
||||
const char *name
|
||||
);
|
||||
|
||||
/**
|
||||
* Get error info.
|
||||
*
|
||||
* @return Error message.
|
||||
*/
|
||||
char * vsi_nn_dlerror(void);
|
||||
#endif
|
||||
|
||||
|
|
@ -464,6 +464,7 @@ static VSI_INLINE_API vsi_status dtype_to_float32
|
|||
case VSI_NN_TYPE_BOOL8:
|
||||
case VSI_NN_TYPE_UINT8:
|
||||
case VSI_NN_TYPE_INT16:
|
||||
case VSI_NN_TYPE_UINT16:
|
||||
case VSI_NN_TYPE_INT32:
|
||||
{
|
||||
int32_t src_value = 0;
|
||||
|
|
@ -516,6 +517,7 @@ static VSI_INLINE_API vsi_status float32_to_dtype
|
|||
case VSI_NN_TYPE_BOOL8:
|
||||
case VSI_NN_TYPE_UINT8:
|
||||
case VSI_NN_TYPE_INT16:
|
||||
case VSI_NN_TYPE_UINT16:
|
||||
case VSI_NN_TYPE_INT32:
|
||||
case VSI_NN_TYPE_UINT32:
|
||||
{
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ typedef struct _vsi_nn_link_list
|
|||
{
|
||||
struct _vsi_nn_link_list * prev;
|
||||
struct _vsi_nn_link_list * next;
|
||||
} vsi_nn_link_list_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_link_list_t;
|
||||
|
||||
typedef void ( * vsi_nn_link_list_init_t )
|
||||
(
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@
|
|||
extern "C"{
|
||||
#endif
|
||||
|
||||
typedef vsi_nn_binary_tree_key_t vsi_nn_map_key_t;
|
||||
typedef vsi_nn_binary_tree_key_t VSI_PUBLIC_TYPE vsi_nn_map_key_t;
|
||||
|
||||
typedef struct _vsi_nn_map_key_list
|
||||
{
|
||||
|
|
@ -45,7 +45,7 @@ typedef struct _vsi_nn_map
|
|||
int size;
|
||||
vsi_nn_map_key_list_t * keys;
|
||||
vsi_nn_binary_tree_t * values;
|
||||
} vsi_nn_map_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_map_t;
|
||||
|
||||
OVXLIB_API void vsi_nn_MapInit
|
||||
(
|
||||
|
|
|
|||
|
|
@ -99,6 +99,30 @@ typedef enum vx_nn_activation_function_e vx_co
|
|||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_NONE VX_NN_ACTIVATION_NONE
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SWISH VX_NN_ACTIVATION_SWISH
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HWISH VX_NN_ACTIVATION_HSWISH
|
||||
#if (VX_ACTIVATION_EXT2_SUPPORT)
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SIGN VX_NN_ACTIVATION_SIGN_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HSIGMOID VX_NN_ACTIVATION_HSIGMOID_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_NEG VX_NN_ACTIVATION_NEG_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_CLIP VX_NN_ACTIVATION_CLIP_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_EXP VX_NN_ACTIVATION_EXP_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SIN VX_NN_ACTIVATION_SIN_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_COS VX_NN_ACTIVATION_COS_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_LOG VX_NN_ACTIVATION_LOG_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_MISH VX_NN_ACTIVATION_MISH_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_GELU VX_NN_ACTIVATION_GELU_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HGELU VX_NN_ACTIVATION_HGELU_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ELU VX_NN_ACTIVATION_ELU_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SELU VX_NN_ACTIVATION_SELU_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_CELU VX_NN_ACTIVATION_CELU_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_RECIPROCAL VX_NN_ACTIVATION_RECIPROCAL_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SOFTSIGN VX_NN_ACTIVATION_SOFTSIGN_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ATAN VX_NN_ACTIVATION_ATAN_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ATANH VX_NN_ACTIVATION_ATANH_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ACOSH VX_NN_ACTIVATION_ACOSH_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_INVERSE_SIGMOID VX_NN_ACTIVATION_INVERSE_SIGMOID_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ROUND VX_NN_ACTIVATION_ROUND_VSI
|
||||
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ERF VX_NN_ACTIVATION_ERF_VSI
|
||||
#endif
|
||||
|
||||
/*
|
||||
keep the backward compatibility with spec 1.1 for vxCopyTensorPatch_11
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ typedef struct _vsi_nn_runtime_option_t
|
|||
int32_t enable_concat_optimize;
|
||||
int32_t enable_asymi8_to_u8;
|
||||
int32_t enable_dataconvert_optimize;
|
||||
int32_t enable_stream_processor;
|
||||
} vsi_nn_runtime_option_t;
|
||||
|
||||
/**
|
||||
|
|
@ -87,7 +88,7 @@ typedef struct _vsi_nn_context_t
|
|||
vx_context c;
|
||||
vsi_nn_hw_config_t config;
|
||||
vsi_nn_runtime_option_t options;
|
||||
} *vsi_nn_context_t;
|
||||
} VSI_PUBLIC_TYPE *vsi_nn_context_t;
|
||||
|
||||
/**
|
||||
* Create context
|
||||
|
|
|
|||
|
|
@ -1,7 +1,46 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2019 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the Software),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
/*****Auto generated header file, Please DO NOT modify manually!*****/
|
||||
#ifndef _VSI_NN_FEATURE_CONFIG_H
|
||||
#define _VSI_NN_FEATURE_CONFIG_H
|
||||
|
||||
#define VSI_PUBLIC_TYPE
|
||||
#include <VX/vx_khr_cnn.h>
|
||||
#if defined(VX_KHR_COMPATIBILITY) && (0x1==VX_KHR_COMPATIBILITY)
|
||||
#include <VX/vx_khr_compatible.h>
|
||||
#endif
|
||||
#ifndef VSI_PERCHANNEL_QUANTIZATION_SUPPORT
|
||||
#define VSI_PERCHANNEL_QUANTIZATION_SUPPORT
|
||||
#endif
|
||||
#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
|
||||
#define VSI_INVALIDATE_HANDLE_SUPPORT
|
||||
#endif
|
||||
#ifndef VSI_0_D_TENSOR_SUPPORT
|
||||
#define VSI_0_D_TENSOR_SUPPORT
|
||||
#endif
|
||||
#if defined(VX_TENSORVIEW_ON_ANY_DIM) && VX_TENSORVIEW_ON_ANY_DIM
|
||||
#define VSI_CONCAT_ENHANCE_SUPPORT
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ extern "C" {
|
|||
/**
|
||||
* Graph structure
|
||||
*/
|
||||
struct _vsi_nn_graph
|
||||
struct VSI_PUBLIC_TYPE _vsi_nn_graph
|
||||
{
|
||||
/** Context */
|
||||
vsi_nn_context_t ctx;
|
||||
|
|
@ -167,6 +167,8 @@ struct _vsi_nn_graph
|
|||
} complete_signal;
|
||||
|
||||
vsi_bool isAllowFastMode;
|
||||
|
||||
//DO NOT modify this sturct.
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ typedef enum _vsi_nn_log_level_e
|
|||
VSI_NN_LOG_WARN,
|
||||
VSI_NN_LOG_INFO,
|
||||
VSI_NN_LOG_DEBUG
|
||||
}vsi_nn_log_level_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_log_level_e;
|
||||
|
||||
#define VSI_NN_MAX_DEBUG_BUFFER_LEN 1024
|
||||
#define VSILOGE( fmt, ... ) \
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ typedef struct _vsi_nn_node_attr_t
|
|||
} vsi_nn_node_attr_t;
|
||||
|
||||
/** Node structure */
|
||||
struct _vsi_nn_node
|
||||
struct VSI_PUBLIC_TYPE _vsi_nn_node
|
||||
{
|
||||
/**
|
||||
* Graph handle
|
||||
|
|
|
|||
|
|
@ -200,8 +200,17 @@
|
|||
#include "ops/vsi_nn_op_scatter_elements.h"
|
||||
#include "ops/vsi_nn_op_pre_process_yuv422.h"
|
||||
#include "ops/vsi_nn_op_bucketize.h"
|
||||
#include "ops/vsi_nn_op_globallppool.h"
|
||||
#include "ops/vsi_nn_op_gather_nd.h"
|
||||
#include "ops/vsi_nn_op_avg_pool3d.h"
|
||||
#include "ops/vsi_nn_op_maxunpool.h"
|
||||
#include "ops/vsi_nn_op_reversesequence.h"
|
||||
#include "ops/vsi_nn_op_grid_sample.h"
|
||||
#include "ops/vsi_nn_op_bilinear_grid_sample.h"
|
||||
#include "ops/vsi_nn_op_lpnorm.h"
|
||||
/* custom node head define define */
|
||||
#include "custom/vsi_nn_custom_node_type.h"
|
||||
#include "ops/vsi_nn_op_inverse_sigmoid.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"{
|
||||
|
|
@ -386,6 +395,15 @@ typedef union _vsi_nn_nn_param
|
|||
vsi_nn_scatter_elements_param scatter_elements;
|
||||
vsi_nn_pre_process_yuv422_param pre_process_yuv422;
|
||||
vsi_nn_bucketize_param bucketize;
|
||||
vsi_nn_globallppool_param globallppool;
|
||||
vsi_nn_gather_nd_param gather_nd;
|
||||
vsi_nn_avg_pool3d_param avg_pool3d;
|
||||
vsi_nn_maxunpool_param maxunpool;
|
||||
vsi_nn_reversesequence_param reversesequence;
|
||||
vsi_nn_inverse_sigmoid_param inverse_sigmoid;
|
||||
vsi_nn_grid_sample_param gridsample;
|
||||
vsi_nn_bilinear_grid_sample_param bilinear_grid_sample;
|
||||
vsi_nn_lpnorm_param lpnorm;
|
||||
void* client_param;
|
||||
|
||||
/* custom node data struct define */
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ extern "C"{
|
|||
* @see include/custom/custom_ops.def
|
||||
* @see include/internal/internal_ops.def
|
||||
*/
|
||||
typedef int32_t vsi_nn_op_t; enum
|
||||
typedef int32_t VSI_PUBLIC_TYPE vsi_nn_op_t; enum
|
||||
{
|
||||
#define DEF_OP( NAME, ... ) VSI_NN_OP_##NAME,
|
||||
#include "interface/ops.def"
|
||||
|
|
@ -126,7 +126,7 @@ typedef struct _vsi_nn_op_proc
|
|||
vsi_nn_op_optimize_t optimize;
|
||||
uint32_t input_num;
|
||||
uint32_t output_num;
|
||||
} vsi_nn_op_proc_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_op_proc_t;
|
||||
|
||||
/*------------------------------------
|
||||
Functions
|
||||
|
|
|
|||
|
|
@ -26,13 +26,6 @@
|
|||
|
||||
#include "vsi_nn_feature_config.h"
|
||||
|
||||
#ifdef VSI_40BIT_VA_SUPPORT
|
||||
#ifdef VX_VA40_EXT_SUPPORT
|
||||
#undef VX_VA40_EXT_SUPPORT
|
||||
#endif
|
||||
#define VX_VA40_EXT_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#include <VX/vx_khr_cnn.h>
|
||||
#include <VX/vx_helper.h>
|
||||
#include <VX/vx_ext_program.h>
|
||||
|
|
@ -48,12 +41,4 @@
|
|||
*/
|
||||
#include "vsi_nn_compatibility.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ typedef enum
|
|||
VSI_NN_SOURCE_FORMAT_IMAGE_RGB888_PLANAR_SEP,
|
||||
VSI_NN_SOURCE_FORMAT_IMAGE_YUYV422,
|
||||
VSI_NN_SOURCE_FORMAT_IMAGE_UYVY422,
|
||||
VSI_NN_SOURCE_FORMAT_IMAGE_NV21,
|
||||
} vsi_nn_preprocess_source_format_e;
|
||||
|
||||
/**
|
||||
|
|
@ -98,7 +99,7 @@ typedef struct
|
|||
vsi_nn_preprocess_type_e type;
|
||||
/** Preprocess paramters */
|
||||
void* param;
|
||||
} vsi_nn_preprocess_base_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_preprocess_base_t;
|
||||
|
||||
/**
|
||||
* Postprocess base structure
|
||||
|
|
@ -109,7 +110,7 @@ typedef struct
|
|||
vsi_nn_postprocess_type_e type;
|
||||
/** Postrocess paramters */
|
||||
void* param;
|
||||
} vsi_nn_postprocess_base_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_postprocess_base_t;
|
||||
|
||||
/**
|
||||
* Process dtype convert parameter structure
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@
|
|||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_version.h"
|
||||
#include "vsi_nn_assert.h"
|
||||
#include "vsi_nn_post.h"
|
||||
#include "vsi_nn_rnn.h"
|
||||
#include "vsi_nn_test.h"
|
||||
#include "vsi_nn_pre_post_process.h"
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ typedef struct
|
|||
{
|
||||
vsi_nn_tensor_id_t output;
|
||||
vsi_nn_tensor_id_t inputs[VSI_NN_MAX_RNN_CONNECTION_INPUTS];
|
||||
} vsi_nn_rnn_external_connection_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_rnn_external_connection_t;
|
||||
|
||||
/*-------------------------------------------
|
||||
Procedure to prepare input data, return FALSE
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ typedef enum
|
|||
VSI_NN_DIM_FMT_NHWC = 0x01,
|
||||
VSI_NN_DIM_FMT_NA = 0xFF,
|
||||
VSI_NN_DIM_FMT_AUTO = VSI_NN_DIM_FMT_NA - 1,
|
||||
} vsi_nn_dim_fmt_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_dim_fmt_e;
|
||||
|
||||
/**
|
||||
* Quantization type.
|
||||
|
|
@ -125,7 +125,7 @@ typedef struct vsi_nn_dtype
|
|||
#endif
|
||||
};
|
||||
};
|
||||
} vsi_nn_dtype_t;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_dtype_t;
|
||||
|
||||
/**
|
||||
* Tensor Attribute
|
||||
|
|
@ -150,15 +150,13 @@ typedef struct vsi_nn_tensor_attr
|
|||
#ifdef VX_CREATE_TENSOR_SUPPORT_PHYSICAL
|
||||
vsi_memory_type_e vsi_memory_type;
|
||||
#endif
|
||||
#if VX_STREAM_PROCESSOR_SUPPORT
|
||||
vsi_bool is_dummy;
|
||||
#endif
|
||||
} vsi_nn_tensor_attr_t;
|
||||
// DO NOT modify this struct.
|
||||
} VSI_PUBLIC_TYPE vsi_nn_tensor_attr_t;
|
||||
|
||||
/**
|
||||
* Tensor structure
|
||||
*/
|
||||
struct _vsi_nn_tensor
|
||||
struct VSI_PUBLIC_TYPE _vsi_nn_tensor
|
||||
{
|
||||
/** Tensor attributes */
|
||||
vsi_nn_tensor_attr_t attr;
|
||||
|
|
@ -168,6 +166,7 @@ struct _vsi_nn_tensor
|
|||
vx_weights_biases_parameter wb;
|
||||
/** Mark tensor swapped by vxSwapTensor */
|
||||
int8_t is_swapped;
|
||||
// DO NOT modify this struct.
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -321,10 +321,38 @@ OVXLIB_API vsi_status vsi_nn_CopyDataToTensor
|
|||
);
|
||||
|
||||
/**
|
||||
* Flush Handle
|
||||
* If you swap the handle of the tensor, you should flush it.
|
||||
* Swap a tensor's Handle
|
||||
* Swap handle to old_ptr to read/write, swap new handle to new_ptr to update handle.
|
||||
*
|
||||
* @param[in] tensor Tensor handle.
|
||||
* APP SHOULD maintain handle that created by itself to manage memory correctly,
|
||||
* never free or wirte data for handel allocated by OVXLIB.
|
||||
*
|
||||
* OVXLIB would not maintain original handle anymore if new_ptr == NULL.
|
||||
*
|
||||
* Before free data in handle allocated by APP, vsi_nn_SwapHandle(tensor, NULL, &prev_ptr)
|
||||
* should be called to get contol of handle.
|
||||
*
|
||||
* @param[in] tensor Tensor.
|
||||
* @param[in] new_ptr New handle of tensor.
|
||||
* @param[in] is_new_ptr_malloc_by_ovxlib If new_ptr is allocated by ovxlib while new_ptr is not NULL.
|
||||
* @param[out] old_ptr Old handle of tensor.
|
||||
*
|
||||
* @return VSI_SUCCESS on success, or error core otherwise.
|
||||
*/
|
||||
OVXLIB_API vsi_status vsi_nn_SwapHandle
|
||||
(
|
||||
vsi_nn_tensor_t* tensor,
|
||||
void* new_ptr,
|
||||
vsi_bool is_new_ptr_malloc_by_ovxlib,
|
||||
void** old_ptr
|
||||
);
|
||||
|
||||
/**
|
||||
* Flush Handle
|
||||
* Call this function to flush new data to the handle in hand.
|
||||
* vsi_nn_FlushHandle() should be called at last to compleate the data writting operation.
|
||||
*
|
||||
* @param[in] tensor Tensor.
|
||||
*
|
||||
* @return VSI_SUCCESS on success, or error core otherwise.
|
||||
*/
|
||||
|
|
@ -333,6 +361,20 @@ OVXLIB_API vsi_status vsi_nn_FlushHandle
|
|||
const vsi_nn_tensor_t * tensor
|
||||
);
|
||||
|
||||
/**
|
||||
* Invalidate Handle
|
||||
* invalidate handle before copy data from tensor handle.
|
||||
* Before read data in handle, vsi_nn_InvalidateHandle() should be called to do invalidate cache in APP.
|
||||
*
|
||||
* @param[in] tensor Tensor.
|
||||
*
|
||||
* @return VSI_SUCCESS on success, or error core otherwise.
|
||||
*/
|
||||
OVXLIB_API vsi_status vsi_nn_InvalidateHandle
|
||||
(
|
||||
const vsi_nn_tensor_t* tensor
|
||||
);
|
||||
|
||||
/**
|
||||
* Get Tensor Handle
|
||||
* Get the handle of the tensor
|
||||
|
|
@ -348,6 +390,34 @@ OVXLIB_API vsi_status vsi_nn_GetTensorHandle
|
|||
void** ptr
|
||||
);
|
||||
|
||||
/**
|
||||
* Get Tensor is_scalar
|
||||
* Get the is_scalar of the tensor
|
||||
*
|
||||
* @param[in] tensor Tensor.
|
||||
*
|
||||
* @return is_scalar flag of the tensor.
|
||||
*/
|
||||
OVXLIB_API int8_t vsi_nn_GetTensorIsScalar
|
||||
(
|
||||
vsi_nn_tensor_t* tensor
|
||||
);
|
||||
|
||||
/**
|
||||
* Set Tensor is_scalar
|
||||
* Set the is_scalar for the tensor
|
||||
*
|
||||
* @param[in] tensor Tensor.
|
||||
* @param[in] new is_scalar value of the tensor.
|
||||
*
|
||||
* @return VSI_SUCCESS on success, or error core otherwise.
|
||||
*/
|
||||
OVXLIB_API vsi_status vsi_nn_SetTensorIsScalar
|
||||
(
|
||||
vsi_nn_tensor_t* tensor,
|
||||
int8_t is_scalar
|
||||
);
|
||||
|
||||
OVXLIB_API vsi_status vsi_nn_CopyRawDataToTensor
|
||||
(
|
||||
vsi_nn_graph_t* graph,
|
||||
|
|
@ -722,13 +792,6 @@ vsi_nn_tensor_t* vsi_nn_ConstTensorAdd_impl
|
|||
#define vsi_nn_ConstTensorAdd(_graph, _output_attr, ...) \
|
||||
vsi_nn_ConstTensorAdd_impl(_graph, _output_attr, __VA_ARGS__, END_OF_VARIADIC_ARGUMENTS)
|
||||
|
||||
vsi_status vsi_nn_SwapHandle
|
||||
(
|
||||
vsi_nn_tensor_t * tensor,
|
||||
void * new_ptr,
|
||||
void ** old_ptr
|
||||
);
|
||||
|
||||
vsi_bool vsi_nn_ConvertTensor
|
||||
(
|
||||
vsi_nn_graph_t* graph,
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@
|
|||
|
||||
#include <stdint.h>
|
||||
#include "vsi_nn_platform.h"
|
||||
#include "vsi_nn_feature_config.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"{
|
||||
|
|
@ -109,7 +108,7 @@ typedef enum
|
|||
VSI_NN_PAD_AUTO,
|
||||
VSI_NN_PAD_VALID,
|
||||
VSI_NN_PAD_SAME
|
||||
} vsi_nn_pad_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_pad_e;
|
||||
|
||||
/** reduce type enum */
|
||||
typedef enum
|
||||
|
|
@ -142,14 +141,14 @@ typedef enum
|
|||
{
|
||||
VSI_NN_ROUND_CEIL,
|
||||
VSI_NN_ROUND_FLOOR
|
||||
} vsi_nn_round_type_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_round_type_e;
|
||||
|
||||
/** Optimize driction */
|
||||
typedef enum
|
||||
{
|
||||
VSI_NN_OPTIMIZE_FORWARD,
|
||||
VSI_NN_OPTIMIZE_BACKWARD
|
||||
} vsi_nn_opt_direction_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_opt_direction_e;
|
||||
#ifdef VX_CREATE_TENSOR_SUPPORT_PHYSICAL
|
||||
typedef enum
|
||||
{
|
||||
|
|
@ -195,7 +194,7 @@ typedef enum
|
|||
#endif
|
||||
VSI_NN_TYPE_VDATA = VX_TYPE_USER_STRUCT_START + 0x1,
|
||||
|
||||
}vsi_nn_type_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_type_e;
|
||||
|
||||
typedef int32_t vsi_nn_activation_e; enum
|
||||
{
|
||||
|
|
@ -236,7 +235,7 @@ typedef enum
|
|||
{
|
||||
VSI_NN_GRAPH_PRELOAD_VIPSRAM,
|
||||
VSI_NN_GRAPH_PRELOAD_AXISRAM
|
||||
} vsi_nn_graph_attr_preload_type_e;
|
||||
} VSI_PUBLIC_TYPE vsi_nn_graph_attr_preload_type_e;
|
||||
|
||||
typedef enum _vsi_nn_node_attr_preload_type_e
|
||||
{
|
||||
|
|
@ -257,23 +256,35 @@ typedef enum _vsi_nn_yuv_type
|
|||
VSI_NN_YUV_TYPE_UYUV422
|
||||
}vsi_nn_yuv_type;
|
||||
|
||||
typedef enum _vsi_nn_nv_type
|
||||
{
|
||||
VSI_NN_YUV_TYPE_NV12,
|
||||
VSI_NN_YUV_TYPE_NV21
|
||||
}vsi_nn_nv_type;
|
||||
|
||||
typedef enum _vsi_nn_roi_align_type_e
|
||||
{
|
||||
VSI_NN_ROI_ALIGN_ANDROID,
|
||||
VSI_NN_ROI_ALIGN
|
||||
} vsi_nn_roi_align_type_e;
|
||||
|
||||
/** Deprecated */
|
||||
typedef uint32_t vsi_nn_size_t;
|
||||
|
||||
/** Tensor id type */
|
||||
typedef uint32_t vsi_nn_tensor_id_t;
|
||||
typedef uint32_t VSI_PUBLIC_TYPE vsi_nn_tensor_id_t;
|
||||
|
||||
/** Node id type */
|
||||
typedef uint32_t vsi_nn_node_id_t;
|
||||
|
||||
/** @see _vsi_nn_graph */
|
||||
typedef struct _vsi_nn_graph vsi_nn_graph_t;
|
||||
typedef struct _vsi_nn_graph VSI_PUBLIC_TYPE vsi_nn_graph_t;
|
||||
|
||||
/** @see _vsi_nn_node */
|
||||
typedef struct _vsi_nn_node vsi_nn_node_t;
|
||||
typedef struct _vsi_nn_node VSI_PUBLIC_TYPE vsi_nn_node_t;
|
||||
|
||||
/** @see _vsi_nn_tensor */
|
||||
typedef struct _vsi_nn_tensor vsi_nn_tensor_t;
|
||||
typedef struct _vsi_nn_tensor VSI_PUBLIC_TYPE vsi_nn_tensor_t;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ extern "C"{
|
|||
|
||||
#define VSI_NN_VERSION_MAJOR 1
|
||||
#define VSI_NN_VERSION_MINOR 1
|
||||
#define VSI_NN_VERSION_PATCH 57
|
||||
#define VSI_NN_VERSION_PATCH 74
|
||||
#define VSI_NN_VERSION \
|
||||
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,144 @@
|
|||
#
# Build Vivante chipinfo for android.
#
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)

ifeq ($(AQROOT),)
$(error Please set AQROOT env first)
endif

include $(AQROOT)/Android.mk.def

ifeq ($(PLATFORM_VENDOR),1)
LOCAL_VENDOR_MODULE := true
endif

# Core runtime sources (graph, node, tensor, RNN support).
LOCAL_SRC_FILES := \
    vsi_nn_context.c \
    vsi_nn_client_op.c \
    vsi_nn_graph.c \
    vsi_nn_node_attr_template.c \
    vsi_nn_node.c \
    vsi_nn_ops.c \
    vsi_nn_daemon.c \
    vsi_nn_tensor.c \
    vsi_nn_version.c \
    vsi_nn_rnn.c \
    vsi_nn_rnn_helper.c \
    vsi_nn_internal_node.c \
    vsi_nn_log.c \
    vsi_nn_graph_optimization.c \
    vsi_nn_pre_post_process.c


# Utility helpers (containers, dtype conversion, misc).
LOCAL_SRC_FILES += \
    utils/vsi_nn_code_generator.c \
    utils/vsi_nn_binary_tree.c \
    utils/vsi_nn_map.c \
    utils/vsi_nn_hashmap.c \
    utils/vsi_nn_link_list.c \
    utils/vsi_nn_math.c \
    utils/vsi_nn_dtype.c \
    utils/vsi_nn_dtype_util.c \
    utils/vsi_nn_shape_util.c \
    utils/vsi_nn_limits.c \
    utils/vsi_nn_tensor_op.c \
    utils/vsi_nn_util.c \
    utils/vsi_nn_dlfcn.c \
    utils/vsi_nn_constraint_check.c


# Quantization schemes.
LOCAL_SRC_FILES += \
    quantization/vsi_nn_dynamic_fixed_point.c \
    quantization/vsi_nn_asymmetric_affine.c \
    quantization/vsi_nn_perchannel_symmetric_affine.c \


# Post-processing helpers.
LOCAL_SRC_FILES += \
    post/vsi_nn_post_fasterrcnn.c \
    post/vsi_nn_post_cmupose.c

# CPU reference backend.
LOCAL_SRC_FILES += \
    cpu_backend/vsi_nn_cpu_backend.c \
    cpu_backend/vsi_nn_cpu_backend_conv2d.c \
    cpu_backend/vsi_nn_cpu_backend_deconv2d.c \
    cpu_backend/npuref_interface.c


LOCAL_SRC_FILES += libnnext/vsi_nn_libnnext_resource.c \
    libnnext/vsi_nn_vxkernel.c

# Kernel framework.
LOCAL_SRC_FILES += kernel/vsi_nn_kernel.c \
    kernel/vsi_nn_kernel_util.c \
    kernel/vsi_nn_kernel_backend.c \
    kernel/vsi_nn_kernel_eltwise.c \
    kernel/vsi_nn_kernel_selector.c \
    kernel/vsi_nn_kernel_node.c \
    kernel/vsi_nn_kernel_param.c \
    kernel/vsi_nn_kernel_gpu_shape_optimize.c \
    kernel/vsi_nn_kernel_lut.c \
    kernel/vsi_nn_spinst.c \
    kernel/vsi_nn_sp_unit_operation.c \
    kernel/vsi_nn_sp_lut.c \
    kernel/vsi_nn_gpu.c

# Per-backend kernel implementations, discovered by wildcard.
LIBNNEXT_KERNEL_SOURCES := $(wildcard $(LOCAL_PATH)/libnnext/ops/kernel/*.c)
LOCAL_SRC_FILES += $(LIBNNEXT_KERNEL_SOURCES:$(LOCAL_PATH)/%=%)

KERNEL_SOURCES := $(wildcard $(LOCAL_PATH)/kernel/cl/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/cpu/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/evis/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/vx/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/sp/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/evis/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/cl/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/cpu/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/sp/*.c)
LOCAL_SRC_FILES += $(KERNEL_SOURCES:$(LOCAL_PATH)/%=%)

OPERATION_SOURCES := $(wildcard $(LOCAL_PATH)/ops/*.c)
LOCAL_SRC_FILES += $(OPERATION_SOURCES:$(LOCAL_PATH)/%=%)


LOCAL_SHARED_LIBRARIES := \
    liblog \
    libjpeg \
    libGAL \
    libOpenVX \
    libVSC \
    libdl

LOCAL_C_INCLUDES += \
    external/libjpeg-turbo \
    $(AQROOT)/sdk/inc/CL \
    $(AQROOT)/sdk/inc/VX \
    $(AQROOT)/sdk/inc/ \
    $(AQROOT)/sdk/inc/HAL \
    $(LOCAL_PATH)/../include \
    $(LOCAL_PATH)/../include/ops \
    $(LOCAL_PATH)/../include/utils \
    $(LOCAL_PATH)/../include/infernce \
    $(LOCAL_PATH)/../include/client \
    $(LOCAL_PATH)/../include/cpu_backend \
    $(LOCAL_PATH)/../include/libnnext \
    $(LOCAL_PATH)/../src

LOCAL_CFLAGS := \
    -DLINUX \
    -D'OVXLIB_API=__attribute__((visibility("default")))' \
    -DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION)\
    -Wno-sign-compare \
    -Wno-implicit-function-declaration \
    -Wno-sometimes-uninitialized \
    -Wno-unused-parameter \
    -Wno-enum-conversion \
    -Wno-missing-field-initializers \
    -Wno-tautological-compare \
    -Wno-missing-braces

LOCAL_MODULE:= libovxlib
LOCAL_MODULE_TAGS := optional
LOCAL_PRELINK_MODULE := false
include $(BUILD_SHARED_LIBRARY)
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_platform.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_node.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_test.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
#define _CPU_ARG_NUM (1)
|
||||
#define _CPU_INPUT_NUM (2)
|
||||
#define _CPU_OUTPUT_NUM (1)
|
||||
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
|
||||
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
|
||||
#define _KERNEL_NAME ("com.vivantecorp.extension.CustomSampleVXC")
|
||||
|
||||
#define SCALAR_INPUT_AXIS (3)
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
 * CPU reference executor for the custom_sample kernel.
 *
 * Reads both inputs into float32 buffers and computes, element-wise over
 * the output:
 *     out[i] = input0[i] + input1[0]
 * i.e. input1 is only sampled at its first element.
 * NOTE(review): despite the `_softmax_compute` name, no softmax is
 * performed -- this is sample/demo code.
 *
 * param layout (see kernel_param_def): [0] input0, [1] input1, [2] output,
 * [3] axis scalar. The axis value is read but not used by the computation.
 */
DEF_KERNEL_EXECUTOR(_softmax_compute)
    (
    vsi_nn_kernel_node_t node,
    const vsi_nn_kernel_node_param_t* param,
    size_t param_size
    )
{
    vsi_status status = VX_SUCCESS;
    float *buffer[_CPU_IO_NUM] = {NULL};
    vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *attr[_CPU_IO_NUM] = {NULL};
    uint32_t i = 0, out_elements = 0;
    int32_t axis;

    tensors[0] = (vsi_nn_kernel_tensor_t)param[0]; // input0
    tensors[1] = (vsi_nn_kernel_tensor_t)param[1]; // input1
    tensors[2] = (vsi_nn_kernel_tensor_t)param[2]; // output

    /* NOTE(review): the attr create results are not NULL-checked before
     * being used below -- confirm the helpers tolerate NULL attrs. */
    attr[0] = vsi_nn_kernel_tensor_attr_create(tensors[0]);
    attr[1] = vsi_nn_kernel_tensor_attr_create(tensors[1]);
    attr[2] = vsi_nn_kernel_tensor_attr_create(tensors[2]);

    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
    CHECK_STATUS_FAIL_GOTO(status, final );

    /* alloc the float32 data buffer */
    buffer[0] = (float *)vsi_nn_kernel_tensor_create_buffer(tensors[0], attr[0], TRUE);
    CHECK_PTR_FAIL_GOTO(buffer[0], "Create input0 buffer fail.", final);

    buffer[1] = (float *)vsi_nn_kernel_tensor_create_buffer(tensors[1], attr[1], TRUE);
    CHECK_PTR_FAIL_GOTO(buffer[1], "Create input1 buffer fail.", final);

    out_elements = (uint32_t)vsi_nn_kernel_tensor_attr_get_size(attr[2]);
    buffer[2] = (float *)malloc(out_elements * sizeof(float));
    CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
    memset(buffer[2], 0, out_elements * sizeof(float));

    /* CPU implement */
    for(i = 0; i < out_elements; i++)
    {
        buffer[2][i] = buffer[0][i] + buffer[1][0];
    }

    /* Convert the float result back to the output tensor's dtype. */
    status = vsi_nn_kernel_tensor_write_from_float(
            tensors[2], attr[2], buffer[2], out_elements );
final:
    /* Single cleanup path: free whatever was allocated, release attrs. */
    for(i = 0; i < _CPU_IO_NUM; i ++)
    {
        if(buffer[i])
        {
            free(buffer[i]);
        }
        vsi_nn_kernel_tensor_attr_release(&attr[i]);
    }
    /* NOTE(review): if a buffer allocation fails, CHECK_PTR_FAIL_GOTO
     * appears to jump here without updating `status`, so VX_SUCCESS may be
     * returned on an allocation failure -- confirm the macro's behavior. */
    return status;
}
|
||||
|
||||
/*
 * OpenVX parameter layout for the CPU kernel:
 *   [0] input0 tensor, [1] input1 tensor, [2] output tensor,
 *   [3] axis scalar (index SCALAR_INPUT_AXIS).
 */
static vx_param_description_t kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
};
|
||||
|
||||
/*
 * Static OpenVX kernel description for the CPU custom_sample kernel.
 * KERNEL_ID_PLACEHOLDER is presumably replaced with a real id at
 * registration time -- confirm in the backend registry. Validation,
 * init and deinit use the common ovxlib helpers.
 */
static const vx_kernel_description_t _kernel_info =
{
    KERNEL_ID_PLACEHOLDER,
    _KERNEL_NAME,
    _softmax_compute,               /* execution callback (CPU reference) */
    kernel_param_def,
    _cnt_of_array( kernel_param_def ),
    vsi_nn_KernelValidator,
    NULL,
    NULL,
    vsi_nn_KernelInitializer,
    vsi_nn_KernelDeinitializer
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
vsi_nn_tensor_t* const* const outputs,
|
||||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
return VSI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Backend setup for the custom_sample CPU kernel.
 * Fills the kernel description, creates the kernel node, packs the tensor
 * I/O plus the `axis` scalar into the parameter list and passes it to the
 * node.
 *
 * Note: input_num/output_num are unused; the fixed _CPU_INPUT_NUM and
 * _CPU_OUTPUT_NUM counts are used instead.
 *
 * @return The created node, or NULL when the kernel query or node creation
 *         fails.
 */
static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t * graph,
    vsi_nn_tensor_t ** inputs,
    size_t input_num,
    vsi_nn_tensor_t ** outputs,
    size_t output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t * kernel
    )
{
    vsi_status status = VSI_SUCCESS;
    vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t axis = 0;

    axis = vsi_nn_kernel_param_get_int32(params, "axis");
    status = _query_kernel(inputs, outputs, kernel);
    if(status != VSI_SUCCESS)
    {
        return NULL;
    }

    node = vsi_nn_kernel_create_node(graph, kernel);
    if(node == NULL)
    {
        return NULL;
    }

    /* Set inputs and outputs */
    vsi_nn_kernel_node_pack_io(backend_params, _CPU_PARAM_NUM,
            inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM);
    /* NOTE(review): the scalar create result is not NULL-checked -- confirm
     * pass_param tolerates a NULL entry. */
    backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
            graph, I32, &axis);

    /* Pass parameters to node. */
    /* NOTE(review): the status returned by pass_param is ignored -- confirm
     * whether a failure here should release `node` and return NULL. */
    status = vsi_nn_kernel_node_pass_param(node, backend_params, _CPU_PARAM_NUM);
    vsi_nn_kernel_scalar_release(&backend_params[SCALAR_INPUT_AXIS]);

    return node;
}
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( custom_sample, _setup )
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#include <stdlib.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_platform.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_node.h"
|
||||
#include "vsi_nn_ops.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
static vsi_status op_compute
|
||||
(
|
||||
vsi_nn_node_t * self,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
vsi_nn_tensor_t ** outputs
|
||||
)
|
||||
{
|
||||
vsi_nn_kernel_param_t *param = NULL;
|
||||
vsi_nn_custom_sample_param *p;
|
||||
p = &self->nn_param.custom_sample;
|
||||
|
||||
param = vsi_nn_kernel_param_create();
|
||||
vsi_nn_kernel_param_add_int32(param, "axis", p->axis);
|
||||
|
||||
self->n = (vx_node)vsi_nn_kernel_selector(
|
||||
self->graph,
|
||||
"custom_sample",
|
||||
inputs, 2,
|
||||
outputs, 1,
|
||||
param);
|
||||
|
||||
vsi_nn_kernel_param_release(¶m);
|
||||
return VSI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Parameter validation hook for CUSTOM_SAMPLE.
 * Currently a stub that accepts all inputs/outputs (see TODO).
 */
static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    /*TODO: Check params. */
    return TRUE;
} /* op_check() */
|
||||
|
||||
static vsi_bool op_setup
|
||||
(
|
||||
vsi_nn_node_t * node,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
vsi_nn_tensor_t ** outputs
|
||||
)
|
||||
{
|
||||
if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num)
|
||||
{
|
||||
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
|
||||
memmove(outputs[0]->attr.size, inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num * sizeof(vsi_size_t));
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
extern "C" {
#endif
/* Registrar */
/* Register CUSTOM_SAMPLE with the op framework: 2 inputs, 1 output,
 * common deinit, no dedicated init/optimize hooks. */
DEF_OP_REG
    (
    /* op_name    */ CUSTOM_SAMPLE,
    /* init       */ NULL,
    /* compute    */ op_compute,
    /* deinit     */ vsi_nn_op_common_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ NULL,
    /* input_num  */ 2,
    /* output_num */ 1
    );
#ifdef __cplusplus
}
#endif
|
||||
|
|
@ -0,0 +1,354 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
/* Internal kernel ids for this op (only one CL kernel family today). */
typedef enum
{
    INTERNAL_KERNEL_AVG_POOL3D,
} _internal_kernel_e;

/* Base name of the .cl source file holding the kernels. */
#define _AVG_POOL3D_KERNEL_SOURCE_NAME "avg_pool3d"

// Add kernel hashtable here
/* Hash key packs the dtype pair: (input dtype << 8) | output dtype. */
#define AVG_POOL3D_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
        (( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
/* One kernel-map row: key, mangled CL function name, source file name. */
#define AVG_POOL3D_KERNELS( IN_DTYPE, OUT_DTYPE ) \
        { AVG_POOL3D_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
          CVIVANTE_NAMESPACE("cl.avg_pool3d_"#IN_DTYPE"to"#OUT_DTYPE), \
          _AVG_POOL3D_KERNEL_SOURCE_NAME }, \

/* Maps a dtype-pair key to the CL kernel implementing it. */
typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _avg_pool3d_kernel_map[] =
{
    // Register kernel here
    AVG_POOL3D_KERNELS( F32, F32 )
    AVG_POOL3D_KERNELS( F32, U32 )
    AVG_POOL3D_KERNELS( F32, I32 )
    AVG_POOL3D_KERNELS( U32, U32 )
    AVG_POOL3D_KERNELS( U32, F32 )
    AVG_POOL3D_KERNELS( I32, I32 )
    AVG_POOL3D_KERNELS( I32, F32 )
    AVG_POOL3D_KERNELS( BF16, BF16 )
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
/*
 * OpenVX parameter layout: [0] input tensor, [1] output tensor, followed
 * by 18 required int scalars carrying the pooling configuration.
 * NOTE(review): only scalar index 14 (output depth) is documented by its
 * use in the initializer below; map the remaining scalar meanings from the
 * op setup code before relying on them.
 */
static vx_param_description_t _avg_pool3d_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, /* [14] output depth */
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _AVG_POOL3D_PARAM_NUM _cnt_of_array( _avg_pool3d_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_avg_pool3d_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
gpu_param_t gpu_param = {
|
||||
3,
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0}
|
||||
};
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vx_tensor output = (vx_tensor)param[1];
|
||||
vx_scalar depth_out = (vx_scalar)param[14];
|
||||
int32_t depth_out_value;
|
||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||
vsi_size_array_t *output_shape = NULL;
|
||||
|
||||
vxReadScalarValue(depth_out, &depth_out_value);
|
||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||
|
||||
output_shape = output_attr->shape;
|
||||
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = depth_out_value;
|
||||
gpu_param.global_size[0] = (output_shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||
/ gpu_param.global_scale[0];
|
||||
gpu_param.global_size[1] = (output_shape->data[1] + gpu_param.global_scale[1] - 1)
|
||||
/ gpu_param.global_scale[1];
|
||||
gpu_param.global_size[2] = (output_shape->data[2] + gpu_param.global_scale[2] - 1)
|
||||
/ gpu_param.global_scale[2];
|
||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||
|
||||
final:
|
||||
if (output_attr)
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release(&output_attr);
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _avg_pool3d_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _avg_pool3d_kernel_map;
|
||||
size_t kernel_map_size = _cnt_of_array( _avg_pool3d_kernel_map );
|
||||
vx_param_description_t * param_def = _avg_pool3d_kernel_param_def;
|
||||
vx_kernel_initialize_f initializer = _avg_pool3d_initializer;
|
||||
|
||||
uint32_t key = 0;
|
||||
uint32_t i = 0;
|
||||
|
||||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
|
||||
(( in_dtype ) | (out_dtype << 8 ))
|
||||
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
case _PACK_SELECT_KEY(F32, F16):
|
||||
case _PACK_SELECT_KEY(F16, F32):
|
||||
key = AVG_POOL3D_HASH_KEY( F32, F32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, U8):
|
||||
case _PACK_SELECT_KEY(F16, U8):
|
||||
key = AVG_POOL3D_HASH_KEY( F32, U32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F32, I16):
|
||||
case _PACK_SELECT_KEY(F16, I8):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
key = AVG_POOL3D_HASH_KEY( F32, I32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, U8):
|
||||
key = AVG_POOL3D_HASH_KEY( U32, U32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, F16):
|
||||
case _PACK_SELECT_KEY(U8, F32):
|
||||
key = AVG_POOL3D_HASH_KEY( U32, F32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, I8):
|
||||
case _PACK_SELECT_KEY(I8, I16):
|
||||
case _PACK_SELECT_KEY(I16, I8):
|
||||
case _PACK_SELECT_KEY(I16, I16):
|
||||
key = AVG_POOL3D_HASH_KEY( I32, I32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, F16):
|
||||
case _PACK_SELECT_KEY(I8, F32):
|
||||
case _PACK_SELECT_KEY(I16, F16):
|
||||
case _PACK_SELECT_KEY(I16, F32):
|
||||
key = AVG_POOL3D_HASH_KEY( I32, F32);
|
||||
break;
|
||||
default:
|
||||
key = AVG_POOL3D_HASH_KEY( in_dtype, out_dtype);
|
||||
break;
|
||||
}
|
||||
#undef _PACK_SELECT_KEY
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _avg_pool3d_kernel_param_def );
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_AVG_POOL3D_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t width = (int32_t)inputs[0]->attr.size[0];
|
||||
int32_t height = (int32_t)inputs[0]->attr.size[1];
|
||||
int32_t ksize_x = vsi_nn_kernel_param_get_int32(params, "ksize_x");
|
||||
int32_t ksize_y = vsi_nn_kernel_param_get_int32(params, "ksize_y");
|
||||
int32_t ksize_z = vsi_nn_kernel_param_get_int32(params, "ksize_z");
|
||||
int32_t stride_x = vsi_nn_kernel_param_get_int32(params, "stride_x");
|
||||
int32_t stride_y = vsi_nn_kernel_param_get_int32(params, "stride_y");
|
||||
int32_t stride_z = vsi_nn_kernel_param_get_int32(params, "stride_z");
|
||||
int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
|
||||
int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
|
||||
int32_t pad_front = vsi_nn_kernel_param_get_int32(params, "pad_front");
|
||||
int32_t depth_in = vsi_nn_kernel_param_get_int32(params, "depth_in");
|
||||
int32_t depth_out = vsi_nn_kernel_param_get_int32(params, "depth_out");
|
||||
int32_t count_include_pad = vsi_nn_kernel_param_get_int32(params, "count_include_pad");
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num )
|
||||
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
uint32_t index = 2;
|
||||
vsi_nn_kernel_node_pack_io( node_params, _AVG_POOL3D_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_x );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_y );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_z );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_x );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_y );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_z );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_front );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &depth_in );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &depth_out );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &count_include_pad );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _AVG_POOL3D_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[2] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[7] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[8] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[9] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[10] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[11] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[12] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[13] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[14] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[15] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[16] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[17] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[18] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[19] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS

/* Register _setup as the CL backend implementation of avg_pool3d. */
REGISTER_BACKEND_CL( avg_pool3d, _setup )
|
||||
|
||||
|
|
@ -0,0 +1,381 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
/* Internal kernel ids for this file (single bilinear variant). */
typedef enum
{
    INTERNAL_KERNEL_BILINEAR_GRID_SAMPLE,
} _internal_kernel_e;

/* Name of the CL source/program containing all variants. */
#define _BILINEAR_GRID_SAMPLE_KERNEL_SOURCE() "bilinear_grid_sample"

/* Stringize helper used to build kernel names from dtype tokens. */
#define STR(a) #a

// Add kernel hashtable here
/* Hash layout: grid dtype in bits 20+, input dtype in bits 8..19,
 * output dtype in bits 0..7. */
#define BILINEAR_GRID_SAMPLE_HASH_KEY(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE) \
    ((IN1_DTYPE << 20) | (IN0_DTYPE << 8) | (OUT_DTYPE))

/* One registration-table entry: key, CL kernel name, CL source name. */
#define PACK_KERNEL_MAP(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE) \
    { \
        BILINEAR_GRID_SAMPLE_HASH_KEY(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE), \
        CVIVANTE_NAMESPACE("cl.bilinear_grid_sample_" STR(IN0_DTYPE) "_" STR(IN1_DTYPE) "to" STR(OUT_DTYPE)), \
        _BILINEAR_GRID_SAMPLE_KERNEL_SOURCE() \
    }
|
||||
|
||||
/* Entry of the kernel registration table: maps a dtype-triple hash key to
 * the generated CL kernel name and the CL source it lives in. */
typedef struct
{
    uint32_t key;              /* BILINEAR_GRID_SAMPLE_HASH_KEY value */
    char * function_name;      /* fully-qualified CL kernel function name */
    const char * source_name;  /* CL source name used to load code/binary */
} _kernel_map_type;
|
||||
|
||||
/* Registration table: only (F32, F32)->F32 and (U8, U8)->U8 variants exist;
 * _query_kernel() folds F16 onto F32 before the lookup. */
static const _kernel_map_type _bilinear_grid_sample_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP(F32, F32, F32 ),
    PACK_KERNEL_MAP(U8, U8, U8),
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
/*
 * Kernel params: input feature map, sampling grid, output tensor,
 * then 11 required scalars (slots 3..13, named by the SCALAR_* defines).
 */
static vx_param_description_t _bilinear_grid_sample_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};

/* Float kernels take only the first 8 params (3 tensors + 5 scalars);
 * quantized (U8) kernels take the full list incl. scale/tail scalars. */
#define _BILINEAR_GRID_SAMPLE_PARAM_NUM 8
#define _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM \
    _cnt_of_array(_bilinear_grid_sample_kernel_param_def)

/* Slot indices of the scalar parameters in the list above. */
#define SCALAR_HALF_INPUT0_W (3)
#define SCALAR_HALF_INPUT0_H (4)
#define SCALAR_ADD_VALUE_W (5)
#define SCALAR_ADD_VALUE_H (6)
#define SCALAR_DEPTH (7)
#define SCALAR_INPUT0_SCALE (8)
#define SCALAR_INPUT0_TAIL (9)
#define SCALAR_INPUT1_SCALE (10)
#define SCALAR_INPUT1_TAIL (11)
#define SCALAR_OUTPUT_SCALE (12)
#define SCALAR_OUTPUT_TAIL (13)
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
gpu_param_t gpu_param = {3, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
|
||||
vsi_nn_kernel_tensor_attr_t* output_attr = NULL;
|
||||
vsi_size_array_t* out_shape = NULL;
|
||||
|
||||
output_attr =
|
||||
vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]);
|
||||
CHECK_PTR_FAIL_GOTO(output_attr, "Create tensor attr buffer fail.", final);
|
||||
|
||||
out_shape = output_attr->shape;
|
||||
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = 1;
|
||||
|
||||
gpu_param.dim = 2;
|
||||
gpu_param.global_size[0] =
|
||||
gpu_align_p2((out_shape->data[0] + gpu_param.global_scale[0] - 1) /
|
||||
gpu_param.global_scale[0],
|
||||
4);
|
||||
gpu_param.global_size[1] =
|
||||
((out_shape->data[1] + gpu_param.global_scale[1] - 1) /
|
||||
gpu_param.global_scale[1]);
|
||||
gpu_param.global_size[2] = 1;
|
||||
status = vsi_nn_kernel_gpu_config(node, &gpu_param);
|
||||
|
||||
final:
|
||||
#define SAFE_FREE_TENSOR_ATTR(_PTR) \
|
||||
if (_PTR) { \
|
||||
vsi_nn_kernel_tensor_attr_release(&_PTR); \
|
||||
_PTR = NULL; \
|
||||
}
|
||||
SAFE_FREE_TENSOR_ATTR(output_attr);
|
||||
return status;
|
||||
} /* _bilinear_grid_sample_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
vsi_bool* is_use_u8_kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in0_dtype, in1_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _bilinear_grid_sample_kernel_map;
|
||||
size_t kernel_map_size = _cnt_of_array( _bilinear_grid_sample_kernel_map );
|
||||
vx_param_description_t * param_def = _bilinear_grid_sample_kernel_param_def;
|
||||
size_t param_def_size = _cnt_of_array(_bilinear_grid_sample_kernel_param_def);
|
||||
vx_kernel_initialize_f initializer = _bilinear_grid_sample_initializer;
|
||||
|
||||
uint32_t key;
|
||||
uint32_t i;
|
||||
|
||||
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
if (F16 == in0_dtype) {
|
||||
in0_dtype = F32;
|
||||
}
|
||||
if (F16 == in1_dtype) {
|
||||
in1_dtype = F32;
|
||||
}
|
||||
if (F16 == out_dtype) {
|
||||
out_dtype = F32;
|
||||
}
|
||||
if ((U8 == in0_dtype) || (U8 == out_dtype)) {
|
||||
param_def_size = _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM;
|
||||
*is_use_u8_kernel = TRUE;
|
||||
} else {
|
||||
param_def_size = _BILINEAR_GRID_SAMPLE_PARAM_NUM;
|
||||
*is_use_u8_kernel = FALSE;
|
||||
}
|
||||
|
||||
key = BILINEAR_GRID_SAMPLE_HASH_KEY(in0_dtype, in1_dtype, out_dtype);
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = (uint32_t)param_def_size;
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
/*
 * Backend setup for bilinear_grid_sample: validates shapes, flattens the
 * grid tensor to 2D, resolves the CL kernel, creates the node and wires
 * its tensor + scalar parameters. Returns NULL on any failure.
 *
 * NOTE(review): indexing below assumes inputs[0] is laid out with
 * size[0]=width, size[1]=height, size[2]=depth (channels) — confirm
 * against the op's layout convention.
 */
static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_nn_kernel_node_t node = NULL;
    vsi_status status = VSI_FAILURE;
    /* Sized for the quantized layout (superset of the float layout). */
    vsi_nn_kernel_node_param_t node_params[_BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM];
    vsi_size_t final_shape[VSI_NN_MAX_DIM_NUM] = {1, 1, 1, 1};
    uint32_t final_in1_rank = 0;
    vsi_nn_tensor_t* rs_tensors = NULL;       /* reshaped grid, released at exit */
    vsi_nn_tensor_t* final_tensors[3] = {NULL};
    vsi_size_t in0_width = inputs[0]->attr.size[0];
    vsi_size_t in0_height = inputs[0]->attr.size[1];
    /* Quantization factors; tails are pre-negated, pre-scaled zero points. */
    float input0_zp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
    float input0_scale = vsi_nn_get_tensor_scale(inputs[0]);
    float input0_tail = -(input0_zp * input0_scale);
    float input1_zp = (float)vsi_nn_get_tensor_zero_point(inputs[1]);
    float input1_scale = vsi_nn_get_tensor_scale(inputs[1]);
    float input1_tail = -(input1_zp * input1_scale);
    float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
    float output_scale = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
    vsi_bool is_use_u8_kernel = FALSE;
    int32_t align_corners =
        vsi_nn_kernel_param_get_int32(params, "align_corners");
    uint32_t pad_val = 0;   /* border value: input zero point for U8 input */
    int32_t depth = 0;
    vsi_nn_kernel_dtype_e in0_dtype;

    float half_input0_w, half_input0_h, add_float_value_w, add_float_value_h;

    // Check if gpu can support the size
    if (!vsi_nn_kernel_gpu_check_shape(inputs[0]->attr.size,
                                       inputs[0]->attr.dim_num)) {
        return NULL;
    }

    if (!vsi_nn_kernel_gpu_check_shape(inputs[1]->attr.size,
                                       inputs[1]->attr.dim_num)) {
        return NULL;
    }

    final_tensors[0] = inputs[0];

    /* Flatten the grid's first two dims into one so the kernel can treat
     * it as a 2D (or batched 2D) tensor. */
    if (inputs[1]->attr.dim_num >= 3) {

        final_shape[0] = inputs[1]->attr.size[1] * inputs[1]->attr.size[0];
        final_shape[1] = inputs[1]->attr.size[2];
        final_shape[2] = 1;
        final_shape[3] = inputs[1]->attr.dim_num > 3 ? inputs[1]->attr.size[3] : 1;
        final_in1_rank =
            inputs[1]->attr.dim_num == 3 ? 2 : inputs[1]->attr.dim_num;
        if (!vsi_nn_kernel_gpu_check_shape(final_shape, final_in1_rank)) {
            return NULL;
        }

        rs_tensors = vsi_nn_reshape_tensor(graph, inputs[1], final_shape, final_in1_rank);
        final_tensors[1] = rs_tensors;
    } else {
        final_tensors[1] = inputs[1];
    }
    final_tensors[2] = outputs[0];

    /* Precompute the [-1, 1] -> pixel-coordinate mapping constants:
     * x_px = x * half_w + add_w (and likewise for y). */
    if (align_corners) {
        half_input0_w = ((float)in0_width - 1.0f) * 0.5f;
        half_input0_h = ((float)in0_height - 1.0f) * 0.5f;
        add_float_value_w = half_input0_w;
        add_float_value_h = half_input0_h;
    } else {
        half_input0_w = (float)in0_width * 0.5f;
        half_input0_h = (float)in0_height * 0.5f;
        add_float_value_w = half_input0_w - 0.5f;
        add_float_value_h = half_input0_h - 0.5f;
    }

    depth = (int32_t)inputs[0]->attr.size[2];
    in0_dtype = vsi_nn_kernel_map_dtype(inputs[0]->attr.dtype.vx_type);
    if (U8 == in0_dtype) {
        pad_val = inputs[0]->attr.dtype.zero_point;
    }
    status = _query_kernel(kernel, inputs, outputs, &is_use_u8_kernel);
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            size_t node_params_num = _BILINEAR_GRID_SAMPLE_PARAM_NUM;
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM,
                    final_tensors, input_num, &final_tensors[2], output_num );
            node_params[SCALAR_HALF_INPUT0_W] = vsi_nn_kernel_scalar_create( graph, F32, &half_input0_w );
            node_params[SCALAR_HALF_INPUT0_H] = vsi_nn_kernel_scalar_create( graph, F32, &half_input0_h );
            node_params[SCALAR_ADD_VALUE_W] = vsi_nn_kernel_scalar_create( graph, F32, &add_float_value_w );
            node_params[SCALAR_ADD_VALUE_H] = vsi_nn_kernel_scalar_create( graph, F32, &add_float_value_h );
            node_params[SCALAR_DEPTH] = vsi_nn_kernel_scalar_create( graph, I32, &depth );
            /* Quantized kernels take the extra scale/tail scalars. */
            if (is_use_u8_kernel)
            {
                node_params[SCALAR_INPUT0_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input0_scale );
                node_params[SCALAR_INPUT0_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &input0_tail );
                node_params[SCALAR_INPUT1_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input1_scale );
                node_params[SCALAR_INPUT1_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &input1_tail );
                node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
                node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &output_zp );
                node_params_num = _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM;
            }
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
            VSI_ASSERT(status == VSI_SUCCESS);
            vsi_nn_kernel_scalar_release(&node_params[SCALAR_HALF_INPUT0_W]);
            vsi_nn_kernel_scalar_release(&node_params[SCALAR_HALF_INPUT0_H]);
            vsi_nn_kernel_scalar_release(&node_params[SCALAR_ADD_VALUE_W]);
            vsi_nn_kernel_scalar_release(&node_params[SCALAR_ADD_VALUE_H]);
            vsi_nn_kernel_scalar_release(&node_params[SCALAR_DEPTH]);
            if (is_use_u8_kernel) {
                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT0_SCALE]);
                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT0_TAIL]);
                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT1_SCALE]);
                vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT1_TAIL]);
                vsi_nn_kernel_scalar_release(&node_params[SCALAR_OUTPUT_SCALE]);
                vsi_nn_kernel_scalar_release(&node_params[SCALAR_OUTPUT_TAIL]);
            }
            {
                // Set default border mode.
                vx_border_t border;
                border.mode = VX_BORDER_CONSTANT;
                border.constant_value.U32 = pad_val;
                status = vxSetNodeAttribute(
                    (vx_node)node, VX_NODE_BORDER, &border, sizeof(border));
                CHECK_STATUS(status);
            }
        }
    }

    /* Reshaped grid tensor (if any) is always released here. */
    vsi_safe_release_tensor(rs_tensors);

    return node;
} /* _setup() */
|
||||
|
||||
__END_DECLS

/* Register _setup as the CL backend implementation of bilinear_grid_sample. */
REGISTER_BACKEND_CL( bilinear_grid_sample, _setup )
|
||||
|
||||
|
|
@ -35,6 +35,7 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -258,19 +259,36 @@ static vsi_nn_kernel_node_t _setup
|
|||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
|
||||
float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
|
||||
vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
|
||||
vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
|
||||
vsi_size_t new_rank = 0;
|
||||
vsi_bool ret = TRUE;
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
ret = vsi_nn_kernel_optimize_element_shape(
|
||||
inputs[0]->attr.size, inputs[0]->attr.dim_num, shape, &new_rank);
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num ) )
|
||||
if ( ret )
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
image_2d = (inputs[0]->attr.dim_num == 2 || inputs[0]->attr.size[2] == 1);
|
||||
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[0], shape, new_rank );
|
||||
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
|
||||
outputs[0], shape, new_rank );
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, image_2d);
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[0]->attr.size,
|
||||
reshape_tensors[0]->attr.dim_num ) )
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
image_2d = (reshape_tensors[0]->attr.dim_num == 2 || reshape_tensors[0]->attr.size[2] == 1);
|
||||
|
||||
status = _query_kernel( kernel, reshape_tensors, &reshape_tensors[1], image_2d);
|
||||
|
||||
if ( VSI_SUCCESS == status )
|
||||
{
|
||||
|
|
@ -279,7 +297,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _CLIP_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
reshape_tensors, input_num, &reshape_tensors[1], output_num );
|
||||
node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
|
||||
node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
|
||||
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
|
|
@ -297,6 +315,10 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
|
||||
}
|
||||
}
|
||||
|
||||
vsi_safe_release_tensor( reshape_tensors[0] );
|
||||
vsi_safe_release_tensor( reshape_tensors[1] );
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -287,7 +288,7 @@ static vsi_status _query_kernel
|
|||
int i;
|
||||
|
||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
if (outputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_NONE && output_dtype == I8)
|
||||
|
|
@ -335,31 +336,85 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
|
||||
vsi_bool image_2d = FALSE;
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t operation = 0;
|
||||
int32_t operation = vsi_nn_kernel_param_get_int32( params, "operation" );
|
||||
vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
|
||||
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
|
||||
vsi_size_t new_rank = 0;
|
||||
vsi_bool ret = FALSE;
|
||||
|
||||
float input0Scale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float input0Tail = (float)vsi_nn_get_tensor_zero_point(inputs[0]) * input0Scale;
|
||||
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
|
||||
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale;
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
ret = vsi_nn_kernel_optimize_eltwise_shape(
|
||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||
inputs[1]->attr.size, inputs[1]->attr.dim_num,
|
||||
outputs[0]->attr.size, outputs[0]->attr.dim_num,
|
||||
shapes[0], shapes[1], shapes[2], &new_rank );
|
||||
|
||||
if ( ret )
|
||||
{
|
||||
return NULL;
|
||||
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[0], shapes[0], new_rank );
|
||||
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[1], shapes[1], new_rank );
|
||||
reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
|
||||
outputs[0], shapes[2], new_rank );
|
||||
|
||||
#define _swap_tensor(a, b, tmp) \
|
||||
do { \
|
||||
tmp = a; \
|
||||
a = b; \
|
||||
b = tmp; \
|
||||
} while(0)
|
||||
|
||||
if (shapes[1][3] > shapes[0][3] && new_rank == 4)
|
||||
{
|
||||
vsi_nn_tensor_t* reshape_tmp;
|
||||
_swap_tensor(reshape_tensors[0], reshape_tensors[1], reshape_tmp);
|
||||
|
||||
if (VSI_NN_RELATIONAL_OPS_GREAT == operation)
|
||||
{
|
||||
operation = VSI_NN_RELATIONAL_OPS_LESS;
|
||||
}
|
||||
else if (VSI_NN_RELATIONAL_OPS_LESS == operation)
|
||||
{
|
||||
operation = VSI_NN_RELATIONAL_OPS_GREAT;
|
||||
}
|
||||
else if (VSI_NN_RELATIONAL_OPS_GREAT_EQUAL == operation)
|
||||
{
|
||||
operation = VSI_NN_RELATIONAL_OPS_LESS_EQUAL;
|
||||
}
|
||||
else if (VSI_NN_RELATIONAL_OPS_LESS_EQUAL == operation)
|
||||
{
|
||||
operation = VSI_NN_RELATIONAL_OPS_GREAT_EQUAL;
|
||||
}
|
||||
}
|
||||
|
||||
#undef _swap_tensor
|
||||
}
|
||||
else
|
||||
{
|
||||
goto final;
|
||||
}
|
||||
|
||||
operation = vsi_nn_kernel_param_get_int32( params, "operation" );
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
|
||||
reshape_tensors[2]->attr.dim_num ) )
|
||||
{
|
||||
goto final;
|
||||
}
|
||||
|
||||
image_2d = (outputs[0]->attr.dim_num == 2);
|
||||
status = _query_kernel( inputs, outputs, operation, image_2d, kernel );
|
||||
if( VSI_SUCCESS == status)
|
||||
image_2d = (reshape_tensors[2]->attr.dim_num == 2 || reshape_tensors[2]->attr.size[2] == 1);
|
||||
status = _query_kernel( reshape_tensors, &reshape_tensors[2], operation, image_2d, kernel );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
|
||||
if( node )
|
||||
if ( node )
|
||||
{
|
||||
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
|
||||
inputs, 2, outputs, 1 );
|
||||
reshape_tensors, 2, &reshape_tensors[2], 1 );
|
||||
node_params[SCALAR_INPUT0_SCALE] = vsi_nn_kernel_scalar_create(
|
||||
graph, F32, &input0Scale );
|
||||
node_params[SCALAR_INPUT0_TAIL] = vsi_nn_kernel_scalar_create(
|
||||
|
|
@ -379,6 +434,12 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT1_TAIL] );
|
||||
}
|
||||
}
|
||||
|
||||
final:
|
||||
vsi_safe_release_tensor( reshape_tensors[0] );
|
||||
vsi_safe_release_tensor( reshape_tensors[1] );
|
||||
vsi_safe_release_tensor( reshape_tensors[2] );
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
|
|
|
|||
|
|
@ -69,14 +69,19 @@ static const struct {
|
|||
{
|
||||
HASH_CUMSUM_KERNELS(0, U8, U8)
|
||||
HASH_CUMSUM_KERNELS(0, F32, F32)
|
||||
HASH_CUMSUM_KERNELS(0, F32, U8)
|
||||
HASH_CUMSUM_KERNELS(1, U8, U8)
|
||||
HASH_CUMSUM_KERNELS(1, F32, F32)
|
||||
HASH_CUMSUM_KERNELS(1, F32, U8)
|
||||
HASH_CUMSUM_KERNELS(2, U8, U8)
|
||||
HASH_CUMSUM_KERNELS(2, F32, F32)
|
||||
HASH_CUMSUM_KERNELS(2, F32, U8)
|
||||
HASH_CUMSUM_KERNELS_2D(0, U8, U8)
|
||||
HASH_CUMSUM_KERNELS_2D(0, F32, F32)
|
||||
HASH_CUMSUM_KERNELS_2D(0, F32, U8)
|
||||
HASH_CUMSUM_KERNELS_2D(1, U8, U8)
|
||||
HASH_CUMSUM_KERNELS_2D(1, F32, F32)
|
||||
HASH_CUMSUM_KERNELS_2D(1, F32, U8)
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -56,6 +56,10 @@ typedef enum
|
|||
UNARY_RCP,
|
||||
UNARY_SIGN,
|
||||
UNARY_SOFTSIGN,
|
||||
UNARY_ATAN,
|
||||
UNARY_ATANH,
|
||||
UNARY_ACOSH,
|
||||
UNARY_INVERSE_SIGMOID,
|
||||
} unary_type_e;
|
||||
|
||||
/*
|
||||
|
|
@ -100,10 +104,18 @@ typedef enum
|
|||
#define RCP_OPERATION rcp
|
||||
#define SIGN_OPERATION sign
|
||||
#define SOFTSIGN_OPERATION softsign
|
||||
#define ATAN_OPERATION atan
|
||||
#define ATANH_OPERATION atanh
|
||||
#define ACOSH_OPERATION acosh
|
||||
#define INVERSE_SIGMOID_OPERATION inverse_sigmoid
|
||||
|
||||
#define ADD_UNARY_SH_KERNELS(name, src_type, dst_type) \
|
||||
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, src_type, dst_type) \
|
||||
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, src_type, dst_type)
|
||||
#define ADD_UNARY_SH_KERNELS(name) \
|
||||
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, F32, F32) \
|
||||
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, F32, F32) \
|
||||
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, U8, U8) \
|
||||
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, U8, U8) \
|
||||
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, U8, F32) \
|
||||
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, U8, F32)
|
||||
|
||||
static const struct {
|
||||
uint32_t key;
|
||||
|
|
@ -111,39 +123,28 @@ static const struct {
|
|||
const char* source_name;
|
||||
} kernel_map[] =
|
||||
{
|
||||
ADD_UNARY_SH_KERNELS(SIN, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(COS, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(EXP, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(LOG, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(NEG, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(HSIGMOID, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(MISH, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(ROUND, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(GELU, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(HGELU, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(SELU, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(CELU, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(RCP, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(SIGN, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(SOFTSIGN, F32, F32)
|
||||
ADD_UNARY_SH_KERNELS(SIN)
|
||||
ADD_UNARY_SH_KERNELS(COS)
|
||||
ADD_UNARY_SH_KERNELS(EXP)
|
||||
ADD_UNARY_SH_KERNELS(LOG)
|
||||
ADD_UNARY_SH_KERNELS(NEG)
|
||||
ADD_UNARY_SH_KERNELS(HSIGMOID)
|
||||
ADD_UNARY_SH_KERNELS(MISH)
|
||||
ADD_UNARY_SH_KERNELS(ROUND)
|
||||
ADD_UNARY_SH_KERNELS(GELU)
|
||||
ADD_UNARY_SH_KERNELS(HGELU)
|
||||
ADD_UNARY_SH_KERNELS(SELU)
|
||||
ADD_UNARY_SH_KERNELS(CELU)
|
||||
ADD_UNARY_SH_KERNELS(RCP)
|
||||
ADD_UNARY_SH_KERNELS(SIGN)
|
||||
ADD_UNARY_SH_KERNELS(SOFTSIGN)
|
||||
ADD_UNARY_SH_KERNELS(ATAN)
|
||||
ADD_UNARY_SH_KERNELS(ATANH)
|
||||
ADD_UNARY_SH_KERNELS(ACOSH)
|
||||
ADD_UNARY_SH_KERNELS(INVERSE_SIGMOID)
|
||||
|
||||
ADD_UNARY_SH_KERNELS(SIN, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(COS, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(EXP, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(LOG, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(NEG, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(HSIGMOID, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(MISH, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(ROUND, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(GELU, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(HGELU, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(SELU, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(CELU, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(RCP, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(SIGN, U8, U8)
|
||||
ADD_UNARY_SH_KERNELS(SOFTSIGN, U8, U8)
|
||||
|
||||
ADD_UNARY_SH_KERNELS(NEG, I32, I32)
|
||||
TENSOR_UNARY_KERNELS_3D(NEG_OPERATION, UNARY_NEG, I32, I32)
|
||||
TENSOR_UNARY_KERNELS_2D(NEG_OPERATION, UNARY_NEG, I32, I32)
|
||||
};
|
||||
|
||||
#undef SIN_OPERATION
|
||||
|
|
@ -161,6 +162,10 @@ static const struct {
|
|||
#undef RCP_OPERATION
|
||||
#undef SIGN_OPERATION
|
||||
#undef SOFTSIGN_OPERATION
|
||||
#undef ATAN_OPERATION
|
||||
#undef ATANH_OPERATION
|
||||
#undef ACOSH_OPERATION
|
||||
#undef INVERSE_SIGMOID_OPERATION
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
|
|
@ -262,6 +267,10 @@ static vsi_status _query_kernel
|
|||
case _PACK_SELECT_KEY(F16, F16):
|
||||
key = HASH_UNARY_KEY( type, F32, F32, image_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, F32):
|
||||
case _PACK_SELECT_KEY(U8, F16):
|
||||
key = HASH_UNARY_KEY( type, U8, F32, image_2d );
|
||||
break;
|
||||
default:
|
||||
key = HASH_UNARY_KEY( type, input_dtype, output_dtype, image_2d );
|
||||
break;
|
||||
|
|
@ -330,7 +339,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
ret = vsi_nn_kernel_optimize_element_shape(
|
||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||
shape, &new_rank );
|
||||
if( ret )
|
||||
if ( ret )
|
||||
{
|
||||
rs_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[0], shape, new_rank );
|
||||
|
|
@ -338,7 +347,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
outputs[0], shape, new_rank );
|
||||
}
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
|
||||
rs_tensors[0]->attr.dim_num ) )
|
||||
{
|
||||
return NULL;
|
||||
|
|
@ -348,11 +357,11 @@ static vsi_nn_kernel_node_t _setup
|
|||
|
||||
image_2d = (rs_tensors[0]->attr.dim_num == 2 || rs_tensors[0]->attr.size[2] == 1);
|
||||
status = _query_kernel( rs_tensors, &rs_tensors[1], unary_type, image_2d, kernel );
|
||||
if( VSI_SUCCESS == status)
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
|
||||
if( node )
|
||||
if ( node )
|
||||
{
|
||||
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
|
||||
rs_tensors, 1, &rs_tensors[1], 1 );
|
||||
|
|
@ -452,5 +461,9 @@ REGISTER_ELTWISE_UNARY_BACKEND_CL( celu, UNARY_CELU )
|
|||
REGISTER_ELTWISE_UNARY_BACKEND_CL( rcp, UNARY_RCP )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( sign, UNARY_SIGN )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( softsign, UNARY_SOFTSIGN )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( atan, UNARY_ATAN )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( atanh, UNARY_ATANH )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( acosh, UNARY_ACOSH )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( inverse_sigmoid, UNARY_INVERSE_SIGMOID )
|
||||
|
||||
__END_DECLS
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ typedef enum
|
|||
|
||||
#define _GATHER_KERNEL_SOURCE "gather"
|
||||
#define _GATHER_BATCH_KERNEL_SOURCE "gather_batch"
|
||||
#define _GATHER_ARRAY_KERNEL_SOURCE "gather_array"
|
||||
|
||||
// Add kernel hashtable here
|
||||
#define VX_KERNEL_NAME_GATHER_U8TOU8 CVIVANTE_NAMESPACE("cl.gather_U8toU8")
|
||||
|
|
@ -61,9 +62,14 @@ typedef enum
|
|||
#define VX_KERNEL_NAME_GATHER_BATCH_I32TOI32 CVIVANTE_NAMESPACE("cl.gather_batch_I32toI32")
|
||||
#define VX_KERNEL_NAME_GATHER_BATCH_F32TOF32 CVIVANTE_NAMESPACE("cl.gather_batch_F32toF32")
|
||||
|
||||
#define VX_KERNEL_NAME_GATHER_ARRAY_U8TOU8 CVIVANTE_NAMESPACE("cl.gather_array_U8toU8")
|
||||
#define VX_KERNEL_NAME_GATHER_ARRAY_F16TOF16 CVIVANTE_NAMESPACE("cl.gather_array_F16toF16")
|
||||
#define VX_KERNEL_NAME_GATHER_ARRAY_I32TOI32 CVIVANTE_NAMESPACE("cl.gather_array_I32toI32")
|
||||
#define VX_KERNEL_NAME_GATHER_ARRAY_F32TOF32 CVIVANTE_NAMESPACE("cl.gather_array_F32toF32")
|
||||
|
||||
// Add kernel hashtable here
|
||||
#define HASH_GATHER_KEY(_input0_type, _input1_type, _output_type, _image_2d, _batch) \
|
||||
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_image_2d << 4) | (_batch))
|
||||
#define HASH_GATHER_KEY(_input0_type, _input1_type, _output_type, _is_array, _batch) \
|
||||
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_is_array << 4) | (_batch))
|
||||
|
||||
#define TENSOR_GATHER_KERNELS(IN0_TYPE, IN1TYPE, OUT_TYPE, SOURCE) \
|
||||
{ HASH_GATHER_KEY(IN0_TYPE, IN1TYPE, OUT_TYPE, 0, 0), \
|
||||
|
|
@ -75,6 +81,11 @@ typedef enum
|
|||
VX_KERNEL_NAME_GATHER_BATCH_##IN0_TYPE##TO##OUT_TYPE, \
|
||||
SOURCE },
|
||||
|
||||
#define TENSOR_GATHER_ARRAY_KERNELS(IN0_TYPE, IN1TYPE, OUT_TYPE, SOURCE) \
|
||||
{ HASH_GATHER_KEY(IN0_TYPE, IN1TYPE, OUT_TYPE, 1, 0), \
|
||||
VX_KERNEL_NAME_GATHER_ARRAY_##IN0_TYPE##TO##OUT_TYPE, \
|
||||
SOURCE },
|
||||
|
||||
static const struct {
|
||||
uint32_t key;
|
||||
char* function_name;
|
||||
|
|
@ -89,6 +100,10 @@ static const struct {
|
|||
TENSOR_GATHER_BATCH_KERNELS(F16, I32, F16, _GATHER_BATCH_KERNEL_SOURCE)
|
||||
TENSOR_GATHER_BATCH_KERNELS(I32, I32, I32, _GATHER_BATCH_KERNEL_SOURCE)
|
||||
TENSOR_GATHER_BATCH_KERNELS(F32, I32, F32, _GATHER_BATCH_KERNEL_SOURCE)
|
||||
TENSOR_GATHER_ARRAY_KERNELS(U8, I32, U8, _GATHER_ARRAY_KERNEL_SOURCE)
|
||||
TENSOR_GATHER_ARRAY_KERNELS(F16, I32, F16, _GATHER_ARRAY_KERNEL_SOURCE)
|
||||
TENSOR_GATHER_ARRAY_KERNELS(I32, I32, I32, _GATHER_ARRAY_KERNEL_SOURCE)
|
||||
TENSOR_GATHER_ARRAY_KERNELS(F32, I32, F32, _GATHER_ARRAY_KERNEL_SOURCE)
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -114,7 +129,8 @@ static vsi_status cal_gather_tensor_reshape_size
|
|||
vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
|
||||
uint32_t block_size,
|
||||
vsi_size_t batch_dims,
|
||||
uint32_t idxFlg
|
||||
uint32_t idxFlg,
|
||||
int32_t* arrayFlg
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
|
|
@ -148,18 +164,19 @@ static vsi_status cal_gather_tensor_reshape_size
|
|||
}
|
||||
else
|
||||
{
|
||||
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
|
||||
sizes[0] = block_size;
|
||||
sizes[1] = elementCnt / block_size;
|
||||
sizes[2] = outerCnt;
|
||||
if ((elementCnt / block_size) >= VSI_NN_MAX_IMAGE_WIDTH)
|
||||
{
|
||||
sizes[0] = block_size;
|
||||
sizes[1] = elementCnt / block_size;
|
||||
sizes[2] = outerCnt;
|
||||
status = VSI_SUCCESS;
|
||||
arrayFlg[0] |= 1;
|
||||
}
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
#undef VSI_NN_MAX_IMAGE_WIDTH
|
||||
|
||||
return status;
|
||||
} /* _get_EltOP_tensor_reshape_size */
|
||||
} /* cal_gather_tensor_reshape_size */
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
|
|
@ -209,8 +226,7 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
|
|||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = 1;
|
||||
|
||||
gpu_param.global_size[0] = gpu_align_p2((block_size + gpu_param.global_scale[0] - 1)
|
||||
/ gpu_param.global_scale[0], 4);
|
||||
gpu_param.global_size[0] = block_size;
|
||||
gpu_param.global_size[1] = indices_num;
|
||||
gpu_param.global_size[2] = block_num;
|
||||
|
||||
|
|
@ -239,7 +255,8 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
int32_t is_batch
|
||||
int32_t is_batch,
|
||||
int32_t is_array
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
|
|
@ -262,7 +279,7 @@ static vsi_status _query_kernel
|
|||
output_dtype = I32;
|
||||
}
|
||||
|
||||
key = HASH_GATHER_KEY( input0_dtype, I32, output_dtype, 0, is_batch );
|
||||
key = HASH_GATHER_KEY( input0_dtype, I32, output_dtype, is_array, is_batch );
|
||||
|
||||
for ( i = 0; i < _cnt_of_array(gather_map); i ++ )
|
||||
{
|
||||
|
|
@ -314,11 +331,12 @@ static vsi_nn_kernel_node_t _setup
|
|||
int32_t indices_num = vsi_nn_kernel_param_get_int32( params, "indices_num" );
|
||||
int32_t is_batch = batch_dims > 0 ? 1 : 0;
|
||||
vsi_size_t rs_dim = batch_dims == 0 ? 2 : 3;
|
||||
int32_t is_array = block_size >= GPU_TENSOR_MAX_WIDTH ? 1 : 0;
|
||||
int32_t i = 0;
|
||||
|
||||
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0);
|
||||
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1);
|
||||
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0);
|
||||
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0, &is_array);
|
||||
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1, &is_array);
|
||||
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0, &is_array);
|
||||
if (status != VSI_SUCCESS)
|
||||
{
|
||||
return NULL;
|
||||
|
|
@ -337,7 +355,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
return NULL;
|
||||
}
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, is_batch );
|
||||
status = _query_kernel( kernel, inputs, outputs, is_batch, is_array );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ __BEGIN_DECLS
|
|||
*/
|
||||
#define KERNEL_SOURCE_1 "gather_nd"
|
||||
#define KERNEL_SOURCE_2 "gather_nd_3d"
|
||||
#define KERNEL_SOURCE_3 "gather_nd_batch"
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
|
@ -52,17 +53,25 @@ __BEGIN_DECLS
|
|||
_3D
|
||||
} vsi_nn_kernel_coord_type_e;
|
||||
|
||||
#define HASH_GATHER_ND_KEY(_input0_type, _input1_type, _output_type, _coord_dim) \
|
||||
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_coord_dim))
|
||||
#define HASH_GATHER_ND_KEY(_input0_type, _input1_type, _output_type, _coord_dim, _batch_dims) \
|
||||
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_coord_dim << 4) | (_batch_dims))
|
||||
|
||||
#define HASH_GATHER_ND_SH_KERNEL_NAME(SRC0_TYPE, DST_TYPE, COORD_TYPE) \
|
||||
CVIVANTE_NAMESPACE("cl.gather_nd_"#SRC0_TYPE"to"#DST_TYPE#COORD_TYPE)
|
||||
|
||||
#define TENSOR_GATHER_ND_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, SOURCE) \
|
||||
{ HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE), \
|
||||
{ HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, 0), \
|
||||
HASH_GATHER_ND_SH_KERNEL_NAME(IN0_TYPE, OUT_TYPE, COORD_TYPE), \
|
||||
SOURCE },
|
||||
|
||||
#define HASH_GATHER_ND_BATCH_SH_KERNEL_NAME(SRC0_TYPE, DST_TYPE, COORD_TYPE) \
|
||||
CVIVANTE_NAMESPACE("cl.gather_nd_batch_"#SRC0_TYPE"to"#DST_TYPE#COORD_TYPE)
|
||||
|
||||
#define TENSOR_GATHER_ND_BATCH_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, SOURCE) \
|
||||
{ HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, 1), \
|
||||
HASH_GATHER_ND_BATCH_SH_KERNEL_NAME(IN0_TYPE, OUT_TYPE, COORD_TYPE), \
|
||||
SOURCE },
|
||||
|
||||
static const struct {
|
||||
uint32_t key;
|
||||
char* function_name;
|
||||
|
|
@ -81,6 +90,12 @@ static const struct {
|
|||
TENSOR_GATHER_ND_KERNELS(F16, I32, F16, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_GATHER_ND_KERNELS(I32, I32, I32, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_GATHER_ND_KERNELS(F32, I32, F32, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_GATHER_ND_BATCH_KERNELS(U8, I32, U8, _1D, KERNEL_SOURCE_3)
|
||||
TENSOR_GATHER_ND_BATCH_KERNELS(I8, I32, I8, _1D, KERNEL_SOURCE_3)
|
||||
TENSOR_GATHER_ND_BATCH_KERNELS(F16, I32, F16, _1D, KERNEL_SOURCE_3)
|
||||
TENSOR_GATHER_ND_BATCH_KERNELS(U8, I32, U8, _2D, KERNEL_SOURCE_3)
|
||||
TENSOR_GATHER_ND_BATCH_KERNELS(I8, I32, I8, _2D, KERNEL_SOURCE_3)
|
||||
TENSOR_GATHER_ND_BATCH_KERNELS(F16, I32, F16, _2D, KERNEL_SOURCE_3)
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -103,7 +118,8 @@ static vsi_status cal_gather_nd_tensor_reshape_size
|
|||
vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
|
||||
uint32_t block_size,
|
||||
uint32_t coordDim,
|
||||
int32_t* newDim
|
||||
int32_t* newDim,
|
||||
int32_t batch_dims
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
|
|
@ -114,45 +130,63 @@ static vsi_status cal_gather_nd_tensor_reshape_size
|
|||
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
|
||||
|
||||
newDim[0] = 0;
|
||||
for(i = 0; i < dims_num; ++i)
|
||||
for (i = 0; i < dims_num; ++i)
|
||||
{
|
||||
elementCnt *= input_size[i];
|
||||
}
|
||||
|
||||
for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
|
||||
for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
|
||||
{
|
||||
sizes[i] = 1;
|
||||
}
|
||||
|
||||
if(coordDim) // input reshape
|
||||
if (coordDim) // input reshape
|
||||
{
|
||||
uint32_t offset = dims_num - coordDim + 1;
|
||||
for(i = coordDim-1; i > 0; i--)
|
||||
{
|
||||
sizes[i] = input_size[i + offset - 1];
|
||||
}
|
||||
for(i = 0; i < offset; i++)
|
||||
{
|
||||
sizes[0] *= input_size[i];
|
||||
}
|
||||
uint32_t offset = dims_num - coordDim + 1 - batch_dims;
|
||||
|
||||
newDim[0] = coordDim;
|
||||
if(coordDim == 1)
|
||||
if (batch_dims)
|
||||
{
|
||||
newDim[0] = 2;
|
||||
sizes[0] = block_size;
|
||||
sizes[1] = elementCnt / block_size;
|
||||
for (i = 0; i < offset; i++)
|
||||
{
|
||||
sizes[0] *= input_size[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < coordDim; i++)
|
||||
{
|
||||
sizes[i + 1] = input_size[i + offset];
|
||||
}
|
||||
|
||||
newDim[0] = coordDim == 1 ? 2 : 3;
|
||||
}
|
||||
else if(coordDim == 4)
|
||||
else
|
||||
{
|
||||
newDim[0] = 3;
|
||||
for (i = coordDim-1; i > 0; i--)
|
||||
{
|
||||
sizes[i] = input_size[i + offset - 1];
|
||||
}
|
||||
for (i = 0; i < offset; i++)
|
||||
{
|
||||
sizes[0] *= input_size[i];
|
||||
}
|
||||
|
||||
newDim[0] = coordDim;
|
||||
if (coordDim == 1)
|
||||
{
|
||||
newDim[0] = 2;
|
||||
sizes[0] = block_size;
|
||||
sizes[1] = elementCnt / block_size;
|
||||
}
|
||||
else if (coordDim == 4)
|
||||
{
|
||||
newDim[0] = 3;
|
||||
}
|
||||
}
|
||||
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
else // indices&output reshape
|
||||
{
|
||||
if((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
|
||||
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
|
||||
{
|
||||
sizes[0] = block_size;
|
||||
sizes[1] = elementCnt / block_size;
|
||||
|
|
@ -222,7 +256,8 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
int32_t coord_dim
|
||||
int32_t coord_dim,
|
||||
int32_t batch_dims
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
|
|
@ -234,30 +269,49 @@ static vsi_status _query_kernel
|
|||
|
||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
if(coord_dim == 1)
|
||||
|
||||
if (input0_dtype == F32)
|
||||
{
|
||||
input0_dtype = F16;
|
||||
}
|
||||
else if (input0_dtype == I32 || input0_dtype == I16)
|
||||
{
|
||||
input0_dtype = I8;
|
||||
}
|
||||
|
||||
if (output_dtype == F32)
|
||||
{
|
||||
output_dtype = F16;
|
||||
}
|
||||
else if (output_dtype == I32 || output_dtype == I16)
|
||||
{
|
||||
output_dtype = I8;
|
||||
}
|
||||
|
||||
if (coord_dim == 1)
|
||||
{
|
||||
coord_type = _1D;
|
||||
}
|
||||
else if(coord_dim == 2)
|
||||
else if (coord_dim == 2)
|
||||
{
|
||||
coord_type = _2D;
|
||||
}
|
||||
else if(coord_dim == 3 || coord_dim == 4)
|
||||
else if (coord_dim == 3 || coord_dim == 4)
|
||||
{
|
||||
coord_type = _3D;
|
||||
}
|
||||
|
||||
key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type );
|
||||
key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_dims );
|
||||
|
||||
for( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
|
||||
for ( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
|
||||
{
|
||||
if( gather_nd_map[i].key == key )
|
||||
if ( gather_nd_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( i < _cnt_of_array(gather_nd_map) )
|
||||
if ( i < _cnt_of_array(gather_nd_map) )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", gather_nd_map[i].function_name );
|
||||
kernel->info.parameters = _gather_nd_kernel_param_def;
|
||||
|
|
@ -289,29 +343,30 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_node_param_t node_params[_GATHER_ND_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
|
||||
int32_t batch_dims = vsi_nn_kernel_param_get_int32( params, "batch_dims" );
|
||||
int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
|
||||
int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
|
||||
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
|
||||
|
||||
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim);
|
||||
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim);
|
||||
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim);
|
||||
if(status != VSI_SUCCESS)
|
||||
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim, batch_dims);
|
||||
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim, batch_dims);
|
||||
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim, batch_dims);
|
||||
if (status != VSI_SUCCESS)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, coord_dim );
|
||||
if( VSI_SUCCESS == status)
|
||||
status = _query_kernel( kernel, inputs, outputs, coord_dim, batch_dims );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
if ( node )
|
||||
{
|
||||
uint32_t index = 0;
|
||||
/* Pass parameters to node. */
|
||||
|
|
|
|||
|
|
@ -0,0 +1,292 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
#define _GLOBALLPPOOL_KERNEL_SOURCE_NAME "globallppool"
|
||||
|
||||
// Add kernel hashtable here
|
||||
#define GLOBALLPPOOL_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
|
||||
(( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
|
||||
#define GLOBALLPPOOL_KERNELS( IN_DTYPE, OUT_DTYPE ) \
|
||||
{ GLOBALLPPOOL_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
|
||||
CVIVANTE_NAMESPACE("cl.globallppool_"#IN_DTYPE"to"#OUT_DTYPE), \
|
||||
_GLOBALLPPOOL_KERNEL_SOURCE_NAME }, \
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t key;
|
||||
char * function_name;
|
||||
const char * source_name;
|
||||
} _kernel_map_type;
|
||||
|
||||
static const _kernel_map_type _globallppool_kernel_map[] =
|
||||
{
|
||||
// Register kernel here
|
||||
GLOBALLPPOOL_KERNELS( F32, F32 )
|
||||
GLOBALLPPOOL_KERNELS( F32, U32 )
|
||||
GLOBALLPPOOL_KERNELS( F32, I32 )
|
||||
GLOBALLPPOOL_KERNELS( U32, U32 )
|
||||
GLOBALLPPOOL_KERNELS( U32, F32 )
|
||||
GLOBALLPPOOL_KERNELS( I32, I32 )
|
||||
GLOBALLPPOOL_KERNELS( I32, F32 )
|
||||
GLOBALLPPOOL_KERNELS( BF16, BF16 )
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _globallppool_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _GLOBALLPPOOL_PARAM_NUM _cnt_of_array( _globallppool_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_globallppool_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
gpu_param_t gpu_param = {
|
||||
1,
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0}
|
||||
};
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vx_tensor output = (vx_tensor)param[1];
|
||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||
vsi_size_array_t *output_shape = NULL;
|
||||
|
||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||
output_shape = output_attr->shape;
|
||||
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_size[0] = (output_shape->data[2] + gpu_param.global_scale[0] - 1)
|
||||
/ gpu_param.global_scale[0];
|
||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||
final:
|
||||
if (output_attr)
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release(&output_attr);
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _globallppool_initializer() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _globallppool_kernel_map;
|
||||
size_t kernel_map_size = _cnt_of_array( _globallppool_kernel_map );
|
||||
vx_param_description_t * param_def = _globallppool_kernel_param_def;
|
||||
vx_kernel_initialize_f initializer = _globallppool_initializer;
|
||||
|
||||
uint32_t key;
|
||||
uint32_t i;
|
||||
|
||||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
|
||||
(( in_dtype ) | (out_dtype << 8 ))
|
||||
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
case _PACK_SELECT_KEY(F32, F16):
|
||||
case _PACK_SELECT_KEY(F16, F32):
|
||||
key = GLOBALLPPOOL_HASH_KEY( F32, F32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, U8):
|
||||
case _PACK_SELECT_KEY(F16, U8):
|
||||
key = GLOBALLPPOOL_HASH_KEY( F32, U32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F32, I16):
|
||||
case _PACK_SELECT_KEY(F16, I8):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
key = GLOBALLPPOOL_HASH_KEY( F32, I32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, U8):
|
||||
key = GLOBALLPPOOL_HASH_KEY( U32, U32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, F16):
|
||||
case _PACK_SELECT_KEY(U8, F32):
|
||||
key = GLOBALLPPOOL_HASH_KEY( U32, F32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, I8):
|
||||
case _PACK_SELECT_KEY(I8, I16):
|
||||
case _PACK_SELECT_KEY(I16, I8):
|
||||
case _PACK_SELECT_KEY(I16, I16):
|
||||
key = GLOBALLPPOOL_HASH_KEY( I32, I32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, F16):
|
||||
case _PACK_SELECT_KEY(I8, F32):
|
||||
case _PACK_SELECT_KEY(I16, F16):
|
||||
case _PACK_SELECT_KEY(I16, F32):
|
||||
key = GLOBALLPPOOL_HASH_KEY( I32, F32);
|
||||
break;
|
||||
default:
|
||||
key = GLOBALLPPOOL_HASH_KEY( in_dtype, out_dtype);
|
||||
break;
|
||||
}
|
||||
#undef _PACK_SELECT_KEY
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _globallppool_kernel_param_def );
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
||||
"eltwise_ops_helper",
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_GLOBALLPPOOL_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t p = vsi_nn_kernel_param_get_int32(params, "p");
|
||||
int32_t width = (int32_t)inputs[0]->attr.size[0];
|
||||
int32_t height = (int32_t)inputs[0]->attr.size[1];
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num )
|
||||
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
uint32_t index = 2;
|
||||
vsi_nn_kernel_node_pack_io( node_params, _GLOBALLPPOOL_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &p );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _GLOBALLPPOOL_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[2] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[7] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[8] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CL( globallppool, _setup )
|
||||
|
||||
|
|
@ -0,0 +1,365 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
|
||||
#define _L1NORM_KERNEL_SOURCE_NAME "l1norm"
|
||||
|
||||
// Add kernel hashtable here
|
||||
#define L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, _image_2d, AXIS) \
|
||||
(( IN_DTYPE << 24 ) | ( OUT_DTYPE << 16) | (_image_2d << 8) | (AXIS))
|
||||
#define L1NORM_KERNELS( IN_DTYPE, OUT_DTYPE, AXIS ) \
|
||||
{ L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, 0 , AXIS), \
|
||||
CVIVANTE_NAMESPACE("cl.l1norm_"#IN_DTYPE"to"#OUT_DTYPE"_axis"#AXIS), \
|
||||
_L1NORM_KERNEL_SOURCE_NAME }
|
||||
|
||||
#define L1NORM_KERNELS_2D( IN_DTYPE, OUT_DTYPE, AXIS ) \
|
||||
{ L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, 1, AXIS), \
|
||||
CVIVANTE_NAMESPACE("cl.l1norm_"#IN_DTYPE"to"#OUT_DTYPE"_2D_axis"#AXIS), \
|
||||
_L1NORM_KERNEL_SOURCE_NAME }
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t key;
|
||||
char * function_name;
|
||||
const char * source_name;
|
||||
} _kernel_map_type;
|
||||
|
||||
static const _kernel_map_type _l1norm_kernel_map[] =
|
||||
{
|
||||
// Register kernel here
|
||||
L1NORM_KERNELS( U32, U32, 0 ),
|
||||
L1NORM_KERNELS( U32, I32, 0 ),
|
||||
L1NORM_KERNELS( U32, F32, 0 ),
|
||||
L1NORM_KERNELS( I32, I32, 0 ),
|
||||
L1NORM_KERNELS( I32, U32, 0 ),
|
||||
L1NORM_KERNELS( I32, F32, 0 ),
|
||||
L1NORM_KERNELS( F32, F32, 0 ),
|
||||
L1NORM_KERNELS( F32, U32, 0 ),
|
||||
L1NORM_KERNELS( F32, I32, 0 ),
|
||||
|
||||
L1NORM_KERNELS( U32, U32, 1 ),
|
||||
L1NORM_KERNELS( U32, I32, 1 ),
|
||||
L1NORM_KERNELS( U32, F32, 1 ),
|
||||
L1NORM_KERNELS( I32, I32, 1 ),
|
||||
L1NORM_KERNELS( I32, U32, 1 ),
|
||||
L1NORM_KERNELS( I32, F32, 1 ),
|
||||
L1NORM_KERNELS( F32, F32, 1 ),
|
||||
L1NORM_KERNELS( F32, U32, 1 ),
|
||||
L1NORM_KERNELS( F32, I32, 1 ),
|
||||
|
||||
L1NORM_KERNELS( U32, U32, 2 ),
|
||||
L1NORM_KERNELS( U32, I32, 2 ),
|
||||
L1NORM_KERNELS( U32, F32, 2 ),
|
||||
L1NORM_KERNELS( I32, I32, 2 ),
|
||||
L1NORM_KERNELS( I32, U32, 2 ),
|
||||
L1NORM_KERNELS( I32, F32, 2 ),
|
||||
L1NORM_KERNELS( F32, F32, 2 ),
|
||||
L1NORM_KERNELS( F32, U32, 2 ),
|
||||
L1NORM_KERNELS( F32, I32, 2 ),
|
||||
|
||||
L1NORM_KERNELS_2D( U32, U32, 0 ),
|
||||
L1NORM_KERNELS_2D( U32, I32, 0 ),
|
||||
L1NORM_KERNELS_2D( U32, F32, 0 ),
|
||||
L1NORM_KERNELS_2D( I32, I32, 0 ),
|
||||
L1NORM_KERNELS_2D( I32, U32, 0 ),
|
||||
L1NORM_KERNELS_2D( I32, F32, 0 ),
|
||||
L1NORM_KERNELS_2D( F32, F32, 0 ),
|
||||
L1NORM_KERNELS_2D( F32, U32, 0 ),
|
||||
L1NORM_KERNELS_2D( F32, I32, 0 ),
|
||||
|
||||
L1NORM_KERNELS_2D( U32, U32, 1 ),
|
||||
L1NORM_KERNELS_2D( U32, I32, 1 ),
|
||||
L1NORM_KERNELS_2D( U32, F32, 1 ),
|
||||
L1NORM_KERNELS_2D( I32, I32, 1 ),
|
||||
L1NORM_KERNELS_2D( I32, U32, 1 ),
|
||||
L1NORM_KERNELS_2D( I32, F32, 1 ),
|
||||
L1NORM_KERNELS_2D( F32, F32, 1 ),
|
||||
L1NORM_KERNELS_2D( F32, U32, 1 ),
|
||||
L1NORM_KERNELS_2D( F32, I32, 1 ),
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _l1norm_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
|
||||
|
||||
// Add kererl parameters here
|
||||
};
|
||||
#define _L1NORM_PARAM_NUM _cnt_of_array( _l1norm_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_l1norm_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
gpu_param_t gpu_param = {
|
||||
3,
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0}
|
||||
};
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vx_tensor output = (vx_tensor)param[1];
|
||||
vx_int32 axis = 0;
|
||||
vx_int32 dim = 0;
|
||||
vx_int32 width = 0;
|
||||
vx_int32 height = 0;
|
||||
vx_int32 depth = 0;
|
||||
|
||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||
vsi_size_array_t *output_shape = NULL;
|
||||
|
||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||
vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &axis);
|
||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||
|
||||
output_shape = output_attr->shape;
|
||||
|
||||
dim = output_shape->size < 3 ? 2 : 3;
|
||||
width = (vx_int32)output_shape->data[0];
|
||||
height = (vx_int32)output_shape->data[1];
|
||||
depth = dim < 3 ? 1 : (vx_int32)output_shape->data[2];
|
||||
|
||||
gpu_param.dim = dim;
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = 1;
|
||||
|
||||
if (axis == 0)
|
||||
{
|
||||
gpu_param.local_size[0] = 16;
|
||||
gpu_param.local_size[1] = 1;
|
||||
gpu_param.local_size[2] = 1;
|
||||
gpu_param.global_size[0] = 16;
|
||||
gpu_param.global_size[1] = height;
|
||||
gpu_param.global_size[2] = depth;
|
||||
}
|
||||
else if (axis == 1)
|
||||
{
|
||||
gpu_param.local_size[0] = 1;
|
||||
gpu_param.local_size[1] = 16;
|
||||
gpu_param.local_size[2] = 1;
|
||||
gpu_param.global_size[0] = width;
|
||||
gpu_param.global_size[1] = 16;
|
||||
gpu_param.global_size[2] = depth;
|
||||
}
|
||||
else
|
||||
{
|
||||
gpu_param.local_size[0] = 1;
|
||||
gpu_param.local_size[1] = 1;
|
||||
gpu_param.local_size[2] = 16;
|
||||
|
||||
gpu_param.global_size[0] = width;
|
||||
gpu_param.global_size[1] = height;
|
||||
gpu_param.global_size[2] = 16;
|
||||
}
|
||||
|
||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||
|
||||
final:
|
||||
if (output_attr)
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release(&output_attr);
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _l1norm_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
vsi_bool image_2d,
|
||||
int32_t axis
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _l1norm_kernel_map;
|
||||
size_t kernel_map_size = _cnt_of_array( _l1norm_kernel_map );
|
||||
vx_param_description_t * param_def = _l1norm_kernel_param_def;
|
||||
vx_kernel_initialize_f initializer = _l1norm_initializer;
|
||||
|
||||
uint32_t key;
|
||||
uint32_t i;
|
||||
|
||||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
if (F16 == in_dtype)
|
||||
{
|
||||
in_dtype = F32;
|
||||
}
|
||||
else if (U8 == in_dtype)
|
||||
{
|
||||
in_dtype = U32;
|
||||
}
|
||||
else if (I16 == in_dtype || I8 == in_dtype)
|
||||
{
|
||||
in_dtype = I32;
|
||||
}
|
||||
|
||||
if (F16 == out_dtype)
|
||||
{
|
||||
out_dtype = F32;
|
||||
}
|
||||
else if (U8 == out_dtype)
|
||||
{
|
||||
out_dtype = U32;
|
||||
}
|
||||
else if (I16 == out_dtype || I8 == out_dtype)
|
||||
{
|
||||
out_dtype = I32;
|
||||
}
|
||||
|
||||
key = L1NORM_HASH_KEY( in_dtype, out_dtype, image_2d, axis);
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _l1norm_kernel_param_def );
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_L1NORM_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
vsi_bool image_2d = FALSE;
|
||||
int32_t axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputZp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
int32_t axis_size = (int32_t)outputs[0]->attr.size[axis];
|
||||
outputScale = 1.0f / outputScale;
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
image_2d = (outputs[0]->attr.dim_num == 2);
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, image_2d, axis );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
uint32_t index = 2;
|
||||
vsi_nn_kernel_node_pack_io( node_params, _L1NORM_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputZp );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis_size );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _L1NORM_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[2] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CL( l1norm, _setup )
|
||||
|
||||
|
|
@ -35,6 +35,7 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -212,27 +213,52 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_node_param_t node_params[_LOGICAL_NOT_PARAM_NUM] = {NULL};
|
||||
vsi_bool image_2d = FALSE;
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
|
||||
vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
|
||||
vsi_size_t new_rank = 0;
|
||||
vsi_bool ret = FALSE;
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
ret = vsi_nn_kernel_optimize_element_shape(
|
||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||
shape, &new_rank );
|
||||
|
||||
if ( ret )
|
||||
{
|
||||
return NULL;
|
||||
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[0], shape, new_rank );
|
||||
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
|
||||
outputs[0], shape, new_rank );
|
||||
}
|
||||
else
|
||||
{
|
||||
goto final;
|
||||
}
|
||||
|
||||
image_2d = (outputs[0]->attr.dim_num == 2 || outputs[0]->attr.size[2] == 1);
|
||||
status = _query_kernel( kernel, inputs, outputs, image_2d);
|
||||
if( VSI_SUCCESS == status)
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[1]->attr.size,
|
||||
reshape_tensors[1]->attr.dim_num ) )
|
||||
{
|
||||
goto final;
|
||||
}
|
||||
|
||||
image_2d = (reshape_tensors[1]->attr.dim_num == 2 || reshape_tensors[1]->attr.size[2] == 1);
|
||||
status = _query_kernel( kernel, &reshape_tensors[0], &reshape_tensors[1], image_2d);
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_NOT_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
&reshape_tensors[0], input_num, &reshape_tensors[1], output_num );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_NOT_PARAM_NUM );
|
||||
}
|
||||
}
|
||||
|
||||
final:
|
||||
vsi_safe_release_tensor( reshape_tensors[0] );
|
||||
vsi_safe_release_tensor( reshape_tensors[1] );
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -228,30 +228,75 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_node_param_t node_params[_LOGICAL_OPS_PARAM_NUM] = {NULL};
|
||||
vsi_bool image_2d = FALSE;
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
|
||||
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
|
||||
vsi_size_t new_rank = 0;
|
||||
vsi_bool ret = FALSE;
|
||||
uint32_t ops_type = vsi_nn_kernel_param_get_int32( params, "ops_type" );
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
ret = vsi_nn_kernel_optimize_eltwise_shape(
|
||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||
inputs[1]->attr.size, inputs[1]->attr.dim_num,
|
||||
outputs[0]->attr.size, outputs[0]->attr.dim_num,
|
||||
shapes[0], shapes[1], shapes[2], &new_rank );
|
||||
|
||||
if ( ret )
|
||||
{
|
||||
return NULL;
|
||||
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[0], shapes[0], new_rank );
|
||||
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
|
||||
inputs[1], shapes[1], new_rank );
|
||||
reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
|
||||
outputs[0], shapes[2], new_rank );
|
||||
|
||||
#define _swap_tensor(a, b, tmp) \
|
||||
do { \
|
||||
tmp = a; \
|
||||
a = b; \
|
||||
b = tmp; \
|
||||
} while(0)
|
||||
|
||||
if (shapes[1][3] > shapes[0][3] && new_rank == 4)
|
||||
{
|
||||
vsi_nn_tensor_t* reshape_tmp;
|
||||
_swap_tensor(reshape_tensors[0], reshape_tensors[1], reshape_tmp);
|
||||
}
|
||||
|
||||
#undef _swap_tensor
|
||||
}
|
||||
else
|
||||
{
|
||||
goto final;
|
||||
}
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
|
||||
reshape_tensors[2]->attr.dim_num ) )
|
||||
{
|
||||
goto final;
|
||||
}
|
||||
|
||||
image_2d = (outputs[0]->attr.dim_num == 2);
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, image_2d, (vsi_nn_logical_ops_type_t)ops_type);
|
||||
status = _query_kernel( kernel, reshape_tensors, &reshape_tensors[2],
|
||||
image_2d, (vsi_nn_logical_ops_type_t)ops_type);
|
||||
|
||||
if( VSI_SUCCESS == status)
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
if ( node )
|
||||
{
|
||||
/* Pass parameters to node. */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_OPS_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
reshape_tensors, input_num, &reshape_tensors[2], output_num );
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_OPS_PARAM_NUM );
|
||||
}
|
||||
}
|
||||
|
||||
final:
|
||||
vsi_safe_release_tensor( reshape_tensors[0] );
|
||||
vsi_safe_release_tensor( reshape_tensors[1] );
|
||||
vsi_safe_release_tensor( reshape_tensors[2] );
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
|
|
|
|||
|
|
@ -64,12 +64,12 @@ __BEGIN_DECLS
|
|||
|
||||
#define TENSOR_MATRIXMUL_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
|
||||
{ HASH_MATRIXMUL_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, 0), \
|
||||
HASH_MATRIXMUL_SH_KERNEL_NAME(F32, F32, F32, IMAGE_DIM), \
|
||||
HASH_MATRIXMUL_SH_KERNEL_NAME(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM), \
|
||||
SOURCE },
|
||||
|
||||
#define TENSOR_MATRIXMUL_TRANSA_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
|
||||
{ HASH_MATRIXMUL_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, 1), \
|
||||
HASH_MATRIXMUL_TRANSA_SH_KERNEL_NAME(F32, F32, F32, IMAGE_DIM), \
|
||||
HASH_MATRIXMUL_TRANSA_SH_KERNEL_NAME(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM), \
|
||||
SOURCE },
|
||||
|
||||
#define TENSOR_MATRIXMUL_TRANSB_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
|
||||
|
|
@ -83,18 +83,32 @@ static const struct {
|
|||
const char* source_name;
|
||||
} matrixmul_map[] =
|
||||
{
|
||||
TENSOR_MATRIXMUL_KERNELS(F16, F16, F16, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(F16, F16, F16, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(F16, F16, F16, _2D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(F16, F16, F16, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_2)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_1)
|
||||
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_1)
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -198,10 +212,44 @@ static vsi_status _query_kernel
|
|||
dim_type = _3D;
|
||||
}
|
||||
|
||||
if (input0_dtype == I16 || input0_dtype == I32)
|
||||
{
|
||||
input0_dtype = I8;
|
||||
}
|
||||
else if (input0_dtype == F16)
|
||||
{
|
||||
input0_dtype = F32;
|
||||
}
|
||||
else if (input0_dtype == U32)
|
||||
{
|
||||
input0_dtype = U8;
|
||||
}
|
||||
|
||||
if (input1_dtype == I16 || input1_dtype == I32)
|
||||
{
|
||||
input1_dtype = I8;
|
||||
}
|
||||
else if (input1_dtype == F16)
|
||||
{
|
||||
input1_dtype = F32;
|
||||
}
|
||||
else if (input1_dtype == U32)
|
||||
{
|
||||
input1_dtype = U8;
|
||||
}
|
||||
|
||||
if (output_dtype == I16 || output_dtype == I32)
|
||||
{
|
||||
output_dtype = I8;
|
||||
}
|
||||
else if (output_dtype == F16)
|
||||
{
|
||||
output_dtype = F32;
|
||||
}
|
||||
else if (output_dtype == U32)
|
||||
{
|
||||
output_dtype = U8;
|
||||
}
|
||||
|
||||
key = HASH_MATRIXMUL_KEY( input0_dtype, input1_dtype, output_dtype, dim_type, transa );
|
||||
|
||||
|
|
@ -260,6 +308,8 @@ static vsi_nn_kernel_node_t _setup
|
|||
float scale_out = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float zp_out = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
|
||||
scale_out = 1 / scale_out;
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
{
|
||||
|
|
|
|||
|
|
@ -0,0 +1,330 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
|
||||
#define _MAXUNPOOL_KERNEL_SOURCE_NAME "maxunpool"
|
||||
|
||||
// Add kernel hashtable here
|
||||
#define MAXUNPOOL_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE ) \
|
||||
(( IN_DTYPE0 << 16 ) | ( IN_DTYPE1 << 8 ) | ( OUT_DTYPE ))
|
||||
#define MAXUNPOOL_KERNELS( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE ) \
|
||||
{ MAXUNPOOL_HASH_KEY( IN_DTYPE0, I32, OUT_DTYPE ), \
|
||||
CVIVANTE_NAMESPACE("cl.maxunpool_"#IN_DTYPE0"to"#OUT_DTYPE), \
|
||||
_MAXUNPOOL_KERNEL_SOURCE_NAME },
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t key;
|
||||
char * function_name;
|
||||
const char * source_name;
|
||||
} _kernel_map_type;
|
||||
|
||||
static const _kernel_map_type _maxunpool_kernel_map[] =
|
||||
{
|
||||
// Register kernel here
|
||||
MAXUNPOOL_KERNELS( F32, I32, F32)
|
||||
MAXUNPOOL_KERNELS( F32, I32, U32)
|
||||
MAXUNPOOL_KERNELS( F32, I32, I32)
|
||||
MAXUNPOOL_KERNELS( U32, I32, U32)
|
||||
MAXUNPOOL_KERNELS( U32, I32, F32)
|
||||
MAXUNPOOL_KERNELS( I32, I32, I32)
|
||||
MAXUNPOOL_KERNELS( I32, I32, F32)
|
||||
MAXUNPOOL_KERNELS( BF16, I32, BF16)
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
|
||||
static vx_param_description_t _maxunpool_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _MAXUNPOOL_PARAM_NUM _cnt_of_array( _maxunpool_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_maxunpool_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
gpu_param_t gpu_param = {
|
||||
3,
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0}
|
||||
};
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vx_tensor output = (vx_tensor)param[2];
|
||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||
vsi_size_array_t *output_shape = NULL;
|
||||
|
||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||
|
||||
output_shape = output_attr->shape;
|
||||
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = 1;
|
||||
gpu_param.global_size[0] = (output_shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||
/ gpu_param.global_scale[0];
|
||||
gpu_param.global_size[1] = (output_shape->data[1] + gpu_param.global_scale[1] - 1)
|
||||
/ gpu_param.global_scale[1];
|
||||
gpu_param.global_size[2] = (output_shape->data[2] + gpu_param.global_scale[2] - 1)
|
||||
/ gpu_param.global_scale[2];
|
||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||
|
||||
final:
|
||||
if (output_attr)
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release(&output_attr);
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _maxunpool_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _maxunpool_kernel_map;
|
||||
vx_kernel_initialize_f initializer = _maxunpool_initializer;
|
||||
vx_param_description_t * param_def = _maxunpool_kernel_param_def;
|
||||
size_t kernel_map_size = _cnt_of_array( _maxunpool_kernel_map );
|
||||
size_t param_size = _cnt_of_array( _maxunpool_kernel_param_def );
|
||||
uint32_t key;
|
||||
uint32_t i;
|
||||
|
||||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
|
||||
(( in_dtype ) | (out_dtype << 8 ))
|
||||
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
case _PACK_SELECT_KEY(F32, F16):
|
||||
case _PACK_SELECT_KEY(F16, F32):
|
||||
key = MAXUNPOOL_HASH_KEY( F32, I32, F32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, U8):
|
||||
case _PACK_SELECT_KEY(F16, U8):
|
||||
key = MAXUNPOOL_HASH_KEY( F32, I32, U32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F32, I16):
|
||||
case _PACK_SELECT_KEY(F16, I8):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
key = MAXUNPOOL_HASH_KEY( F32, I32, I32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, U8):
|
||||
key = MAXUNPOOL_HASH_KEY( U32, I32, U32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, F16):
|
||||
case _PACK_SELECT_KEY(U8, F32):
|
||||
key = MAXUNPOOL_HASH_KEY( U32, I32, F32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, I8):
|
||||
case _PACK_SELECT_KEY(I8, I16):
|
||||
case _PACK_SELECT_KEY(I16, I8):
|
||||
case _PACK_SELECT_KEY(I16, I16):
|
||||
key = MAXUNPOOL_HASH_KEY( I32, I32, I32);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, F16):
|
||||
case _PACK_SELECT_KEY(I8, F32):
|
||||
case _PACK_SELECT_KEY(I16, F16):
|
||||
case _PACK_SELECT_KEY(I16, F32):
|
||||
key = MAXUNPOOL_HASH_KEY( I32, I32, F32);
|
||||
break;
|
||||
default:
|
||||
key = MAXUNPOOL_HASH_KEY( in_dtype, I32, out_dtype);
|
||||
break;
|
||||
}
|
||||
#undef _PACK_SELECT_KEY
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = (uint32_t)param_size;
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
||||
"eltwise_ops_helper",
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_MAXUNPOOL_PARAM_NUM] = { NULL };
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
|
||||
int32_t pad_right = vsi_nn_kernel_param_get_int32(params, "pad_right");
|
||||
int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
|
||||
int32_t pad_bottom = vsi_nn_kernel_param_get_int32(params, "pad_bottom");
|
||||
int32_t width_in = (int32_t)inputs[0]->attr.size[0];
|
||||
int32_t height_in = (int32_t)inputs[0]->attr.size[1];
|
||||
int32_t width = (int32_t)outputs[0]->attr.size[0];
|
||||
int32_t height = (int32_t)outputs[0]->attr.size[1];
|
||||
int32_t batch = (int32_t)outputs[0]->attr.size[2];
|
||||
int32_t width_nopad = width - pad_left - pad_right;
|
||||
int32_t height_nopad = height - pad_top - pad_bottom;
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num )
|
||||
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
uint32_t index = 3;
|
||||
vsi_nn_kernel_node_pack_io( node_params, _MAXUNPOOL_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width_nopad );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height_nopad );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width_in );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height_in );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &batch );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _MAXUNPOOL_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[7] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[8] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[9] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[10] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[11] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[12] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[13] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CL( maxunpool, _setup )
|
||||
|
||||
|
|
@ -81,9 +81,11 @@ static const struct {
|
|||
{
|
||||
TENSOR_POW_KERNELS_FLOAT(F32, F32, F32, KERNEL_SOURCE_1)
|
||||
TENSOR_POW_KERNELS_FLOAT(F16, F16, F16, KERNEL_SOURCE_1)
|
||||
TENSOR_POW_KERNELS(U32, F32, U32, KERNEL_SOURCE_1)
|
||||
|
||||
TENSOR_POW_KERNELS_2D_FLOAT(F32, F32, F32, KERNEL_SOURCE_1)
|
||||
TENSOR_POW_KERNELS_2D_FLOAT(F16, F16, F16, KERNEL_SOURCE_1)
|
||||
TENSOR_POW_KERNELS_2D(U32, F32, U32, KERNEL_SOURCE_1)
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -94,6 +96,10 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
#define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)
|
||||
|
|
@ -179,7 +185,25 @@ static vsi_status _query_kernel
|
|||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
key = HASH_POW_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
|
||||
|
||||
#define _PACK_SELECT_KEY( input0_dtype, input1_dtype, output_dtype) \
|
||||
((input0_dtype) | (input1_dtype << 8) | (output_dtype << 16))
|
||||
switch(_PACK_SELECT_KEY(input0_dtype, input1_dtype, output_dtype))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F16, F16, F16):
|
||||
case _PACK_SELECT_KEY(F32, F32, F32):
|
||||
key = HASH_POW_KEY( F32, F32, F32, image_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, F16, U8):
|
||||
case _PACK_SELECT_KEY(U8, F32, U8):
|
||||
case _PACK_SELECT_KEY(U32, F16, U32):
|
||||
case _PACK_SELECT_KEY(U32, F32, U32):
|
||||
key = HASH_POW_KEY( U32, F32, U32, image_2d );
|
||||
break;
|
||||
default:
|
||||
key = HASH_POW_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
|
||||
break;
|
||||
}
|
||||
|
||||
for( i = 0; i < _cnt_of_array(pow_map); i ++ )
|
||||
{
|
||||
|
|
@ -219,6 +243,13 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
|
||||
vsi_bool image_2d = FALSE;
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
|
|
@ -234,11 +265,20 @@ static vsi_nn_kernel_node_t _setup
|
|||
|
||||
if( node )
|
||||
{
|
||||
uint32_t index = 3;
|
||||
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
|
||||
inputs, 2, outputs, 1 );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
|
||||
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
VSI_ASSERT( status == VSI_SUCCESS );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,307 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
|
||||
#define _REVERSESEQUENCE_KERNEL_SOURCE_NAME "reversesequence"
|
||||
|
||||
// Add kernel hashtable here
|
||||
#define REVERSESEQUENCE_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ) \
|
||||
(( IN_DTYPE0 << 24 ) | ( IN_DTYPE1 << 16 ) | ( OUT_DTYPE << 8) | (batch_axis) )
|
||||
#define REVERSESEQUENCE_KERNELS( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ) \
|
||||
{ REVERSESEQUENCE_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ), \
|
||||
CVIVANTE_NAMESPACE("cl.reversesequence_"#IN_DTYPE0"to"#OUT_DTYPE#batch_axis), \
|
||||
_REVERSESEQUENCE_KERNEL_SOURCE_NAME },
|
||||
|
||||
typedef enum
|
||||
{
|
||||
_axis1 = 0,
|
||||
_axis2
|
||||
} vsi_nn_kernel_batch_axis_type_e;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t key;
|
||||
char * function_name;
|
||||
const char * source_name;
|
||||
} _kernel_map_type;
|
||||
|
||||
static const _kernel_map_type _reversesequence_kernel_map[] =
|
||||
{
|
||||
// Register kernel here
|
||||
REVERSESEQUENCE_KERNELS( F32, I32, F32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( F32, I32, U32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( F32, I32, I32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( U32, I32, U32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( U32, I32, F32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( I32, I32, I32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( I32, I32, F32, _axis1)
|
||||
REVERSESEQUENCE_KERNELS( BF16, I32, BF16, _axis1)
|
||||
|
||||
REVERSESEQUENCE_KERNELS( F32, I32, F32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( F32, I32, U32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( F32, I32, I32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( U32, I32, U32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( U32, I32, F32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( I32, I32, I32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( I32, I32, F32, _axis2)
|
||||
REVERSESEQUENCE_KERNELS( BF16, I32, BF16, _axis2)
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _reversesequence_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _REVERSESEQUENCE_PARAM_NUM _cnt_of_array( _reversesequence_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_reversesequence_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
gpu_param_t gpu_param = {
|
||||
3,
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0}
|
||||
};
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vx_tensor input = (vx_tensor)param[0];
|
||||
vsi_nn_kernel_tensor_attr_t *input_attr = NULL;
|
||||
vsi_size_array_t *input_shape = NULL;
|
||||
|
||||
input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input );
|
||||
CHECK_PTR_FAIL_GOTO( input_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||
|
||||
input_shape = input_attr->shape;
|
||||
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = 1;
|
||||
gpu_param.global_size[0] = (input_shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||
/ gpu_param.global_scale[0];
|
||||
gpu_param.global_size[1] = (input_shape->data[1] + gpu_param.global_scale[1] - 1)
|
||||
/ gpu_param.global_scale[1];
|
||||
gpu_param.global_size[2] = (input_shape->data[2] + gpu_param.global_scale[2] - 1)
|
||||
/ gpu_param.global_scale[2];
|
||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||
final:
|
||||
if (input_attr)
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release(&input_attr);
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _reversesequence_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
int32_t batch_axis
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _reversesequence_kernel_map;
|
||||
size_t kernel_map_size = _cnt_of_array( _reversesequence_kernel_map );
|
||||
vx_param_description_t * param_def = _reversesequence_kernel_param_def;
|
||||
vx_kernel_initialize_f initializer = _reversesequence_initializer;
|
||||
vsi_nn_kernel_batch_axis_type_e axis_type = _axis1;
|
||||
uint32_t key;
|
||||
uint32_t i;
|
||||
|
||||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
if (batch_axis == 2)
|
||||
{
|
||||
axis_type = _axis2;
|
||||
}
|
||||
|
||||
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
|
||||
(( in_dtype ) | (out_dtype << 8 ))
|
||||
switch(_PACK_SELECT_KEY( in_dtype, out_dtype ))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
key = REVERSESEQUENCE_HASH_KEY( F32, I32, F32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F16, U8):
|
||||
case _PACK_SELECT_KEY(F32, U8):
|
||||
key = REVERSESEQUENCE_HASH_KEY( F32, I32, U32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F16, I8):
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
case _PACK_SELECT_KEY(F32, I16):
|
||||
key = REVERSESEQUENCE_HASH_KEY( F32, I32, I32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, U8):
|
||||
key = REVERSESEQUENCE_HASH_KEY( U32, I32, U32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U8, F16):
|
||||
case _PACK_SELECT_KEY(U8, F32):
|
||||
key = REVERSESEQUENCE_HASH_KEY( U32, I32, F32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, I8):
|
||||
case _PACK_SELECT_KEY(I16, I16):
|
||||
key = REVERSESEQUENCE_HASH_KEY( I32, I32, I32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, F16):
|
||||
case _PACK_SELECT_KEY(I8, F32):
|
||||
case _PACK_SELECT_KEY(I16, F16):
|
||||
case _PACK_SELECT_KEY(I16, F32):
|
||||
key = REVERSESEQUENCE_HASH_KEY( I32, I32, F32, axis_type);
|
||||
break;
|
||||
case _PACK_SELECT_KEY(BF16, BF16):
|
||||
key = REVERSESEQUENCE_HASH_KEY( BF16, I32, BF16, axis_type);
|
||||
break;
|
||||
default:
|
||||
key = REVERSESEQUENCE_HASH_KEY( in_dtype, I32, out_dtype, axis_type);
|
||||
break;
|
||||
}
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _reversesequence_kernel_param_def );
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
||||
"eltwise_ops_helper",
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_REVERSESEQUENCE_PARAM_NUM] = { NULL };
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t batch_axis = vsi_nn_kernel_param_get_int32(params, "batch_axis");
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
float inoutScale = inputScale / outputScale;
|
||||
float inoutTail = outputTail - inputTail * inoutScale;
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num )
|
||||
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, batch_axis );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
uint32_t index = 3;
|
||||
vsi_nn_kernel_node_pack_io( node_params, _REVERSESEQUENCE_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inoutScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inoutTail );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _REVERSESEQUENCE_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CL( reversesequence, _setup )
|
||||
|
||||
|
|
@ -88,6 +88,7 @@ static vx_param_description_t _roi_align_kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _ROI_ALIGN_PARAM_NUM _cnt_of_array( _roi_align_kernel_param_def )
|
||||
|
||||
|
|
@ -105,8 +106,9 @@ static vx_param_description_t _roi_align_kernel_param_def[] =
|
|||
#define SCALAR_SAMPLING_Y_RATIO (15)
|
||||
#define SCALAR_DEPTH (16)
|
||||
#define SCALAR_FORMAT (17)
|
||||
#define PLATFORM_TYPE (18)
|
||||
|
||||
#define ROI_ALIGN_PARAM_NUM 18
|
||||
#define ROI_ALIGN_PARAM_NUM 19
|
||||
#define ROI_ALIGN_QUANT_PARAM_NUM _cnt_of_array( _roi_align_kernel_param_def )
|
||||
|
||||
/*
|
||||
|
|
@ -250,6 +252,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
float height_ratio = vsi_nn_kernel_param_get_float32( params, "height_ratio" );
|
||||
int32_t width_sample_num = vsi_nn_kernel_param_get_int32( params, "width_sample_num" );
|
||||
int32_t height_sample_num = vsi_nn_kernel_param_get_int32( params, "height_sample_num" );
|
||||
int32_t platform_type = vsi_nn_kernel_param_get_int32( params, "platform_type" );
|
||||
float input_zp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
float input_scale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float input_tail = -(input_zp * input_scale);
|
||||
|
|
@ -318,6 +321,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
node_params[SCALAR_SAMPLING_Y_RATIO] = vsi_nn_kernel_scalar_create( graph, F32, &sampling_y_ratio );
|
||||
node_params[SCALAR_DEPTH] = vsi_nn_kernel_scalar_create( graph, I32, &depth );
|
||||
node_params[SCALAR_FORMAT] = vsi_nn_kernel_scalar_create( graph, I32, &dtype );
|
||||
node_params[PLATFORM_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &platform_type );
|
||||
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
|
||||
|
|
@ -336,6 +340,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_DEPTH] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_DEPTH] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_FORMAT] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[PLATFORM_TYPE] );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
|
|||
uint32_t i = 0;
|
||||
vsi_size_t elementCnt = 1;
|
||||
|
||||
if(coordDim != 0 && (width == NULL || area == NULL))
|
||||
if (coordDim != 0 && (width == NULL || area == NULL))
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
|
@ -118,17 +118,17 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
|
|||
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
|
||||
|
||||
newDim[0] = 0;
|
||||
for(i = 0; i < dims_num; ++i)
|
||||
for (i = 0; i < dims_num; ++i)
|
||||
{
|
||||
elementCnt *= input_size[i];
|
||||
}
|
||||
|
||||
for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
|
||||
for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
|
||||
{
|
||||
sizes[i] = 1;
|
||||
}
|
||||
|
||||
if((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
|
||||
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
|
||||
{
|
||||
sizes[0] = block_size;
|
||||
sizes[1] = elementCnt / block_size;
|
||||
|
|
@ -140,17 +140,17 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
|
|||
return status;
|
||||
}
|
||||
|
||||
if(coordDim == 1) // index shape
|
||||
if (coordDim == 1) // index shape
|
||||
{
|
||||
*width = 0;
|
||||
*area = 0;
|
||||
}
|
||||
else if(coordDim == 2)
|
||||
else if (coordDim == 2)
|
||||
{
|
||||
*width = input_size[dims_num - 2];
|
||||
*area = 0;
|
||||
}
|
||||
else if(coordDim == 3)
|
||||
else if (coordDim == 3)
|
||||
{
|
||||
*width = input_size[dims_num - 3];
|
||||
*area = input_size[dims_num - 3] * input_size[dims_num - 2];
|
||||
|
|
@ -226,30 +226,33 @@ static vsi_status _query_kernel
|
|||
|
||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
if(coord_dim == 1)
|
||||
if (coord_dim == 1)
|
||||
{
|
||||
coord_type = _1D;
|
||||
}
|
||||
else if(coord_dim == 2)
|
||||
else if (coord_dim == 2)
|
||||
{
|
||||
coord_type = _2D;
|
||||
}
|
||||
else if(coord_dim == 3)
|
||||
else if (coord_dim == 3)
|
||||
{
|
||||
coord_type = _3D;
|
||||
}
|
||||
|
||||
input1_dtype = input1_dtype == F16 ? F32 : input1_dtype;
|
||||
output_dtype = output_dtype == F16 ? F32 : output_dtype;
|
||||
|
||||
key = HASH_SCATTER_ND_KEY( I32, input1_dtype, output_dtype, coord_type );
|
||||
|
||||
for( i = 0; i < _cnt_of_array(scatter_nd_map); i ++ )
|
||||
for ( i = 0; i < _cnt_of_array(scatter_nd_map); i ++ )
|
||||
{
|
||||
if( scatter_nd_map[i].key == key )
|
||||
if ( scatter_nd_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( i < _cnt_of_array(scatter_nd_map) )
|
||||
if ( i < _cnt_of_array(scatter_nd_map) )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", scatter_nd_map[i].function_name );
|
||||
kernel->info.parameters = _scatter_nd_kernel_param_def;
|
||||
|
|
@ -287,26 +290,31 @@ static vsi_nn_kernel_node_t _setup
|
|||
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
|
||||
vsi_size_t width = 0, area = 0;
|
||||
|
||||
status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
|
||||
status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);
|
||||
status |= cal_scatter_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, coord_dim,
|
||||
&width, &area, &rs_out_dim);
|
||||
if(status != VSI_SUCCESS)
|
||||
if (coord_dim > 3)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
|
||||
status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);
|
||||
status |= cal_scatter_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, coord_dim,
|
||||
&width, &area, &rs_out_dim);
|
||||
if (status != VSI_SUCCESS)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||
outputs[0]->attr.dim_num ) )
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, coord_dim );
|
||||
if( VSI_SUCCESS == status)
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
if ( node )
|
||||
{
|
||||
uint32_t index = 0;
|
||||
/* Pass parameters to node. */
|
||||
|
|
|
|||
|
|
@ -111,12 +111,12 @@ static vsi_status cal_scatter_nd_update_tensor_reshape_size
|
|||
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
|
||||
|
||||
newDim[0] = 0;
|
||||
for(i = 0; i < dims_num; ++i)
|
||||
for (i = 0; i < dims_num; ++i)
|
||||
{
|
||||
elementCnt *= input_size[i];
|
||||
}
|
||||
|
||||
for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
|
||||
for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
|
||||
{
|
||||
sizes[i] = 1;
|
||||
}
|
||||
|
|
@ -235,7 +235,7 @@ static vsi_status _query_kernel
|
|||
|
||||
key = HASH_SCATTER_ND_UPDATE_KEY( input0_dtype, input2_dtype, output_dtype, 0 );
|
||||
|
||||
for( i = 0; i < _cnt_of_array(scatter_nd_update_map); i ++ )
|
||||
for ( i = 0; i < _cnt_of_array(scatter_nd_update_map); i ++ )
|
||||
{
|
||||
if ( scatter_nd_update_map[i].key == key )
|
||||
{
|
||||
|
|
@ -281,6 +281,13 @@ static vsi_nn_kernel_node_t _setup
|
|||
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
|
||||
vsi_size_t width = 0, area = 0, vol = 0;
|
||||
int32_t offsetX = 0, offsetY = 0, offsetZ = 0, offsetW = 0, offset_idx = 0;
|
||||
vsi_size_t *input_size = inputs[2]->attr.size;
|
||||
uint32_t dims_num = inputs[2]->attr.dim_num;
|
||||
|
||||
if (coord_dim > 4 && input_size[dims_num - 1] > 1)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
status = cal_scatter_nd_update_tensor_reshape_size(&inputs[1], shapes[0],
|
||||
coord_dim, 0, NULL, NULL, NULL, &rs_in_dim);
|
||||
|
|
|
|||
|
|
@ -113,6 +113,8 @@ static const _kernel_map_type _swish_kernel_map[] =
|
|||
SWISH_PACK_KERNEL_MAP_2D(U8, U8),
|
||||
SWISH_PACK_KERNEL_MAP(I32, I32),
|
||||
SWISH_PACK_KERNEL_MAP_2D(I32, I32),
|
||||
SWISH_PACK_KERNEL_MAP(F32, U8),
|
||||
SWISH_PACK_KERNEL_MAP_2D(F32, U8),
|
||||
HSWISH_PACK_KERNEL_FLOAT_MAP(F32, F32),
|
||||
HSWISH_PACK_KERNEL_FLOAT_MAP_2D(F32, F32),
|
||||
HSWISH_PACK_KERNEL_FLOAT_MAP(F16, F16),
|
||||
|
|
@ -222,6 +224,11 @@ static vsi_status _query_kernel
|
|||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
if (in_dtype == F16)
|
||||
in_dtype = F32;
|
||||
if (out_dtype == F16)
|
||||
out_dtype = F32;
|
||||
|
||||
key = SWISH_HASH_KEY(swish_type, in_dtype, out_dtype, image_2d);
|
||||
|
||||
for( i = 0; i < kernel_map_size; i ++ )
|
||||
|
|
|
|||
|
|
@ -279,7 +279,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
vsi_size_t new_rank = 0;
|
||||
vsi_bool ret = FALSE;
|
||||
uint32_t dim = inputs[0]->attr.dim_num;
|
||||
vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 0 };
|
||||
vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 1, 1, 1, 1 };
|
||||
|
||||
for ( i = 0; i < dim; i++)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -55,6 +55,13 @@ __BEGIN_DECLS
|
|||
CVIVANTE_NAMESPACE("cl.topk_odd_even_sort_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
|
||||
"topk_odd_even_sort" }
|
||||
|
||||
#define TOPK_ODD_EVEN_SORT_HASH_KEY2( IN_DTYPE, OUT_DTYPE ) \
|
||||
( ( IN_DTYPE ) | ( OUT_DTYPE << 8 ) )
|
||||
#define PACK_ODD_EVEN_SORT_KERNEL_MAP2( IN_DTYPE, OUT_DTYPE ) \
|
||||
{ TOPK_ODD_EVEN_SORT_HASH_KEY2( IN_DTYPE, OUT_DTYPE ), \
|
||||
CVIVANTE_NAMESPACE("cl.topk_odd_even_sort_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
|
||||
"topk_odd_even_sort2" }
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t key;
|
||||
|
|
@ -88,6 +95,22 @@ static const _kernel_map_type _topk_kernel_map[] =
|
|||
PACK_KERNEL_MAP( I32, I32, 4 ),
|
||||
PACK_KERNEL_MAP( I32, I32, 5 ),
|
||||
PACK_KERNEL_MAP( I32, I32, 6 ),
|
||||
|
||||
PACK_KERNEL_MAP( F32, U32, 0 ),
|
||||
PACK_KERNEL_MAP( F32, U32, 1 ),
|
||||
PACK_KERNEL_MAP( F32, U32, 2 ),
|
||||
PACK_KERNEL_MAP( F32, U32, 3 ),
|
||||
PACK_KERNEL_MAP( F32, U32, 4 ),
|
||||
PACK_KERNEL_MAP( F32, U32, 5 ),
|
||||
PACK_KERNEL_MAP( F32, U32, 6 ),
|
||||
|
||||
PACK_KERNEL_MAP( F32, I32, 0 ),
|
||||
PACK_KERNEL_MAP( F32, I32, 1 ),
|
||||
PACK_KERNEL_MAP( F32, I32, 2 ),
|
||||
PACK_KERNEL_MAP( F32, I32, 3 ),
|
||||
PACK_KERNEL_MAP( F32, I32, 4 ),
|
||||
PACK_KERNEL_MAP( F32, I32, 5 ),
|
||||
PACK_KERNEL_MAP( F32, I32, 6 ),
|
||||
};
|
||||
|
||||
static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
|
||||
|
|
@ -96,6 +119,8 @@ static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
|
|||
PACK_ODD_EVEN_SORT_KERNEL_MAP( F32, F32 ),
|
||||
PACK_ODD_EVEN_SORT_KERNEL_MAP( U32, U32 ),
|
||||
PACK_ODD_EVEN_SORT_KERNEL_MAP( I32, I32 ),
|
||||
PACK_ODD_EVEN_SORT_KERNEL_MAP2( F32, U32 ),
|
||||
PACK_ODD_EVEN_SORT_KERNEL_MAP2( F32, I32 ),
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -108,11 +133,15 @@ static vx_param_description_t _topk_kernel_param_def[] =
|
|||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
// Add kererl parameters here
|
||||
};
|
||||
#define _TOPK_PARAM_NUM _cnt_of_array( _topk_kernel_param_def )
|
||||
#define SCALAR_INPUT_NUM_STAGES (3)
|
||||
#define SCALAR_INPUT_WIDTH (4)
|
||||
#define SCALAR_INPUT_NUM_STAGES (7)
|
||||
#define SCALAR_INPUT_WIDTH (8)
|
||||
|
||||
static vx_param_description_t _topk_odd_even_sort_kernel_param_def[] =
|
||||
{
|
||||
|
|
@ -122,10 +151,14 @@ static vx_param_description_t _topk_odd_even_sort_kernel_param_def[] =
|
|||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
// Add kererl parameters here
|
||||
};
|
||||
#define _TOPK_ODD_EVEN_SORT_PARAM_NUM _cnt_of_array( _topk_odd_even_sort_kernel_param_def )
|
||||
#define SCALAR_INPUT_SIZE (5)
|
||||
#define SCALAR_INPUT_SIZE (9)
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
|
|
@ -251,6 +284,22 @@ static vsi_status _query_kernel
|
|||
case _PACK_SELECT_KEY(I8, I8):
|
||||
key = TOPK_HASH_KEY( I32, I32, num_stages );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, U32):
|
||||
case _PACK_SELECT_KEY(F16, U32):
|
||||
case _PACK_SELECT_KEY(F32, U16):
|
||||
case _PACK_SELECT_KEY(F16, U16):
|
||||
case _PACK_SELECT_KEY(F32, U8):
|
||||
case _PACK_SELECT_KEY(F16, U8):
|
||||
key = TOPK_HASH_KEY( F32, U32, num_stages );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, I32):
|
||||
case _PACK_SELECT_KEY(F16, I32):
|
||||
case _PACK_SELECT_KEY(F32, I16):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F16, I8):
|
||||
key = TOPK_HASH_KEY( F32, I32, num_stages );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -318,6 +367,22 @@ static vsi_status _query_odd_even_sort_kernel
|
|||
case _PACK_SELECT_KEY(I8, I8):
|
||||
key = TOPK_ODD_EVEN_SORT_HASH_KEY( I32, I32 );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, U32):
|
||||
case _PACK_SELECT_KEY(F16, U32):
|
||||
case _PACK_SELECT_KEY(F32, U16):
|
||||
case _PACK_SELECT_KEY(F16, U16):
|
||||
case _PACK_SELECT_KEY(F32, U8):
|
||||
case _PACK_SELECT_KEY(F16, U8):
|
||||
key = TOPK_ODD_EVEN_SORT_HASH_KEY2( F32, U32 );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, I32):
|
||||
case _PACK_SELECT_KEY(F16, I32):
|
||||
case _PACK_SELECT_KEY(F32, I16):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F16, I8):
|
||||
key = TOPK_ODD_EVEN_SORT_HASH_KEY2( F32, I32 );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -372,14 +437,24 @@ static vsi_nn_kernel_node_t _setup
|
|||
int32_t num_stages = (int32_t)ceil(log10(block_size / 2.0f) / log10(2.0f));
|
||||
vsi_bool is_odd_even_sort = FALSE;
|
||||
size_t param_num = _TOPK_PARAM_NUM;
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
for (i = 1; i < inputs[0]->attr.dim_num; i ++)
|
||||
{
|
||||
block_num = block_num * inputs[0]->attr.size[i];
|
||||
}
|
||||
|
||||
if( vsi_nn_is_same_type(inputs[0], outputs[0]) == FALSE ||
|
||||
outputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_INT32 )
|
||||
if ((vsi_nn_is_same_type(inputs[0], outputs[0]) == FALSE ||
|
||||
outputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_INT32 ) &&
|
||||
!(inputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_FLOAT16 &&
|
||||
(outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_UINT8 ||
|
||||
outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_INT16)))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -425,10 +500,15 @@ static vsi_nn_kernel_node_t _setup
|
|||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
uint32_t index = (uint32_t)(input_num + output_num);
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, param_num,
|
||||
rs_tensors, input_num, &rs_tensors[input_num], output_num );
|
||||
/* Pass parameters to node. */
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create(graph, I32, &inputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create(graph, I32, &inputTail );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create(graph, I32, &outputScale );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create(graph, I32, &outputTail );
|
||||
if (is_odd_even_sort)
|
||||
{
|
||||
node_params[SCALAR_INPUT_SIZE] = vsi_nn_kernel_scalar_create(
|
||||
|
|
@ -452,8 +532,25 @@ final:
|
|||
vsi_safe_release_tensor(rs_tensors[2]);
|
||||
vsi_safe_release_tensor(rs_tensors[3]);
|
||||
vsi_safe_release_tensor(rs_tensors[4]);
|
||||
|
||||
if (is_odd_even_sort)
|
||||
{
|
||||
if (node_params[5])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
}
|
||||
if (node_params[6])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
}
|
||||
if (node_params[7])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[7] );
|
||||
}
|
||||
if (node_params[8])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[8] );
|
||||
}
|
||||
if (node_params[SCALAR_INPUT_SIZE])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SIZE] );
|
||||
|
|
@ -461,6 +558,22 @@ final:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (node_params[3])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
}
|
||||
if (node_params[4])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[4] );
|
||||
}
|
||||
if (node_params[5])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[5] );
|
||||
}
|
||||
if (node_params[6])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
}
|
||||
if (node_params[SCALAR_INPUT_NUM_STAGES])
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_NUM_STAGES] );
|
||||
|
|
|
|||
|
|
@ -1,243 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
#define _INPUT_NUM (2)
|
||||
#define _OUTPUT_NUM (1)
|
||||
#define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.add_mean_std_norm")
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _add_mean_std_norm_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _ADD_MEAN_STD_NORM_PARAM_NUM _cnt_of_array( _add_mean_std_norm_kernel_param_def )
|
||||
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
DEF_KERNEL_EXECUTOR(_compute)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
int32_t i;
|
||||
float mean = .0f, stddev_inv = .0f, variance = .0f, input_d = .0f, data = .0f, eps = .0f;
|
||||
vsi_ssize_t v_size, n_batch, batch;
|
||||
/* prepare data */
|
||||
for(i = 0; i < _INPUT_NUM; i ++)
|
||||
{
|
||||
input[i] = (vsi_nn_kernel_tensor_t)param[i];
|
||||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
|
||||
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
|
||||
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
|
||||
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
|
||||
out_bytes[i] = out_elements[i] * sizeof(float);
|
||||
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
|
||||
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
|
||||
memset( f32_out_buffer[i], 0, out_bytes[i] );
|
||||
}
|
||||
vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[_CPU_IO_NUM], &(eps));
|
||||
v_size = in_attr[0]->shape->data[0];
|
||||
n_batch = in_attr[0]->shape->data[1];
|
||||
|
||||
for (batch = 0; batch < n_batch; ++batch)
|
||||
{
|
||||
float sum = 0.0f;
|
||||
float sum_sq = 0.0f;
|
||||
vsi_ssize_t index_base = batch * v_size;
|
||||
for (i = 0; i < v_size; ++i)
|
||||
{
|
||||
vsi_ssize_t index = i + index_base;
|
||||
input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
|
||||
sum += input_d;
|
||||
sum_sq += input_d * input_d;
|
||||
}
|
||||
|
||||
mean = sum / v_size;
|
||||
stddev_inv = 0.0f;
|
||||
variance = sum_sq / v_size - mean * mean;
|
||||
|
||||
if (variance == 0)
|
||||
{
|
||||
stddev_inv = (float)(1.0f / sqrt(eps));
|
||||
}
|
||||
else
|
||||
{
|
||||
stddev_inv = (float)(1.0f / sqrt(variance));
|
||||
}
|
||||
|
||||
for (i = 0; i < v_size; ++i)
|
||||
{
|
||||
vsi_ssize_t index = i + index_base;
|
||||
input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
|
||||
data = (input_d - mean) * stddev_inv;
|
||||
f32_out_buffer[0][index] = data;
|
||||
}
|
||||
}
|
||||
|
||||
/* save data */
|
||||
for(i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
|
||||
f32_out_buffer[i], out_elements[i] );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
}
|
||||
|
||||
final:
|
||||
for (i = 0; i < _INPUT_NUM; i++)
|
||||
{
|
||||
if (f32_in_buffer[i])
|
||||
{
|
||||
free(f32_in_buffer[i]);
|
||||
f32_in_buffer[i] = NULL;
|
||||
}
|
||||
|
||||
if (in_attr[i])
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
|
||||
}
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
if (f32_out_buffer[i])
|
||||
{
|
||||
free(f32_out_buffer[i]);
|
||||
f32_out_buffer[i] = NULL;
|
||||
}
|
||||
|
||||
if (out_attr[i])
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _add_mean_std_norm_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _add_mean_std_norm_kernel_param_def );
|
||||
status = VSI_SUCCESS;
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_ADD_MEAN_STD_NORM_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _ADD_MEAN_STD_NORM_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[_CPU_IO_NUM] = vsi_nn_kernel_scalar_create( graph, F32, &eps );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _ADD_MEAN_STD_NORM_PARAM_NUM );
|
||||
VSI_ASSERT( status == VSI_SUCCESS );
|
||||
vsi_nn_kernel_scalar_release( &node_params[_CPU_IO_NUM] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( add_mean_std_norm, _setup )
|
||||
|
|
@ -1,201 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
__BEGIN_DECLS
|
||||
|
||||
#define _CPU_ARG_NUM (1)
|
||||
#define _CPU_INPUT_NUM (1)
|
||||
#define _CPU_OUTPUT_NUM (1)
|
||||
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
|
||||
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("argmax_sw")
|
||||
|
||||
DEF_KERNEL_EXECUTOR(_argmax_exec)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
||||
float * buffer[_CPU_IO_NUM] = { NULL };
|
||||
size_t out_elements = 0;
|
||||
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
|
||||
int32_t i;
|
||||
int32_t axis = 0;
|
||||
vsi_ssize_t outerSize = 1;
|
||||
vsi_ssize_t axisSize = 1;
|
||||
vsi_ssize_t innerSize = 1;
|
||||
vsi_ssize_t inner = 0;
|
||||
vsi_ssize_t outer = 0;
|
||||
|
||||
|
||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
||||
|
||||
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
||||
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
||||
|
||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
|
||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||
|
||||
buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
|
||||
|
||||
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
|
||||
buffer[1] = (float *)malloc( out_elements * sizeof(float) );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
|
||||
memset( buffer[1], 0, out_elements * sizeof(float) );
|
||||
|
||||
for (i = 0; i < axis; i++)
|
||||
{
|
||||
innerSize *= attr[0]->shape->data[i];
|
||||
}
|
||||
|
||||
axisSize = attr[0]->shape->data[axis];
|
||||
|
||||
for (i = axis + 1; i < (int32_t)attr[0]->shape->size; i++)
|
||||
{
|
||||
outerSize *= attr[0]->shape->data[i];
|
||||
}
|
||||
|
||||
for ( outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
for ( inner = 0; inner < innerSize; ++inner)
|
||||
{
|
||||
float minMaxValue = buffer[0][outer * axisSize * innerSize + inner];
|
||||
int32_t minMaxIndex = 0;
|
||||
for (i = 1; i < axisSize; ++i)
|
||||
{
|
||||
float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
|
||||
if (value > minMaxValue)
|
||||
{
|
||||
minMaxValue = value;
|
||||
minMaxIndex = i;
|
||||
}
|
||||
}
|
||||
buffer[1][outer * innerSize + inner] = (float)minMaxIndex;
|
||||
}
|
||||
}
|
||||
|
||||
status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
|
||||
buffer[1], out_elements );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
|
||||
final:
|
||||
for( i = 0; i < _CPU_IO_NUM; i ++ )
|
||||
{
|
||||
if( buffer[i] )
|
||||
{
|
||||
free( buffer[i] );
|
||||
}
|
||||
vsi_nn_kernel_tensor_attr_release( &attr[i] );
|
||||
}
|
||||
return status;
|
||||
} /* _minimum_exec() */
|
||||
|
||||
static vx_param_description_t kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
vsi_nn_tensor_t* const* const outputs,
|
||||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _argmax_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
#define SCALAR_INPUT_AXIS (2)
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_SUCCESS;
|
||||
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t axis = 0;
|
||||
|
||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||
|
||||
status = _query_kernel( inputs, outputs, kernel );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
|
||||
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
|
||||
backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
|
||||
graph, I32, &axis );
|
||||
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
|
||||
}
|
||||
else
|
||||
{
|
||||
status = VSI_FAILURE;
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( argmax, _setup )
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
#define _CPU_ARG_NUM (1)
|
||||
#define _CPU_INPUT_NUM (1)
|
||||
#define _CPU_OUTPUT_NUM (1)
|
||||
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
|
||||
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("argmin_sw")
|
||||
|
||||
DEF_KERNEL_EXECUTOR(_argmin_exec)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
||||
float * buffer[_CPU_IO_NUM] = { NULL };
|
||||
size_t out_elements = 0;
|
||||
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
|
||||
int32_t i;
|
||||
int32_t axis = 0;
|
||||
vsi_ssize_t outerSize = 1;
|
||||
vsi_ssize_t axisSize = 1;
|
||||
vsi_ssize_t innerSize = 1;
|
||||
vsi_ssize_t inner = 0;
|
||||
vsi_ssize_t outer = 0;
|
||||
|
||||
|
||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
||||
|
||||
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
||||
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
||||
|
||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
|
||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||
|
||||
buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
|
||||
|
||||
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
|
||||
buffer[1] = (float *)malloc( out_elements * sizeof(float) );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
|
||||
memset( buffer[1], 0, out_elements * sizeof(float) );
|
||||
|
||||
for (i = 0; i < axis; i++)
|
||||
{
|
||||
innerSize *= attr[0]->shape->data[i];
|
||||
}
|
||||
|
||||
axisSize = attr[0]->shape->data[axis];
|
||||
|
||||
for (i = axis + 1; i < (int32_t)attr[0]->shape->size; i++)
|
||||
{
|
||||
outerSize *= attr[0]->shape->data[i];
|
||||
}
|
||||
|
||||
for ( outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
for ( inner = 0; inner < innerSize; ++inner)
|
||||
{
|
||||
float minMaxValue = buffer[0][outer * axisSize * innerSize + inner];
|
||||
int32_t minMaxIndex = 0;
|
||||
for (i = 1; i < axisSize; ++i)
|
||||
{
|
||||
float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
|
||||
if (value < minMaxValue)
|
||||
{
|
||||
minMaxValue = value;
|
||||
minMaxIndex = i;
|
||||
}
|
||||
}
|
||||
buffer[1][outer * innerSize + inner] = (float)minMaxIndex;
|
||||
}
|
||||
}
|
||||
|
||||
status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
|
||||
buffer[1], out_elements );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
|
||||
final:
|
||||
for( i = 0; i < _CPU_IO_NUM; i ++ )
|
||||
{
|
||||
if( buffer[i] )
|
||||
{
|
||||
free( buffer[i] );
|
||||
}
|
||||
vsi_nn_kernel_tensor_attr_release( &attr[i] );
|
||||
}
|
||||
return status;
|
||||
} /* _minimum_exec() */
|
||||
|
||||
static vx_param_description_t kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
vsi_nn_tensor_t* const* const outputs,
|
||||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _argmin_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
#define SCALAR_INPUT_AXIS (2)
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_SUCCESS;
|
||||
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t axis = 0;
|
||||
|
||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||
|
||||
status = _query_kernel( inputs, outputs, kernel );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
|
||||
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
|
||||
backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
|
||||
graph, I32, &axis );
|
||||
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
|
||||
}
|
||||
else
|
||||
{
|
||||
status = VSI_FAILURE;
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( argmin, _setup )
|
||||
|
|
@ -1,277 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
/* Kernel metadata: 4 input tensors, 1 output tensor. */
#define _INPUT_NUM (4)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.axis_aligned_bbox_transform")

/* Box given by its two opposite corners (x1, y1) / (x2, y2). */
typedef struct vsi_nn_box_encoding_corner_t
{
    float x1, y1, x2, y2;
}vsi_nn_box_encoding_corner;

/* Box given by center point (x, y) plus width / height. */
typedef struct vsi_nn_box_encoding_center_t
{
    float w, h, x, y;
}vsi_nn_box_encoding_center;
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
/*
 * Kernel params: roi boxes, per-class box deltas, batch index per roi,
 * image (height, width) table -> transformed boxes (see _compute).
 */
static vx_param_description_t _axis_aligned_bbox_transform_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
};
#define _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM _cnt_of_array( _axis_aligned_bbox_transform_kernel_param_def )
|
||||
|
||||
|
||||
static void _to_box_encoding_corner
|
||||
(
|
||||
vsi_nn_box_encoding_center* ctr,
|
||||
vsi_nn_box_encoding_corner* cnr
|
||||
)
|
||||
{
|
||||
cnr->x1 = ctr->x - ctr->w / 2;
|
||||
cnr->y1 = ctr->y - ctr->h / 2;
|
||||
cnr->x2 = ctr->x + ctr->w / 2;
|
||||
cnr->y2 = ctr->y + ctr->h / 2;
|
||||
}
|
||||
|
||||
static void _to_box_encoding_center
|
||||
(
|
||||
vsi_nn_box_encoding_corner* cnr,
|
||||
vsi_nn_box_encoding_center* ctr
|
||||
)
|
||||
{
|
||||
ctr->w = cnr->x2 - cnr->x1;
|
||||
ctr->h = cnr->y2 - cnr->y1;
|
||||
ctr->x = (cnr->x1 + cnr->x2) / 2;
|
||||
ctr->y = (cnr->y1 + cnr->y2) / 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
DEF_KERNEL_EXECUTOR(_compute)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
uint32_t i;
|
||||
const uint32_t roiLength = 4;
|
||||
const uint32_t imageLength = 2;
|
||||
vsi_size_t numClasses = 0;
|
||||
vsi_size_t numRois = 0;
|
||||
vsi_size_t j;
|
||||
vsi_size_t roiIndex;
|
||||
|
||||
/* prepare data */
|
||||
for (i = 0; i < _INPUT_NUM; i ++)
|
||||
{
|
||||
input[i] = (vsi_nn_kernel_tensor_t)param[i];
|
||||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
|
||||
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
|
||||
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
|
||||
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
|
||||
out_bytes[i] = out_elements[i] * sizeof(float);
|
||||
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
|
||||
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
|
||||
memset( f32_out_buffer[i], 0, out_bytes[i] );
|
||||
}
|
||||
|
||||
numClasses = in_attr[1]->shape->data[0] / roiLength;
|
||||
numRois = in_attr[0]->shape->data[1];
|
||||
|
||||
for (roiIndex = 0; roiIndex < numRois; roiIndex++)
|
||||
{
|
||||
uint32_t batchIndex = (uint32_t)f32_in_buffer[2][roiIndex];
|
||||
float imageHeight = f32_in_buffer[3][batchIndex * imageLength];
|
||||
float imageWidth = f32_in_buffer[3][batchIndex * imageLength + 1];
|
||||
vsi_nn_box_encoding_corner roi_cnr;
|
||||
vsi_nn_box_encoding_center roiBefore;
|
||||
roi_cnr.x1 = f32_in_buffer[0][roiIndex * roiLength];
|
||||
roi_cnr.y1 = f32_in_buffer[0][roiIndex * roiLength + 1];
|
||||
roi_cnr.x2 = f32_in_buffer[0][roiIndex * roiLength + 2];
|
||||
roi_cnr.y2 = f32_in_buffer[0][roiIndex * roiLength + 3];
|
||||
_to_box_encoding_center(&roi_cnr, &roiBefore);
|
||||
|
||||
for (j = 0; j < numClasses; j++)
|
||||
{
|
||||
vsi_nn_box_encoding_center roi_ctr;
|
||||
vsi_nn_box_encoding_corner roiAfter;
|
||||
vsi_nn_box_encoding_corner cliped;
|
||||
vsi_size_t index = (roiIndex * numClasses + j) * roiLength;
|
||||
|
||||
roi_ctr.w = (float)(exp(f32_in_buffer[1][index + 2]) * roiBefore.w);
|
||||
roi_ctr.h = (float)(exp(f32_in_buffer[1][index + 3]) * roiBefore.h);
|
||||
roi_ctr.x = roiBefore.x + f32_in_buffer[1][index] * roiBefore.w;
|
||||
roi_ctr.y = roiBefore.y + f32_in_buffer[1][index + 1] * roiBefore.h;
|
||||
_to_box_encoding_corner(&roi_ctr, &roiAfter);
|
||||
|
||||
cliped.x1 = vsi_nn_min(vsi_nn_max(roiAfter.x1, 0.0f), imageWidth);
|
||||
cliped.y1 = vsi_nn_min(vsi_nn_max(roiAfter.y1, 0.0f), imageHeight);
|
||||
cliped.x2 = vsi_nn_min(vsi_nn_max(roiAfter.x2, 0.0f), imageWidth);
|
||||
cliped.y2 = vsi_nn_min(vsi_nn_max(roiAfter.y2, 0.0f), imageHeight);
|
||||
f32_out_buffer[0][index] = cliped.x1;
|
||||
f32_out_buffer[0][index + 1] = cliped.y1;
|
||||
f32_out_buffer[0][index + 2] = cliped.x2;
|
||||
f32_out_buffer[0][index + 3] = cliped.y2;
|
||||
}
|
||||
}
|
||||
|
||||
/* save data */
|
||||
for(i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
|
||||
f32_out_buffer[i], out_elements[i] );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
}
|
||||
|
||||
final:
|
||||
for (i = 0; i < _INPUT_NUM; i++)
|
||||
{
|
||||
if (f32_in_buffer[i])
|
||||
{
|
||||
free(f32_in_buffer[i]);
|
||||
f32_in_buffer[i] = NULL;
|
||||
}
|
||||
if (in_attr[i])
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
|
||||
}
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
if (f32_out_buffer[i])
|
||||
{
|
||||
free(f32_out_buffer[i]);
|
||||
f32_out_buffer[i] = NULL;
|
||||
}
|
||||
if (out_attr[i])
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _axis_aligned_bbox_transform_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _axis_aligned_bbox_transform_kernel_param_def );
|
||||
status = VSI_SUCCESS;
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( axis_aligned_bbox_transform, _setup )
|
||||
|
|
@ -1,222 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* 5 tensor inputs (x, mean, variance, gamma, beta), 1 output, 1 eps scalar. */
#define _CPU_ARG_NUM (1)
#define _CPU_INPUT_NUM (5)
#define _CPU_OUTPUT_NUM (1)
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("batch_norm_sw")
|
||||
|
||||
static vsi_ssize_t _expand_offset
|
||||
(
|
||||
vsi_ssize_t index,
|
||||
vsi_size_t * shape, vsi_size_t rank,
|
||||
vsi_size_t * strides, vsi_size_t * out_shape
|
||||
)
|
||||
{
|
||||
vsi_size_t i;
|
||||
vsi_ssize_t offset = 0;
|
||||
|
||||
for( i = 0; i < rank && index; i ++ )
|
||||
{
|
||||
if( shape[i] == out_shape[i] )
|
||||
{
|
||||
offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
|
||||
}
|
||||
index /= out_shape[i];
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/*
 * CPU reference for batch normalization:
 *   out = (x - mean) * gamma / sqrt(variance + eps) + beta
 * param[0..4] are the x, mean, variance, gamma and beta tensors (each
 * broadcastable to the output shape), param[5] is the output tensor and
 * param[param_size - 1] is the eps scalar.
 */
DEF_KERNEL_EXECUTOR(_batch_norm_exec)
    (
    vsi_nn_kernel_node_t node,
    const vsi_nn_kernel_node_param_t * param,
    size_t param_size
    )
{
    vsi_status status = VX_SUCCESS;
    vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
    float * buffer[_CPU_IO_NUM] = { NULL };
    vsi_size_t out_elements = 0;
    vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
    vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
    uint32_t i = 0;
    float eps = 0.f;

    /* eps is passed as the trailing scalar parameter. */
    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[param_size - 1], &eps);
    CHECK_STATUS_FAIL_GOTO(status, final );

    /* Stage the five input tensors into float buffers. */
    for ( i = 0; i < _CPU_INPUT_NUM; i++)
    {
        tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
        attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );

        vsi_nn_kernel_tensor_attr_get_stride( attr[i], stride_size[i] );
        buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[i], attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( buffer[i], "Create input buffer fail.", final );
    }

    /* Slot 5 is the output tensor; its float buffer is zero-filled. */
    tensors[5] = (vsi_nn_kernel_tensor_t)param[5];
    attr[5] = vsi_nn_kernel_tensor_attr_create( tensors[5] );

    out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[5] );

    buffer[5] = (float *)malloc( out_elements * sizeof(float) );
    CHECK_PTR_FAIL_GOTO( buffer[5], "Create output buffer fail.", final );
    memset( buffer[5], 0, out_elements * sizeof(float) );

    for( i = 0; i < out_elements; i ++ )
    {
        vsi_ssize_t in_offset[5] = {0};
        int32_t j = 0;
        float src = 0.f;
        float mean = 0.f;
        float variance = 0.f;
        float beta = 0.f;
        float gamma = 0.f;

        /* Broadcast: map the flat output index into each input's layout. */
        for ( j = 0; j < 5; j++)
        {
            in_offset[j] = _expand_offset( i, attr[j]->shape->data, (vsi_size_t)attr[j]->shape->size,
                    stride_size[j], attr[5]->shape->data );
        }

        src = buffer[0][in_offset[0]];
        mean = buffer[1][in_offset[1]];
        variance = buffer[2][in_offset[2]];
        gamma = buffer[3][in_offset[3]];
        beta = buffer[4][in_offset[4]];

        /* Normalize, scale and shift. */
        buffer[5][i] = (src - mean) * gamma/ sqrtf(variance + eps) + beta;
    }

    /* Convert the float result back into the output tensor's dtype. */
    status = vsi_nn_kernel_tensor_write_from_float( tensors[5], attr[5],
            buffer[5], out_elements );
    CHECK_STATUS_FAIL_GOTO( status, final );

final:
    /* Release staging buffers and attributes for all six tensors. */
    for( i = 0; i < _CPU_IO_NUM; i ++ )
    {
        if( buffer[i] )
        {
            free( buffer[i] );
        }
        vsi_nn_kernel_tensor_attr_release( &attr[i] );
    }
    return status;
} /* _batch_norm_exec() */
|
||||
|
||||
/* Parameter layout: x, mean, variance, gamma, beta -> out, plus eps scalar. */
static vx_param_description_t kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};

/* Index of the eps scalar within the packed parameter list. */
#define SCALAR_INPUT_EPS (6)
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
vsi_nn_tensor_t* const* const outputs,
|
||||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _batch_norm_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_SUCCESS;
|
||||
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
float eps = 0;
|
||||
|
||||
eps = vsi_nn_kernel_param_get_float32(params, "eps");
|
||||
|
||||
status = _query_kernel( inputs, outputs, kernel );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
|
||||
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
|
||||
/* Pass parameters to node. */
|
||||
backend_params[SCALAR_INPUT_EPS] = vsi_nn_kernel_scalar_create(
|
||||
graph, F32, &eps );
|
||||
|
||||
status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
|
||||
|
||||
vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_EPS] );
|
||||
}
|
||||
else
|
||||
{
|
||||
status = VSI_FAILURE;
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( batchnorm_single, _setup )
|
||||
|
|
@ -1,534 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
/* 3 input tensors (scores, rois, batch split), 4 output tensors. */
#define _INPUT_NUM (3)
#define _OUTPUT_NUM (4)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.box_with_nms_limit")

/*
 * Kernel params: tensors first, then the six NMS tuning scalars.
 */
static vx_param_description_t _box_with_nms_limit_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _BOX_WITH_NMS_LIMIT_PARAM_NUM _cnt_of_array( _box_with_nms_limit_kernel_param_def )
/* Indices of the scalar arguments within the packed parameter list. */
#define SCORE_THRESHOLD (7)
#define MAX_NUM_DETECTIONS (8)
#define NMS_KERNEL_METHOD (9)
#define IOU_THRESHOLD (10)
#define SIGMA (11)
#define NMS_SCORE_THRESHOLD (12)
||||
|
||||
/* Hard NMS weight: keep (1.0) below the IoU threshold, suppress (0.0) at or above it. */
static float hard_nms_kernel
    (
    float iou,
    float iouThreshold
    )
{
    if ( iou < iouThreshold )
    {
        return 1.0f;
    }
    return 0.0f;
}
|
||||
|
||||
/* Linear (soft) NMS weight: 1.0 below the threshold, otherwise decays as 1 - iou. */
static float linear_nms_kernel
    (
    float iou,
    float iouThreshold
    )
{
    if ( iou < iouThreshold )
    {
        return 1.0f;
    }
    return 1.0f - iou;
}
|
||||
|
||||
static float gaussian_nms_kernel
|
||||
(
|
||||
float iou,
|
||||
float sigma
|
||||
)
|
||||
{
|
||||
return (float)(exp(-1.0f * iou * iou / sigma));
|
||||
}
|
||||
|
||||
/* Exchange two entries of an index list in place. */
void swap_element
    (
    uint32_t* list,
    uint32_t first,
    uint32_t second
    )
{
    uint32_t saved = list[second];

    list[second] = list[first];
    list[first] = saved;
}
|
||||
|
||||
/*
 * Return the position (within index_list, not within data) of the entry
 * whose referenced score data[index_list[i]] is largest; ties keep the
 * earliest position.
 *
 * Fix: the original read data[index_list[0]] unconditionally, which is
 * out-of-bounds for len == 0; an empty list now returns 0 without any read.
 */
uint32_t max_element
    (
    float* data,
    uint32_t* index_list,
    uint32_t len
    )
{
    uint32_t i;
    uint32_t max_index = 0;
    float max_val;

    if (len == 0)
    {
        return 0;
    }

    max_val = data[index_list[0]];
    for(i = 1; i < len; i++)
    {
        float val = data[index_list[i]];
        if (max_val < val)
        {
            max_val = val;
            max_index = i;
        }
    }
    return max_index;
}
|
||||
|
||||
/* Comparator for descending score order: left wins when its score is not smaller. */
static uint32_t max_comp_func
    (
    void* data,
    int32_t left,
    int32_t right
    )
{
    float* scores = (float*)data;

    return ( scores[left] >= scores[right] ) ? 1u : 0u;
}
|
||||
|
||||
/*
 * Sort index_list in place so the referenced scores data[index_list[i]]
 * are in descending order. Delegates to the shared partition helper with
 * max_comp_func as the comparator.
 */
void sort_element_by_score
    (
    float* data,
    uint32_t* index_list,
    uint32_t len
    )
{
    /* NOTE(review): assumes len > 0 -- "len - 1" underflows for len == 0;
     * confirm every caller guarantees a non-empty list. */
    vsi_nn_partition(data, 0, len - 1, max_comp_func, TRUE, index_list);
}
|
||||
|
||||
/* Comparator context: score table plus the number of classes per roi. */
typedef struct
{
    float* fdata;
    uint32_t numClasses;
} class_comp_param;

/* Order by class id ascending; within the same class, by score descending. */
static uint32_t class_comp_func
    (
    void* data,
    int32_t left,
    int32_t right
    )
{
    class_comp_param *ctx = (class_comp_param*)data;
    float* scores = ctx->fdata;
    uint32_t leftClass = left % ctx->numClasses;
    uint32_t rightClass = right % ctx->numClasses;

    if ( leftClass != rightClass )
    {
        return leftClass < rightClass;
    }
    return scores[left] > scores[right];
}
|
||||
|
||||
/*
 * Sort index_list in place: primary key class id (index % numClasses,
 * ascending), secondary key score (descending). Delegates to the shared
 * partition helper with class_comp_func.
 */
static void sort_element_by_class
    (
    float* data,
    uint32_t* index_list,
    uint32_t len,
    uint32_t numClasses
    )
{
    class_comp_param class_comp;
    class_comp.fdata = data;
    class_comp.numClasses = numClasses;
    /* NOTE(review): assumes len > 0 -- "len - 1" underflows for len == 0;
     * confirm every caller guarantees a non-empty list. */
    vsi_nn_partition(&class_comp, 0, len - 1, class_comp_func, TRUE, index_list);
}
|
||||
|
||||
// Taking two indices of bounding boxes, return the intersection-of-union.
|
||||
// Taking two bounding boxes as [x1, y1, x2, y2], return their intersection-over-union.
float getIoUAxisAligned
    (
    const float* roi1,
    const float* roi2
    )
{
    /* Intersection rectangle, clamped to zero extent when disjoint. */
    const float ix1 = vsi_nn_max(roi1[0], roi2[0]);
    const float iy1 = vsi_nn_max(roi1[1], roi2[1]);
    const float ix2 = vsi_nn_min(roi1[2], roi2[2]);
    const float iy2 = vsi_nn_min(roi1[3], roi2[3]);
    const float iw = vsi_nn_max(ix2 - ix1, 0.0f);
    const float ih = vsi_nn_max(iy2 - iy1, 0.0f);
    const float areaIntersect = iw * ih;

    /* Union = sum of areas minus the doubly-counted overlap. */
    const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]);
    const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]);
    const float areaUnion = area1 + area2 - areaIntersect;

    return areaIntersect / areaUnion;
}
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
DEF_KERNEL_EXECUTOR(_compute)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
int32_t* int32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
int32_t* int32_out_buffer[_OUTPUT_NUM] = {0};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
uint32_t i = 0;
|
||||
float score_threshold = 0;
|
||||
int32_t max_num_detections = 0;
|
||||
int32_t nms_kernel_method = 0;
|
||||
float iou_threshold = 0;
|
||||
float sigma = 0;
|
||||
float nms_score_threshold = 0;
|
||||
uint32_t j = 0, n = 0, b = 0, c = 0;
|
||||
const uint32_t kRoiDim = 4;
|
||||
uint32_t numRois = 0;
|
||||
uint32_t numClasses = 0;
|
||||
int32_t ind = 0;
|
||||
uint32_t * batch_data = NULL;
|
||||
int32_t numBatch = 0;
|
||||
uint32_t * select = NULL;
|
||||
uint32_t select_size = 0;
|
||||
uint32_t scores_index = 0;
|
||||
uint32_t roi_index = 0;
|
||||
uint32_t roi_out_index = 0;
|
||||
|
||||
/* prepare data */
|
||||
for (i = 0; i < _INPUT_NUM; i ++)
|
||||
{
|
||||
input[i] = (vsi_nn_kernel_tensor_t)param[i];
|
||||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
if (i == 2)
|
||||
{
|
||||
int32_in_buffer[i] = (int32_t*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( int32_in_buffer[i], "Create input buffer fail.", final );
|
||||
}
|
||||
else
|
||||
{
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
|
||||
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
|
||||
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
|
||||
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
|
||||
out_bytes[i] = out_elements[i] * sizeof(float);
|
||||
if (i < 2)
|
||||
{
|
||||
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
|
||||
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
|
||||
memset( f32_out_buffer[i], 0, out_bytes[i] );
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_out_buffer[i] = (int32_t *)malloc( out_bytes[i] );
|
||||
CHECK_PTR_FAIL_GOTO( int32_out_buffer[i], "Create output buffer fail.", final );
|
||||
memset( int32_out_buffer[i], 0, out_bytes[i] );
|
||||
}
|
||||
}
|
||||
|
||||
#define VSI_NN_KERNEL_READ_SCALAR(type, idx, pointer) \
|
||||
vsi_nn_kernel_scalar_read_##type((vsi_nn_kernel_scalar_t)param[idx], pointer)
|
||||
|
||||
status = VSI_NN_KERNEL_READ_SCALAR(float32, SCORE_THRESHOLD, &score_threshold);
|
||||
status |= VSI_NN_KERNEL_READ_SCALAR(int32, MAX_NUM_DETECTIONS, &max_num_detections);
|
||||
status |= VSI_NN_KERNEL_READ_SCALAR(int32, NMS_KERNEL_METHOD, &nms_kernel_method);
|
||||
status |= VSI_NN_KERNEL_READ_SCALAR(float32, IOU_THRESHOLD, &iou_threshold);
|
||||
status |= VSI_NN_KERNEL_READ_SCALAR(float32, SIGMA, &sigma);
|
||||
status |= VSI_NN_KERNEL_READ_SCALAR(float32, NMS_SCORE_THRESHOLD, &nms_score_threshold);
|
||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||
#undef VSI_NN_KERNEL_READ_SCALAR
|
||||
|
||||
numRois = (uint32_t)in_attr[0]->shape->data[1];
|
||||
numClasses = (uint32_t)in_attr[0]->shape->data[0];
|
||||
|
||||
batch_data = (uint32_t*)malloc(numRois * sizeof(uint32_t));
|
||||
CHECK_PTR_FAIL_GOTO( batch_data, "Create batch_data fail.", final );
|
||||
memset(batch_data, 0, numRois * sizeof(uint32_t));
|
||||
|
||||
for (i = 0, ind = -1; i < numRois; i++)
|
||||
{
|
||||
if (int32_in_buffer[2][i] != ind)
|
||||
{
|
||||
ind = int32_in_buffer[2][i];
|
||||
numBatch++;
|
||||
}
|
||||
batch_data[numBatch - 1]++;
|
||||
}
|
||||
select = (uint32_t*)malloc(numBatch * numRois
|
||||
* numClasses * sizeof(uint32_t));
|
||||
CHECK_PTR_FAIL_GOTO( select, "Create select fail.", final );
|
||||
memset(select, 0, numBatch * numRois * numClasses * sizeof(uint32_t));
|
||||
for (n = 0; n < (uint32_t)numBatch; n++)
|
||||
{
|
||||
int32_t numDetections_batch = 0;
|
||||
uint32_t select_start_batch = select_size;
|
||||
uint32_t select_len = 0;
|
||||
// Exclude class 0 (background)
|
||||
for (c = 1; c < numClasses; c++)
|
||||
{
|
||||
uint32_t select_start = select_size;
|
||||
int32_t maxNumDetections0 = max_num_detections;
|
||||
uint32_t numDetections = 0;
|
||||
for (b = 0; b < batch_data[n]; b++)
|
||||
{
|
||||
uint32_t index = b * numClasses + c;
|
||||
float score = f32_in_buffer[0][scores_index + index];
|
||||
if (score > score_threshold) {
|
||||
select[select_size] = index;
|
||||
select_size++;
|
||||
}
|
||||
}
|
||||
select_len = select_size - select_start;
|
||||
|
||||
if (maxNumDetections0 < 0)
|
||||
{
|
||||
maxNumDetections0 = select_len;
|
||||
}
|
||||
|
||||
for (j = 0; (j < select_len && numDetections < (uint32_t)maxNumDetections0); j++)
|
||||
{
|
||||
// find max score and swap to the front.
|
||||
int32_t max_index = max_element(&(f32_in_buffer[0][scores_index]),
|
||||
&(select[select_start + j]), select_len - j) + j;
|
||||
|
||||
swap_element(&(select[select_start]), max_index, j);
|
||||
|
||||
// Calculate IoU of the rest, swap to the end (disgard) if needed.
|
||||
for (i = j + 1; i < select_len; i++)
|
||||
{
|
||||
int32_t roiBase0 = roi_index + select[select_start + i] * kRoiDim;
|
||||
int32_t roiBase1 = roi_index + select[select_start + j] * kRoiDim;
|
||||
float iou = getIoUAxisAligned(&(f32_in_buffer[1][roiBase0]),
|
||||
&(f32_in_buffer[1][roiBase1]));
|
||||
float kernel_iou;
|
||||
if (nms_kernel_method == 0)
|
||||
{
|
||||
kernel_iou = hard_nms_kernel(iou, iou_threshold);
|
||||
}
|
||||
else if (nms_kernel_method == 1)
|
||||
{
|
||||
kernel_iou = linear_nms_kernel(iou, iou_threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
kernel_iou = gaussian_nms_kernel(iou, sigma);
|
||||
}
|
||||
f32_in_buffer[0][scores_index + select[select_start + i]] *= kernel_iou;
|
||||
if (f32_in_buffer[0][scores_index + select[select_start + i]] < nms_score_threshold)
|
||||
{
|
||||
swap_element(&(select[select_start]), i, select_len - 1);
|
||||
i--;
|
||||
select_len--;
|
||||
}
|
||||
}
|
||||
numDetections++;
|
||||
}
|
||||
select_size = select_start + select_len;
|
||||
numDetections_batch += numDetections;
|
||||
}
|
||||
|
||||
// Take top max_num_detections.
|
||||
sort_element_by_score(&(f32_in_buffer[0][scores_index]), &(select[select_start_batch]),
|
||||
numDetections_batch);
|
||||
|
||||
if (numDetections_batch > max_num_detections && max_num_detections >= 0)
|
||||
{
|
||||
select_size = select_start_batch + max_num_detections;
|
||||
}
|
||||
select_len = select_size - select_start_batch;
|
||||
// Sort again by class.
|
||||
sort_element_by_class(&(f32_in_buffer[0][scores_index]), &(select[select_start_batch]),
|
||||
select_len, numClasses);
|
||||
|
||||
for (i = 0; i < select_len; i++)
|
||||
{
|
||||
int32_t in_index0 = scores_index + select[select_start_batch + i];
|
||||
int32_t in_index1 = roi_index + select[select_start_batch + i] * kRoiDim;
|
||||
f32_out_buffer[0][roi_out_index] = f32_in_buffer[0][in_index0];
|
||||
memcpy(&(f32_out_buffer[1][roi_out_index * kRoiDim]),
|
||||
&f32_in_buffer[1][in_index1], kRoiDim * sizeof(float));
|
||||
int32_out_buffer[2][roi_out_index] = select[select_start_batch + i] % numClasses;
|
||||
int32_out_buffer[3][roi_out_index] = n;
|
||||
roi_out_index++;
|
||||
}
|
||||
|
||||
scores_index += batch_data[n] * numClasses;
|
||||
roi_index += batch_data[n] * numClasses * kRoiDim;
|
||||
}
|
||||
|
||||
/* save data */
|
||||
for(i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
if (i < 2)
|
||||
{
|
||||
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
|
||||
f32_out_buffer[i], out_elements[i] );
|
||||
}
|
||||
else
|
||||
{
|
||||
status = vsi_nn_kernel_tensor_write( output[i], out_attr[i],
|
||||
int32_out_buffer[i], out_bytes[i] );
|
||||
}
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
}
|
||||
final:
|
||||
vsi_nn_safe_free(batch_data);
|
||||
vsi_nn_safe_free(select);
|
||||
for (i = 0; i < _INPUT_NUM; i++)
|
||||
{
|
||||
vsi_nn_safe_free(f32_in_buffer[i]);
|
||||
vsi_nn_safe_free(int32_in_buffer[i]);
|
||||
|
||||
if (in_attr[i])
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
|
||||
}
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
vsi_nn_safe_free(f32_out_buffer[i]);
|
||||
vsi_nn_safe_free(int32_out_buffer[i]);
|
||||
|
||||
if (out_attr[i])
|
||||
{
|
||||
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _compute() */
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_SUCCESS;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _box_with_nms_limit_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _box_with_nms_limit_kernel_param_def );
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_BOX_WITH_NMS_LIMIT_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
float score_threshold = vsi_nn_kernel_param_get_float32( params, "score_threshold" );
|
||||
int32_t max_num_detections = vsi_nn_kernel_param_get_int32( params, "max_num_detections" );
|
||||
int32_t nms_kernel_method = vsi_nn_kernel_param_get_int32( params, "nms_kernel_method" );
|
||||
float iou_threshold = vsi_nn_kernel_param_get_float32( params, "iou_threshold" );
|
||||
float sigma = vsi_nn_kernel_param_get_float32( params, "sigma" );
|
||||
float nms_score_threshold = vsi_nn_kernel_param_get_float32( params, "nms_score_threshold" );
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if ( VSI_SUCCESS == status )
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _BOX_WITH_NMS_LIMIT_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[SCORE_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &score_threshold );
|
||||
node_params[MAX_NUM_DETECTIONS] = vsi_nn_kernel_scalar_create( graph, I32, &max_num_detections );
|
||||
node_params[NMS_KERNEL_METHOD] = vsi_nn_kernel_scalar_create( graph, I32, &nms_kernel_method );
|
||||
node_params[IOU_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &iou_threshold );
|
||||
node_params[SIGMA] = vsi_nn_kernel_scalar_create( graph, F32, &sigma );
|
||||
node_params[NMS_SCORE_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &nms_score_threshold );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _BOX_WITH_NMS_LIMIT_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCORE_THRESHOLD] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[MAX_NUM_DETECTIONS] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[NMS_KERNEL_METHOD] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[IOU_THRESHOLD] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SIGMA] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[NMS_SCORE_THRESHOLD] );
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
/* Register the CPU (reference) backend for the box_with_nms_limit operator. */
REGISTER_BACKEND_CPU( box_with_nms_limit, _setup )
|
||||
|
|
@ -1,229 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
#define _INPUT_NUM (2)
|
||||
#define _OUTPUT_NUM (1)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.bucketize")
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
/*
 * Kernel parameter layout: values tensor, boundaries tensor, output tensor,
 * then the "right" flag scalar (boundary comparison mode).
 */
static vx_param_description_t _bucketize_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _BUCKETIZE_PARAM_NUM  _cnt_of_array( _bucketize_kernel_param_def )
/* Index of the "right" scalar in the table above. */
#define SCALAR_RIGHT_VALUE  (3)
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
/*
 * CPU reference implementation of bucketize: for each input element, count
 * how many boundary values it exceeds. The count is the bucket index.
 * param layout: [0]=values tensor, [1]=boundaries tensor, [2]=output tensor,
 * [3]="right" scalar. Returns VSI_SUCCESS on success.
 */
DEF_KERNEL_EXECUTOR(_compute)
    (
    vsi_nn_kernel_node_t node,
    const vsi_nn_kernel_node_param_t * param,
    size_t param_size
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
    /* All tensor data is staged through float buffers regardless of the
     * tensors' native dtype; conversion happens in create_buffer/write. */
    float *f32_in_buffer[_INPUT_NUM] = {NULL};
    float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
    vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
    vsi_size_t out_elements[_OUTPUT_NUM] = {0};
    vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
    uint32_t i = 0, j = 0;
    int32_t right = 0;
    uint32_t boundaries_size = 0;

    /* prepare data: unpack tensors from param[] and stage them as floats */
    for(i = 0; i < _INPUT_NUM; i ++)
    {
        input[i] = (vsi_nn_kernel_tensor_t)param[i];
        in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
        f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
    }
    for(i = 0; i < _OUTPUT_NUM; i ++)
    {
        output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
        out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
        vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
        out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
        out_bytes[i] = out_elements[i] * sizeof(float);
        f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
        CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
        memset( f32_out_buffer[i], 0, out_bytes[i] );
    }

    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_RIGHT_VALUE], &(right));

    /* Boundaries tensor is treated as 1-D; its first dim is the count. */
    boundaries_size = (uint32_t)in_attr[1]->shape->data[0];

    for (i = 0; i < out_elements[0]; i++)
    {
        float src0 = f32_in_buffer[0][i];
        float dst = 0;

        /* Linear scan over boundaries: the bucket index equals the number
         * of boundaries the value passes under the chosen comparison. */
        for (j = 0; j < boundaries_size; j++)
        {
            float src1 = f32_in_buffer[1][j];

            /* right==1 counts boundaries <= value; otherwise strictly <. */
            if (right == 1)
            {
                dst += (src0 >= src1 ? 1.0f : 0.0f);
            }
            else
            {
                dst += (src0 > src1 ? 1.0f : 0.0f);
            }
        }

        f32_out_buffer[0][i] = dst;
    }

    /* save data: convert the float buffer back to the output dtype */
    for(i = 0; i < _OUTPUT_NUM; i++)
    {
        status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
                f32_out_buffer[i], out_elements[i] );
        CHECK_STATUS_FAIL_GOTO( status, final );
    }

final:
    /* Single cleanup path for both success and failure. */
    for (i = 0; i < _INPUT_NUM; i++)
    {
        if (f32_in_buffer[i])
        {
            free(f32_in_buffer[i]);
            f32_in_buffer[i] = NULL;
        }
        if (in_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
        }
    }
    for(i = 0; i < _OUTPUT_NUM; i++)
    {
        if (f32_out_buffer[i])
        {
            free(f32_out_buffer[i]);
            f32_out_buffer[i] = NULL;
        }
        if (out_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
        }
    }

    return status;
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _bucketize_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _bucketize_kernel_param_def );
|
||||
status = VSI_SUCCESS;
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_BUCKETIZE_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t right = vsi_nn_kernel_param_get_int32( params, "right" );
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _BUCKETIZE_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
/* Pass parameters to node. */
|
||||
node_params[SCALAR_RIGHT_VALUE] = vsi_nn_kernel_scalar_create( graph, I32, &right );
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _BUCKETIZE_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_RIGHT_VALUE] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
/* Register the CPU (reference) backend for the bucketize operator. */
REGISTER_BACKEND_CPU( bucketize, _setup )
|
||||
|
||||
|
|
@ -1,217 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "utils/vsi_nn_dtype_util_prv.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
#define _INPUT_NUM (1)
|
||||
#define _OUTPUT_NUM (1)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.cast")
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
/*
 * Kernel parameter layout: one input tensor, one output tensor.
 * Cast takes no scalar parameters; conversion is driven by tensor dtypes.
 */
static vx_param_description_t _cast_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
};
#define _CAST_PARAM_NUM  _cnt_of_array( _cast_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
DEF_KERNEL_EXECUTOR(_compute)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
uint32_t i;
|
||||
double max_value = 0.0f, min_value = 0.0f;
|
||||
vsi_bool clamp_flag = FALSE;
|
||||
vsi_nn_type_e out_type;
|
||||
/* prepare data */
|
||||
for(i = 0; i < _INPUT_NUM; i ++)
|
||||
{
|
||||
input[i] = (vsi_nn_kernel_tensor_t)param[i];
|
||||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
in_attr[i]->quant = VSI_NN_KERNEL_QUANT_NONE;
|
||||
in_attr[i]->dfp.fl = 0;
|
||||
in_attr[i]->asymm.scale = 1.0f;
|
||||
in_attr[i]->asymm.zero_point = 0;
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
|
||||
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
|
||||
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
|
||||
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
|
||||
out_bytes[i] = out_elements[i] * sizeof(float);
|
||||
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
|
||||
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
|
||||
memset( f32_out_buffer[i], 0, out_bytes[i] );
|
||||
}
|
||||
|
||||
out_type = vsi_nn_dtype_map_kernel(out_attr[0]->dtype);
|
||||
|
||||
if( type_is_integer( out_type ) )
|
||||
{
|
||||
clamp_flag = TRUE;
|
||||
type_get_range(out_type, &max_value, &min_value);
|
||||
}
|
||||
|
||||
for (i = 0; i < out_elements[0]; i++)
|
||||
{
|
||||
float val = f32_in_buffer[0][i];
|
||||
if (clamp_flag)
|
||||
{
|
||||
val = vsi_nn_clamp(val, (float)min_value, (float)max_value);
|
||||
}
|
||||
f32_out_buffer[0][i] = val;
|
||||
}
|
||||
|
||||
|
||||
/* save data */
|
||||
for(i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
out_attr[i]->quant = VSI_NN_KERNEL_QUANT_NONE;
|
||||
out_attr[i]->dfp.fl = 0;
|
||||
out_attr[i]->asymm.scale = 1.0f;
|
||||
out_attr[i]->asymm.zero_point = 0;
|
||||
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
|
||||
f32_out_buffer[i], out_elements[i] );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
}
|
||||
|
||||
final:
|
||||
for (i = 0; i < _INPUT_NUM; i++)
|
||||
{
|
||||
if (f32_in_buffer[i])
|
||||
{
|
||||
free(f32_in_buffer[i]);
|
||||
f32_in_buffer[i] = NULL;
|
||||
}
|
||||
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i++)
|
||||
{
|
||||
if (f32_out_buffer[i])
|
||||
{
|
||||
free(f32_out_buffer[i]);
|
||||
f32_out_buffer[i] = NULL;
|
||||
}
|
||||
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _cast_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _cast_kernel_param_def );
|
||||
status = VSI_SUCCESS;
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_CAST_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _CAST_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _CAST_PARAM_NUM );
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
/* Register the CPU (reference) backend for the cast operator. */
REGISTER_BACKEND_CPU( cast, _setup )
|
||||
|
|
@ -1,217 +0,0 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
#define _INPUT_NUM (1)
|
||||
#define _OUTPUT_NUM (1)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.clip")
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
/*
 * Kernel parameter layout: input tensor, output tensor, then the two
 * clamp-bound scalars (min, max).
 */
static vx_param_description_t _clip_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _CLIP_PARAM_NUM  _cnt_of_array( _clip_kernel_param_def )

/* Indices of the bound scalars in the table above. */
#define SCALAR_MIN_VALUE (2)
#define SCALAR_MAX_VALUE (3)
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
/*
 * CPU reference implementation of clip: clamp every input element to the
 * [min_value, max_value] range supplied via the two scalar parameters.
 * param layout: [0]=input tensor, [1]=output tensor, [2]=min, [3]=max.
 * Returns VSI_SUCCESS on success.
 */
DEF_KERNEL_EXECUTOR(_compute)
    (
    vsi_nn_kernel_node_t node,
    const vsi_nn_kernel_node_param_t * param,
    size_t param_size
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
    /* Data is staged through float buffers regardless of tensor dtype. */
    float *f32_in_buffer[_INPUT_NUM] = {NULL};
    float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
    vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
    vsi_size_t out_elements[_OUTPUT_NUM] = {0};
    vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
    uint32_t i;
    float min_value = 0.0f;
    float max_value = 0.0f;

    /* prepare data: unpack tensors from param[] and stage them as floats */
    for(i = 0; i < _INPUT_NUM; i ++)
    {
        input[i] = (vsi_nn_kernel_tensor_t)param[i];
        in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
        f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
    }
    for(i = 0; i < _OUTPUT_NUM; i ++)
    {
        output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
        out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
        vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
        out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
        out_bytes[i] = out_elements[i] * sizeof(float);
        f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
        CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
        memset( f32_out_buffer[i], 0, out_bytes[i] );
    }

    /* Read the clamp bounds from the scalar parameters. */
    vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MIN_VALUE], &(min_value));
    vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_VALUE], &(max_value));

    for (i = 0; i < out_elements[0]; i++)
    {
        f32_out_buffer[0][i] = vsi_nn_clamp(f32_in_buffer[0][i], min_value, max_value);
    }

    /* save data: convert the float buffer back to the output dtype */
    for(i = 0; i < _OUTPUT_NUM; i++)
    {
        status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
                f32_out_buffer[i], out_elements[i] );
        CHECK_STATUS_FAIL_GOTO( status, final );
    }

final:
    /* Single cleanup path for both success and failure. */
    for (i = 0; i < _INPUT_NUM; i++)
    {
        if (f32_in_buffer[i])
        {
            free(f32_in_buffer[i]);
            f32_in_buffer[i] = NULL;
        }
        if (in_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
        }
    }
    for(i = 0; i < _OUTPUT_NUM; i++)
    {
        if (f32_out_buffer[i])
        {
            free(f32_out_buffer[i]);
            f32_out_buffer[i] = NULL;
        }
        if (out_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
        }
    }

    return status;
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _clip_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _clip_kernel_param_def );
|
||||
status = VSI_SUCCESS;
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_CLIP_PARAM_NUM] = {NULL};
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
|
||||
float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _CLIP_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
|
||||
node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _CLIP_PARAM_NUM );
|
||||
VSI_ASSERT( status == VSI_SUCCESS );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MIN_VALUE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_VALUE] );
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
/* Register the CPU (reference) backend for the clip operator. */
REGISTER_BACKEND_CPU( clip, _setup )
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue