Update prebuilt && internal for 23Q2 release (#617)
* Update prebuilt-sdk to 6.4.15 release Type: Code Improvement Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com> * Update internal to 1.1.84 rel Update internal to SHA 1e591108dddcbf6dd88d5eef97a7d8b3ffc19ce3 Type: Code Improvement Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com> --------- Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
This commit is contained in:
parent
02d6d72946
commit
32c5a61601
|
|
@ -1 +1 @@
|
||||||
6.4.14_CL650117A_D650117_A648302_R647402_T648811_O646970
|
6.4.15_CL690884A_D690855_A690484_R690194_T690259_O688896
|
||||||
|
|
@ -1340,6 +1340,21 @@ VX_API_ENTRY vx_status VX_API_CALL vxAssignNodeCallback(vx_node node, vx_nodecom
|
||||||
*/
|
*/
|
||||||
VX_API_ENTRY vx_nodecomplete_f VX_API_CALL vxRetrieveNodeCallback(vx_node node);
|
VX_API_ENTRY vx_nodecomplete_f VX_API_CALL vxRetrieveNodeCallback(vx_node node);
|
||||||
|
|
||||||
|
/*! \brief Assigns a callback to a node.
|
||||||
|
* If a callback already exists in this node, this function must return an error
|
||||||
|
* and the user may clear the callback by passing a NULL pointer as the callback.
|
||||||
|
* \param [in] node The reference to the node.
|
||||||
|
* \param [in] callback The callback to associate with completion of this
|
||||||
|
* specific node.
|
||||||
|
* \warning This must be used with <b><i>extreme</i></b> caution as it can \e ruin
|
||||||
|
* optimizations in the power/performance efficiency of a graph.
|
||||||
|
* \return A <tt>\ref vx_status_e</tt> enumeration.
|
||||||
|
* \retval VX_SUCCESS Callback assigned; any other value indicates failure.
|
||||||
|
* \retval VX_ERROR_INVALID_REFERENCE node is not a valid <tt>\ref vx_node</tt> reference.
|
||||||
|
* \ingroup group_node_callback
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_status VX_API_CALL vxAssignNodeQueryCallback(vx_node node, vx_nodequery_f callback);
|
||||||
|
|
||||||
/*! \brief Sets the node target to the provided value. A success invalidates the graph
|
/*! \brief Sets the node target to the provided value. A success invalidates the graph
|
||||||
* that the node belongs to (<tt>\ref vxVerifyGraph</tt> must be called before the next execution)
|
* that the node belongs to (<tt>\ref vxVerifyGraph</tt> must be called before the next execution)
|
||||||
* \param [in] node The reference to the <tt>\ref vx_node</tt> object.
|
* \param [in] node The reference to the <tt>\ref vx_node</tt> object.
|
||||||
|
|
|
||||||
|
|
@ -503,6 +503,40 @@ enum vx_kernel_e {
|
||||||
|
|
||||||
VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x33,
|
VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x33,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_FUSED_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x34,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_CONVOLUTION_RELU_POOLING_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x35,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_LAYER_NORMALIZATION_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x36,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_INSTANCE_NORMALIZATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x37,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_GROUP_NORMALIZATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x38,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_LOGICAL_OPS_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x39,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_LOGICAL_NOT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x40,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_RELATIONAL_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x41,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_TENSOR_REDUCE_MAX = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x42,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_MAXIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x43,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_MINIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x44,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_TENSOR_SELECT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x45,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_REDUCE_SUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x46,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_GRU_CELL_ACTIVATION_Z_H_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x47,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_GRU_CELL_H_TIMES_ACTIVATION_R_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x48,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_GRU_CELL_RESET_AFTER_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x49,
|
||||||
|
|
||||||
|
VX_KERNEL_NN_LSTM_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x50,
|
||||||
|
|
||||||
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
|
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -214,7 +214,7 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
|
||||||
1: support
|
1: support
|
||||||
*/
|
*/
|
||||||
#ifndef VX_STREAM_PROCESSOR_SUPPORT
|
#ifndef VX_STREAM_PROCESSOR_SUPPORT
|
||||||
#define VX_STREAM_PROCESSOR_SUPPORT 0
|
#define VX_STREAM_PROCESSOR_SUPPORT 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -258,5 +258,144 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
|
||||||
#define VX_ACTIVATION_EXT2_SUPPORT 1
|
#define VX_ACTIVATION_EXT2_SUPPORT 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_TENSORVIEW_ON_ANY_DIM is used to declare that ovxlib can do optimization for all concat node(all dimision) to tensor view if possiable, not only channel.
|
||||||
|
[value]
|
||||||
|
0: disable
|
||||||
|
1: enable
|
||||||
|
*/
|
||||||
|
#ifndef VX_TENSORVIEW_ON_ANY_DIM
|
||||||
|
#define VX_TENSORVIEW_ON_ANY_DIM 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_DEPTH2SPACE_CRD_MODE_SUPPORT is used to declare that SPACE2DEPTH can support CRD mode
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_DEPTH2SPACE_CRD_MODE_SUPPORT
|
||||||
|
#define VX_DEPTH2SPACE_CRD_MODE_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_LAYER_NORMALIZATION_VX_SUPPORT is used to declare driver support layer normalization layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_LAYER_NORMALIZATION_VX_SUPPORT
|
||||||
|
#define VX_LAYER_NORMALIZATION_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_LAYER_NORMALIZATION_VX_SUPPORT is used to declare driver support layer normalization layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_INSTANCE_NORMALIZATION_VX_SUPPORT
|
||||||
|
#define VX_INSTANCE_NORMALIZATION_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_GROUP_NORMALIZATION_VX_SUPPORT is used to declare driver support layer normalization layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_GROUP_NORMALIZATION_VX_SUPPORT
|
||||||
|
#define VX_GROUP_NORMALIZATION_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_LOGICAL_VX_SUPPORT is used to declare driver support layer logical related layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_LOGICAL_VX_SUPPORT
|
||||||
|
#define VX_LOGICAL_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_RELATIONAL_OPS_VX_SUPPORT is used to declare driver support layer relational related layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_RELATIONAL_OPS_VX_SUPPORT
|
||||||
|
#define VX_RELATIONAL_OPS_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_REDUCE_MAX_VX_SUPPORT is used to declare driver support layer reduce max layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_REDUCE_MAX_VX_SUPPORT
|
||||||
|
#define VX_REDUCE_MAX_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_REDUCE_MEAN_VX_SUPPORT is used to declare driver support layer reduce mean layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_REDUCE_MEAN_VX_SUPPORT
|
||||||
|
#define VX_REDUCE_MEAN_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_REDUCE_SUM_VX_SUPPORT is used to declare driver support layer reduce sum layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_REDUCE_SUM_VX_SUPPORT
|
||||||
|
#define VX_REDUCE_SUM_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_MAX_MIN_IMUM_VX_SUPPORT is used to declare driver support maximum and minimum layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_MAX_MIN_IMUM_VX_SUPPORT
|
||||||
|
#define VX_MAX_MIN_IMUM_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_TENSOR_SELECR_VX_SUPPORT is used to declare driver support tensor select layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_TENSOR_SELECT_VX_SUPPORT
|
||||||
|
#define VX_TENSOR_SELECT_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_GRU_CELL_VX_SUPPORT is used to declare driver support gru cell layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_GRU_CELL_VX_SUPPORT
|
||||||
|
#define VX_GRU_CELL_VX_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
VX_LSTM_ACTIVATION_SUPPORT is used to declare driver support gru cell layer.
|
||||||
|
[value]
|
||||||
|
0: not support
|
||||||
|
1: support
|
||||||
|
*/
|
||||||
|
#ifndef VX_LSTM_ACTIVATION_SUPPORT
|
||||||
|
#define VX_LSTM_ACTIVATION_SUPPORT 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* __VX_KHR_COMPATIBLE_H__ */
|
#endif /* __VX_KHR_COMPATIBLE_H__ */
|
||||||
|
|
|
||||||
|
|
@ -395,6 +395,17 @@ enum vx_tensor_lifetime_type_e
|
||||||
VX_TENSOR_LIFE_TIME_DYNAMIC,
|
VX_TENSOR_LIFE_TIME_DYNAMIC,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*! \brief Specifies depthtospace mode
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
enum vx_nn_depth_to_space_mode_e
|
||||||
|
{
|
||||||
|
/*! \brief DCR(default) for depth-column-row order re-arrangement */
|
||||||
|
VX_NN_DEPTH_TO_SPACE_DCR = 0x0,
|
||||||
|
/*! \brief CRD for column-row-depth order re-arrangement */
|
||||||
|
VX_NN_DEPTH_TO_SPACE_CRD,
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct _vx_nn_convolution_3d_params_t
|
typedef struct _vx_nn_convolution_3d_params_t
|
||||||
{
|
{
|
||||||
vx_int32 padding_w_left; /*!< \brief Number of elements added at each side in the left of w dimension of the input. */
|
vx_int32 padding_w_left; /*!< \brief Number of elements added at each side in the left of w dimension of the input. */
|
||||||
|
|
@ -972,6 +983,16 @@ typedef struct _vx_nn_mean_params_t
|
||||||
vx_int32 keep_dims; /*!< \brief Keep dims, if positive, retains reduced dims with length 1 */
|
vx_int32 keep_dims; /*!< \brief Keep dims, if positive, retains reduced dims with length 1 */
|
||||||
} vx_nn_mean_params_t;
|
} vx_nn_mean_params_t;
|
||||||
|
|
||||||
|
/*! \brief Input parameter for reducesum layer
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*\version 0.5
|
||||||
|
*/
|
||||||
|
typedef struct _vx_nn_sum_params_t
|
||||||
|
{
|
||||||
|
vx_tensor axis; /*!< \brief 1D axis tensor of reduce dims </tt> */
|
||||||
|
vx_int32 keep_dims; /*!< \brief Keep dims, if positive, retains reduced dims with length 1 */
|
||||||
|
} vx_nn_sum_params_t;
|
||||||
|
|
||||||
/*! \brief Input parameter for tensor squeeze layer
|
/*! \brief Input parameter for tensor squeeze layer
|
||||||
* \ingroup group_cnn
|
* \ingroup group_cnn
|
||||||
*\version 0.5
|
*\version 0.5
|
||||||
|
|
@ -1254,6 +1275,12 @@ typedef struct _vx_nn_reorg_params_ext2_t
|
||||||
vx_int32 *axis;
|
vx_int32 *axis;
|
||||||
} vx_nn_reorg_params_ext2_t;
|
} vx_nn_reorg_params_ext2_t;
|
||||||
|
|
||||||
|
typedef struct _vx_nn_reorg_params_ext3_t
|
||||||
|
{
|
||||||
|
vx_nn_reorg_params_ext2_t base; /*!< \brief vx_nn_reorg_params <tt>\ref vx_nn_reorg_params_t</tt> */
|
||||||
|
vx_enum mode; /*!< \brief [Optional] Only for DEPH2SPACE */
|
||||||
|
} vx_nn_reorg_params_ext3_t;
|
||||||
|
|
||||||
/*! \brief [Graph] Creates a Reorgnization Layer Node, Enhancement of vxReorgLayer, Support both DEPTH to SPACE and SPACE to DEPTH.
|
/*! \brief [Graph] Creates a Reorgnization Layer Node, Enhancement of vxReorgLayer, Support both DEPTH to SPACE and SPACE to DEPTH.
|
||||||
* \param [in] graph The reference to the parent graph.
|
* \param [in] graph The reference to the parent graph.
|
||||||
* \param [in] input The input tensor data to reorg.
|
* \param [in] input The input tensor data to reorg.
|
||||||
|
|
@ -1911,6 +1938,21 @@ VX_API_ENTRY vx_node VX_API_CALL vxRPNLayer(
|
||||||
vx_tensor score_output
|
vx_tensor score_output
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/*! \brief Input parameters for a lstm activation operation.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
* \version 0.3
|
||||||
|
*/
|
||||||
|
typedef struct _vx_nn_lstm_activation_params_t
|
||||||
|
{
|
||||||
|
vx_int32 is_ln;
|
||||||
|
vx_int32 is_cifg;
|
||||||
|
vx_int32 is_proj;
|
||||||
|
vx_int32 is_hybrid;
|
||||||
|
vx_int32 is_peephole;
|
||||||
|
vx_int32 recurrent_activation;
|
||||||
|
vx_float32 forget_bias;
|
||||||
|
} vx_nn_lstm_activation_params_t;
|
||||||
|
|
||||||
/*! \brief Input parameters for a lstm operation.
|
/*! \brief Input parameters for a lstm operation.
|
||||||
* \ingroup group_cnn
|
* \ingroup group_cnn
|
||||||
* \version 0.3
|
* \version 0.3
|
||||||
|
|
@ -2115,6 +2157,28 @@ VX_API_ENTRY vx_node VX_API_CALL vxTensorMeanNode(
|
||||||
vx_size size_of_mean_param,
|
vx_size size_of_mean_param,
|
||||||
vx_tensor outputs);
|
vx_tensor outputs);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates sum layer node.
|
||||||
|
* \details
|
||||||
|
* Computes the sum of elements across dimensions of a tensor.
|
||||||
|
*
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input A n-D tensor, specifying the input.
|
||||||
|
* \param [in] sum_params paraments <tt>\ref vx_nn_sum_params_t </tt>.
|
||||||
|
* \param [in] size_of_sum_param [static] The size of the vx_nn_mean_params_t.
|
||||||
|
* \param [out] output A n-D tensor of the same type as input.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_tensor
|
||||||
|
* \version 0.5
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxReduceSumNode(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor inputs,
|
||||||
|
const vx_nn_sum_params_t *sum_params,
|
||||||
|
vx_size size_of_sum_param,
|
||||||
|
vx_tensor outputs);
|
||||||
|
|
||||||
/*! \brief [Graph] Creates squeeze layer node.
|
/*! \brief [Graph] Creates squeeze layer node.
|
||||||
* \details
|
* \details
|
||||||
* Remove dimensions of size 1 from the input tensor.
|
* Remove dimensions of size 1 from the input tensor.
|
||||||
|
|
@ -2287,6 +2351,282 @@ VX_API_ENTRY vx_node VX_API_CALL vxConv3dLayer(vx_graph graph, vx_tensor inputs,
|
||||||
*/
|
*/
|
||||||
VX_API_ENTRY vx_node VX_API_CALL vxDeconv3dLayer(vx_graph graph, vx_tensor inputs, vx_tensor weights, vx_tensor biases, const vx_nn_deconvolution_3d_params_t *convolution_params, vx_size size_of_deconv_params, vx_tensor outputs);
|
VX_API_ENTRY vx_node VX_API_CALL vxDeconv3dLayer(vx_graph graph, vx_tensor inputs, vx_tensor weights, vx_tensor biases, const vx_nn_deconvolution_3d_params_t *convolution_params, vx_size size_of_deconv_params, vx_tensor outputs);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer Normalization Node.
|
||||||
|
* \details Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] eps [static] Float 32. Small value to add to the variance estimate so that we don't divide by zero.(default is 1e-5)
|
||||||
|
* \param [in] axis [static] The axis on which we need do normalize.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxLayerNormalizationLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_float32 eps,
|
||||||
|
vx_int32 axis,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer instance normalization Node.
|
||||||
|
* \details Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] eps [static] Float 32. Small value to add to the variance estimate so that we don't divide by zero.(default is 1e-5)
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxInstanceNormalizationLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_float32 eps,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer instance normalization Node.
|
||||||
|
* \details Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] eps [static] Float 32. Small value to add to the variance estimate so that we don't divide by zero.(default is 1e-5)
|
||||||
|
* \param [in] group_num [static] Int 32. Number of groups for GN
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxGroupNormalizationLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_float32 eps,
|
||||||
|
vx_int32 group_num,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer logical ops Node.
|
||||||
|
* \details Return the truth value of x AND, XOR,OR y element-wise.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] ops_type [static] Int 32. Operation Type
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxLogicalOpsLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_int32 ops_type,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer logical not Node.
|
||||||
|
* \details Return the truth value of not x element-wise.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input [static] The input tensor data.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxLogicalNotLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor input,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer relational Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] ops_type [static] Int 32. Operation Type
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxRelationalLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_int32 ops_type,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Computes the max of elements across dimensions of input tensor.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] in input tensor data,
|
||||||
|
* \param [in] axis [static] used to determine max across which dimension(dimension 0 means width, etc). If not given, compute the sum across all dimensions.
|
||||||
|
* \param [in] keep_dim [static] means if keep the dimesion count.
|
||||||
|
* \param [out] out output tensor data.
|
||||||
|
* \ingroup group_tensor
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \retval 0 Node could not be created.
|
||||||
|
* \retval * Node handle.
|
||||||
|
* \version 0.3
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxTensorReduceMaxNode(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor inputs,
|
||||||
|
vx_tensor axis,
|
||||||
|
vx_bool keep_dims,
|
||||||
|
vx_tensor outputs);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer minumum Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxMinimumLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer maximum Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxMaximumLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer select Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [out] output [static] The output tensor data.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxTensorSelectLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor output
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer gru cell activation z h Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [in] recurrent_activation [static] recurrent activation type.
|
||||||
|
* \param [in] activation [static] activation type.
|
||||||
|
* \param [out] output_list [static] The output tensor data.
|
||||||
|
* \param [out] output_count [static] The output tensor number.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxGruCellActivationZHLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_int32 recurrent_activation,
|
||||||
|
vx_int32 activation,
|
||||||
|
vx_tensor* output_list,
|
||||||
|
vx_uint32 output_count
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer gru cell h times activation r Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [in] recurrent_activation [static] recurrent activation type.
|
||||||
|
* \param [out] output_list [static] The output tensor data.
|
||||||
|
* \param [out] output_count [static] The output tensor number.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxGruCellHTimeActivationRLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_int32 recurrent_activation,
|
||||||
|
vx_tensor* output_list,
|
||||||
|
vx_uint32 output_count
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer gru cell reset after activationNode.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [in] recurrent_activation [static] recurrent activation type.
|
||||||
|
* \param [in] activation [static] activation type.
|
||||||
|
* \param [out] output_list [static] The output tensor data.
|
||||||
|
* \param [out] output_count [static] The output tensor number.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxGruCellResetAfterActivationLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_int32 recurrent_activation,
|
||||||
|
vx_int32 activation,
|
||||||
|
vx_tensor* output_list,
|
||||||
|
vx_uint32 output_count
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief [Graph] Creates a layer lstm activation Node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list [static] The input tensor data.
|
||||||
|
* \param [in] input_count [static] The input tensor number.
|
||||||
|
* \param [in] lstm_activation_param <tt>\ref vx_nn_lstm_activation_params_t </tt>.
|
||||||
|
* \param [out] output_list [static] The output tensor data.
|
||||||
|
* \param [out] output_count [static] The output tensor number.
|
||||||
|
* \return <tt> vx_node</tt>.
|
||||||
|
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||||
|
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||||
|
* \ingroup group_cnn
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxLSTMActivationLayer(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
const vx_nn_lstm_activation_params_t * lstm_activation_param,
|
||||||
|
vx_tensor* output_list,
|
||||||
|
vx_uint32 output_count
|
||||||
|
);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -242,6 +242,48 @@ typedef struct _vx_nn_convolution_relu_pooling_params_ext7_t
|
||||||
vx_bool isSub;
|
vx_bool isSub;
|
||||||
} vx_nn_convolution_relu_pooling_params_ext7_t, * vx_nn_convolution_relu_pooling_params_ext7;
|
} vx_nn_convolution_relu_pooling_params_ext7_t, * vx_nn_convolution_relu_pooling_params_ext7;
|
||||||
|
|
||||||
|
typedef struct _vx_nn_fused_sp_params_t
|
||||||
|
{
|
||||||
|
vx_enum multi_sp_kernel_type;
|
||||||
|
/*!<for mul>*/
|
||||||
|
vx_scalar mul_scale;
|
||||||
|
/*!<for sp>*/
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
vx_scalar linear_a, linear_b;
|
||||||
|
} linear;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
vx_scalar tanh_a, tanh_b;
|
||||||
|
float a_v, b_v;
|
||||||
|
} tanh_linear;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
vx_scalar hsigmoid_a, hsigmoid_b;
|
||||||
|
} hsigmoid;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
vx_scalar clip_a, clip_b;
|
||||||
|
} clip;
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
vx_scalar scalar_a, scalar_b, scalar_c, scalar_d;
|
||||||
|
} params;
|
||||||
|
} scalar_params;
|
||||||
|
/*!<for other kernel>*/
|
||||||
|
} vx_nn_fused_sp_params_t, * vx_nn_fused_sp_params;
|
||||||
|
|
||||||
|
typedef struct _vx_nn_convolution_relu_pooling_params_sp_ext_t
|
||||||
|
{
|
||||||
|
vx_nn_convolution_relu_pooling_params_ext4_t ext4; /*!< \brief convolution relu pooling params <tt>\ref vx_nn_convolution_relu_pooling_params_ext_t</tt> */
|
||||||
|
vx_object_array inputs_list;
|
||||||
|
vx_object_array outputs_list;
|
||||||
|
vx_nn_fused_sp_params_t sp_param;
|
||||||
|
|
||||||
|
} vx_nn_convolution_relu_pooling_params_sp_ext_t, * vx_nn_convolution_relu_pooling_params_sp_ext;
|
||||||
|
|
||||||
/*! \brief [Graph] Creates a Convolutional Network Convolution and Activation(Relu) and Pooling Layer Node, this fucntion match kronos NN Extension 1.2 verion.
|
/*! \brief [Graph] Creates a Convolutional Network Convolution and Activation(Relu) and Pooling Layer Node, this fucntion match kronos NN Extension 1.2 verion.
|
||||||
* \details This function implement Convolutional Network Convolution and Activation(Relu) and Pooling layer.
|
* \details This function implement Convolutional Network Convolution and Activation(Relu) and Pooling layer.
|
||||||
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of the accumulator bits are implementation defined,
|
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of the accumulator bits are implementation defined,
|
||||||
|
|
@ -1129,6 +1171,48 @@ VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer(vx_graph graph,
|
||||||
const vx_nn_gemm_relu_pooling_params merge_param,
|
const vx_nn_gemm_relu_pooling_params merge_param,
|
||||||
vx_tensor output);
|
vx_tensor output);
|
||||||
|
|
||||||
|
/*! \brief Create a fuse stream process node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] input_list input tensor list.
|
||||||
|
* \param [in] input_count input tensor number.
|
||||||
|
* \param [in] output_list output tensor list.
|
||||||
|
* \param [in] output_count output tensor number.
|
||||||
|
* \param [in] params the parameters for multi streamprocessor merging.
|
||||||
|
* \return <tt>\ref vx_node</tt>.
|
||||||
|
* \retval vx_node A node reference. Any possible errors preventing a successful creation
|
||||||
|
* should be checked using <tt>\ref vxGetStatus</tt>
|
||||||
|
* \ingroup group_vision_function_sp
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxFusedSpNode(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor* input_list,
|
||||||
|
vx_uint32 input_count,
|
||||||
|
vx_tensor* output_list,
|
||||||
|
vx_uint32 output_count,
|
||||||
|
const vx_nn_fused_sp_params_t * params
|
||||||
|
);
|
||||||
|
|
||||||
|
/*! \brief Create a conv fuse stream process node.
|
||||||
|
* \param [in] graph The handle to the graph.
|
||||||
|
* \param [in] inputs input tensor.
|
||||||
|
* \param [in] weights_biases [static] Point to WeightBiasesParameter data, vx_weights_biases_parameter is an opaque reference.
|
||||||
|
* \param [in] convolution_relu_pooling_params [static] Pointer to parameters of type <tt>\ref vx_nn_convolution_relu_pooling_params_t</tt>
|
||||||
|
* \param [in] size_of_convolution_relu_pooling_params [static] Size in bytes of convolution_relu_pooling_params.
|
||||||
|
* \param [in] outputs output tensor.
|
||||||
|
* \return <tt>\ref vx_node</tt>.
|
||||||
|
* \retval vx_node A node reference. Any possible errors preventing a successful creation
|
||||||
|
* should be checked using <tt>\ref vxGetStatus</tt>
|
||||||
|
* \ingroup group_vision_function_sp
|
||||||
|
*/
|
||||||
|
VX_API_ENTRY vx_node VX_API_CALL vxConvSpNode(
|
||||||
|
vx_graph graph,
|
||||||
|
vx_tensor inputs,
|
||||||
|
vx_weights_biases_parameter weights_biases,
|
||||||
|
const vx_nn_convolution_relu_pooling_params_t * convolution_relu_pooling_params,
|
||||||
|
vx_size size_of_convolution_relu_pooling_params,
|
||||||
|
vx_tensor outputs
|
||||||
|
);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -345,16 +345,6 @@ VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINST(
|
||||||
vx_context context
|
vx_context context
|
||||||
);
|
);
|
||||||
|
|
||||||
/*! \brief Creates an internal reference to a spinst data.
|
|
||||||
* \param [in] context The reference to the implementation context.
|
|
||||||
* \return A spinst data reference.
|
|
||||||
* \Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
|
||||||
* \ingroup group_object_spinst
|
|
||||||
*/
|
|
||||||
VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINSTInternal(
|
|
||||||
vx_context context
|
|
||||||
);
|
|
||||||
|
|
||||||
/*! \brief Releases a reference to a external spinst object.
|
/*! \brief Releases a reference to a external spinst object.
|
||||||
* The object may not be garbage collected until its total reference count is zero.
|
* The object may not be garbage collected until its total reference count is zero.
|
||||||
* \param [in] spinst_obj The pointer to the spinst data to release.
|
* \param [in] spinst_obj The pointer to the spinst data to release.
|
||||||
|
|
@ -368,19 +358,6 @@ VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINST(
|
||||||
vx_spinst *spinst_obj
|
vx_spinst *spinst_obj
|
||||||
);
|
);
|
||||||
|
|
||||||
/*! \brief Releases a reference to a internal spinst object.
|
|
||||||
* The object may not be garbage collected until its total reference count is zero.
|
|
||||||
* \param [in] spinst_obj The pointer to the spinst data to release.
|
|
||||||
* \post After returning from this function the reference is zeroed.
|
|
||||||
* \return A <tt>\ref vx_status_e</tt> enumeration.
|
|
||||||
* \retval VX_SUCCESS No errors; all other values indicate failure
|
|
||||||
* \retval * An error occurred. See <tt>\ref vx_status_e</tt>.
|
|
||||||
* \ingroup group_object_spinst
|
|
||||||
*/
|
|
||||||
VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINSTInternal(
|
|
||||||
vx_spinst *spinst_obj
|
|
||||||
);
|
|
||||||
|
|
||||||
/*! \brief Add a instruction to spinst object.
|
/*! \brief Add a instruction to spinst object.
|
||||||
* \param [in] spinst_obj The reference to the spinst object.
|
* \param [in] spinst_obj The reference to the spinst object.
|
||||||
* \param [in] inst_unit_array The units of one instruction. Use a <tt>\ref vx_spinst_unit_param</tt>.
|
* \param [in] inst_unit_array The units of one instruction. Use a <tt>\ref vx_spinst_unit_param</tt>.
|
||||||
|
|
|
||||||
|
|
@ -477,6 +477,8 @@ enum vx_type_e {
|
||||||
VX_TYPE_SPINST = 0x81B,/*!< \brief A <tt>\ref vx_spinst</tt>. */
|
VX_TYPE_SPINST = 0x81B,/*!< \brief A <tt>\ref vx_spinst</tt>. */
|
||||||
VX_TYPE_INT4 = 0x81C,/*!< \brief A <tt>\ref signed 4bits tensor.</tt>. */
|
VX_TYPE_INT4 = 0x81C,/*!< \brief A <tt>\ref signed 4bits tensor.</tt>. */
|
||||||
VX_TYPE_UINT4 = 0x81D,/*!< \brief A <tt>\ref unsigned 4bits tensor.</tt>. */
|
VX_TYPE_UINT4 = 0x81D,/*!< \brief A <tt>\ref unsigned 4bits tensor.</tt>. */
|
||||||
|
VX_TYPE_FLOAT8_E4M3 = 0x81E,/*!< \brief A <tt>\ref vx_float8_e4m3</tt>. */
|
||||||
|
VX_TYPE_FLOAT8_E5M2 = 0x81F,/*!< \brief A <tt>\ref vx_float8_e5m2</tt>. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief The enumeration of all status codes.
|
/*! \brief The enumeration of all status codes.
|
||||||
|
|
@ -803,6 +805,8 @@ enum vx_convert_policy_e {
|
||||||
VX_CONVERT_POLICY_WRAP = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CONVERT_POLICY) + 0x0,
|
VX_CONVERT_POLICY_WRAP = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CONVERT_POLICY) + 0x0,
|
||||||
/*! \brief Results are saturated to the bit depth of the output operand. */
|
/*! \brief Results are saturated to the bit depth of the output operand. */
|
||||||
VX_CONVERT_POLICY_SATURATE = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CONVERT_POLICY) + 0x1,
|
VX_CONVERT_POLICY_SATURATE = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CONVERT_POLICY) + 0x1,
|
||||||
|
/*! \brief Results preserve infinity and nan value. */
|
||||||
|
VX_CONVERT_POLICY_INF = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_CONVERT_POLICY) + 0x0,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief Based on the VX_DF_IMAGE definition.
|
/*! \brief Based on the VX_DF_IMAGE definition.
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -6,3 +6,6 @@ DEF_NODE_TYPE(custom_ainr_denoise_postprocess)
|
||||||
DEF_NODE_TYPE(custom_warp_affine)
|
DEF_NODE_TYPE(custom_warp_affine)
|
||||||
DEF_NODE_TYPE(custom_warp_perspective)
|
DEF_NODE_TYPE(custom_warp_perspective)
|
||||||
DEF_NODE_TYPE(custom_sample)
|
DEF_NODE_TYPE(custom_sample)
|
||||||
|
DEF_NODE_TYPE(custom_tiny_yolov4_postprocess)
|
||||||
|
DEF_NODE_TYPE(custom_tiny_yolov4_postprocess_confidence)
|
||||||
|
DEF_NODE_TYPE(custom_tiny_yolov4_postprocess_box)
|
||||||
|
|
|
||||||
|
|
@ -6,3 +6,6 @@ DEF_OP(CUSTOM_AINR_DENOISE_POSTPROCESS)
|
||||||
DEF_OP(CUSTOM_WARP_AFFINE)
|
DEF_OP(CUSTOM_WARP_AFFINE)
|
||||||
DEF_OP(CUSTOM_WARP_PERSPECTIVE)
|
DEF_OP(CUSTOM_WARP_PERSPECTIVE)
|
||||||
DEF_OP(CUSTOM_SAMPLE)
|
DEF_OP(CUSTOM_SAMPLE)
|
||||||
|
DEF_OP(CUSTOM_TINY_YOLOV4_POSTPROCESS)
|
||||||
|
DEF_OP(CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE)
|
||||||
|
DEF_OP(CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_H
|
||||||
|
#define _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_H
|
||||||
|
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct _vsi_nn_custom_tiny_yolov4_postprocess_param
|
||||||
|
{
|
||||||
|
struct _custom_tiny_yolov4_postprocess_local_data_t* local;
|
||||||
|
// Add parameters here
|
||||||
|
} vsi_nn_custom_tiny_yolov4_postprocess_param;
|
||||||
|
_compiler_assert(offsetof(vsi_nn_custom_tiny_yolov4_postprocess_param, local) == 0, \
|
||||||
|
vsi_nn_custom_tiny_yolov4_postprocess_h );
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX_H
|
||||||
|
#define _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX_H
|
||||||
|
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct _vsi_nn_custom_tiny_yolov4_postprocess_box_param
|
||||||
|
{
|
||||||
|
struct _custom_tiny_yolov4_postprocess_box_local_data_t* local;
|
||||||
|
// Add parameters here
|
||||||
|
float bias_0;
|
||||||
|
float bias_1;
|
||||||
|
} vsi_nn_custom_tiny_yolov4_postprocess_box_param;
|
||||||
|
_compiler_assert(offsetof(vsi_nn_custom_tiny_yolov4_postprocess_box_param, local) == 0, \
|
||||||
|
vsi_nn_custom_tiny_yolov4_postprocess_box_h );
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_H
|
||||||
|
#define _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_H
|
||||||
|
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct _vsi_nn_custom_tiny_yolov4_postprocess_confidence_param
|
||||||
|
{
|
||||||
|
struct _custom_tiny_yolov4_postprocess_confidence_local_data_t* local;
|
||||||
|
// Add parameters here
|
||||||
|
} vsi_nn_custom_tiny_yolov4_postprocess_confidence_param;
|
||||||
|
_compiler_assert(offsetof(vsi_nn_custom_tiny_yolov4_postprocess_confidence_param, local) == 0, \
|
||||||
|
vsi_nn_custom_tiny_yolov4_postprocess_confidence_h );
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
@ -38,6 +38,7 @@ typedef struct _vsi_nn_custom_warp_affine_param
|
||||||
const float *matrix;
|
const float *matrix;
|
||||||
vsi_enum type;
|
vsi_enum type;
|
||||||
int32_t size[2];
|
int32_t size[2];
|
||||||
|
vsi_enum rgb_type;
|
||||||
} vsi_nn_custom_warp_affine_param;
|
} vsi_nn_custom_warp_affine_param;
|
||||||
_compiler_assert(offsetof(vsi_nn_custom_warp_affine_param, local) == 0, \
|
_compiler_assert(offsetof(vsi_nn_custom_warp_affine_param, local) == 0, \
|
||||||
vsi_nn_custom_warp_affine_h );
|
vsi_nn_custom_warp_affine_h );
|
||||||
|
|
|
||||||
|
|
@ -31,5 +31,8 @@
|
||||||
#include "custom/ops/vsi_nn_op_custom_warp_affine.h"
|
#include "custom/ops/vsi_nn_op_custom_warp_affine.h"
|
||||||
#include "custom/ops/vsi_nn_op_custom_warp_perspective.h"
|
#include "custom/ops/vsi_nn_op_custom_warp_perspective.h"
|
||||||
#include "custom/ops/vsi_nn_op_custom_sample.h"
|
#include "custom/ops/vsi_nn_op_custom_sample.h"
|
||||||
|
#include "custom/ops/vsi_nn_op_custom_tiny_yolov4_postprocess.h"
|
||||||
|
#include "custom/ops/vsi_nn_op_custom_tiny_yolov4_postprocess_confidence.h"
|
||||||
|
#include "custom/ops/vsi_nn_op_custom_tiny_yolov4_postprocess_box.h"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -193,3 +193,4 @@ DEF_OP(REVERSESEQUENCE)
|
||||||
DEF_OP(INVERSE_SIGMOID)
|
DEF_OP(INVERSE_SIGMOID)
|
||||||
DEF_OP(GRID_SAMPLE)
|
DEF_OP(GRID_SAMPLE)
|
||||||
DEF_OP(LPNORM)
|
DEF_OP(LPNORM)
|
||||||
|
DEF_OP(RESIZE_3D)
|
||||||
|
|
|
||||||
|
|
@ -20,4 +20,3 @@ DEF_OP(SPACE2DEPTH_INTERNAL)
|
||||||
DEF_OP(GRUCELL_H_TIMES_ACTIVATION_R)
|
DEF_OP(GRUCELL_H_TIMES_ACTIVATION_R)
|
||||||
DEF_OP(GRUCELL_ACTIVATION_Z_H)
|
DEF_OP(GRUCELL_ACTIVATION_Z_H)
|
||||||
DEF_OP(REDUCE_MEAN_INTERNAL)
|
DEF_OP(REDUCE_MEAN_INTERNAL)
|
||||||
DEF_OP(BILINEAR_GRID_SAMPLE)
|
|
||||||
|
|
|
||||||
|
|
@ -79,6 +79,8 @@ typedef enum
|
||||||
BOOL8,
|
BOOL8,
|
||||||
I4,
|
I4,
|
||||||
U4,
|
U4,
|
||||||
|
FP8_E4M3,
|
||||||
|
FP8_E5M2,
|
||||||
} VSI_PUBLIC_TYPE vsi_nn_kernel_dtype_e;
|
} VSI_PUBLIC_TYPE vsi_nn_kernel_dtype_e;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
|
|
@ -89,6 +91,8 @@ typedef enum
|
||||||
VSI_NN_KERNEL_QUANT_ASYMM_PERCHANNEL,
|
VSI_NN_KERNEL_QUANT_ASYMM_PERCHANNEL,
|
||||||
VSI_NN_KERNEL_QUANT_SYMM,
|
VSI_NN_KERNEL_QUANT_SYMM,
|
||||||
VSI_NN_KERNEL_QUANT_SYMM_PERCHANNEL,
|
VSI_NN_KERNEL_QUANT_SYMM_PERCHANNEL,
|
||||||
|
VSI_NN_KERNEL_QUANT_FLOAT8,
|
||||||
|
VSI_NN_KERNEL_QUANT_FLOAT8_PERCHANNEL,
|
||||||
VSI_NN_KERNEL_QUANT_TYPE_NUM
|
VSI_NN_KERNEL_QUANT_TYPE_NUM
|
||||||
} vsi_nn_kernel_quant_type_e;
|
} vsi_nn_kernel_quant_type_e;
|
||||||
|
|
||||||
|
|
@ -522,6 +526,10 @@ static VSI_INLINE_API vsi_nn_kernel_dtype_e vsi_nn_kernel_map_dtype
|
||||||
return BF16;
|
return BF16;
|
||||||
case VSI_NN_TYPE_FLOAT32:
|
case VSI_NN_TYPE_FLOAT32:
|
||||||
return F32;
|
return F32;
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E4M3:
|
||||||
|
return FP8_E4M3;
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E5M2:
|
||||||
|
return FP8_E5M2;
|
||||||
default:
|
default:
|
||||||
VSILOGE("error data type %d", dtype);
|
VSILOGE("error data type %d", dtype);
|
||||||
break;
|
break;
|
||||||
|
|
@ -579,6 +587,8 @@ static VSI_INLINE_API size_t vsi_nn_kernel_dtype_get_bytes
|
||||||
case I8:
|
case I8:
|
||||||
case U8:
|
case U8:
|
||||||
case BOOL8:
|
case BOOL8:
|
||||||
|
case FP8_E4M3:
|
||||||
|
case FP8_E5M2:
|
||||||
return sizeof(int8_t);
|
return sizeof(int8_t);
|
||||||
case I16:
|
case I16:
|
||||||
case U16:
|
case U16:
|
||||||
|
|
@ -611,6 +621,8 @@ static VSI_INLINE_API vsi_size_t vsi_nn_kernel_dtype_get_bits
|
||||||
case I8:
|
case I8:
|
||||||
case U8:
|
case U8:
|
||||||
case BOOL8:
|
case BOOL8:
|
||||||
|
case FP8_E4M3:
|
||||||
|
case FP8_E5M2:
|
||||||
return 8;
|
return 8;
|
||||||
case I16:
|
case I16:
|
||||||
case U16:
|
case U16:
|
||||||
|
|
@ -879,7 +891,7 @@ static VSI_INLINE_API void vsi_nn_kernel_tensor_attr_get_stride
|
||||||
shape = attr->shape->data;
|
shape = attr->shape->data;
|
||||||
type_bits = vsi_nn_kernel_dtype_get_bits( attr->dtype );
|
type_bits = vsi_nn_kernel_dtype_get_bits( attr->dtype );
|
||||||
|
|
||||||
if ( type_bits < BITS_PER_BYTE )
|
if ( type_bits < BITS_PER_BYTE && type_bits != 0)
|
||||||
{
|
{
|
||||||
vsi_size_t i;
|
vsi_size_t i;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -91,4 +91,21 @@ vsi_bool vsi_nn_kernel_optimize_scatter_elements_shape
|
||||||
vsi_size_t* out_shape_x, uint32_t* out_rank_x, int32_t* out_axis, vsi_size_t max_size
|
vsi_size_t* out_shape_x, uint32_t* out_rank_x, int32_t* out_axis, vsi_size_t max_size
|
||||||
);
|
);
|
||||||
|
|
||||||
|
vsi_bool vsi_nn_kernel_optimize_matrixmul_broadcast_shape
|
||||||
|
(
|
||||||
|
const vsi_size_t * shape_x,
|
||||||
|
const vsi_size_t * shape_y,
|
||||||
|
const vsi_size_t * shape_output,
|
||||||
|
vsi_size_t rank_x,
|
||||||
|
vsi_size_t rank_y,
|
||||||
|
vsi_size_t rank_out,
|
||||||
|
vsi_size_t* out_shape_x,
|
||||||
|
vsi_size_t* out_shape_y,
|
||||||
|
vsi_size_t* out_shape_output,
|
||||||
|
uint32_t* new_rank,
|
||||||
|
uint32_t* cross_flg,
|
||||||
|
uint32_t* size_axis_inner_outer,
|
||||||
|
uint32_t* strides_axis_inner_outer
|
||||||
|
);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,12 @@ typedef struct _vsi_nn_pre_process_param
|
||||||
|
|
||||||
vsi_nn_pre_process_type_e type;
|
vsi_nn_pre_process_type_e type;
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
float mean[3];
|
||||||
|
float scale[3];
|
||||||
|
} norm2;
|
||||||
|
|
||||||
vsi_nn_pre_process_lcl_data *local;
|
vsi_nn_pre_process_lcl_data *local;
|
||||||
} vsi_nn_pre_process_param;
|
} vsi_nn_pre_process_param;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,10 @@ typedef struct _vsi_nn_pre_process_bgra_param
|
||||||
|
|
||||||
vsi_bool reverse_channel;
|
vsi_bool reverse_channel;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
|
|
||||||
/* pre process rgb layer local data structure */
|
/* pre process rgb layer local data structure */
|
||||||
vsi_nn_pre_process_bgra_lcl_data local;
|
vsi_nn_pre_process_bgra_lcl_data local;
|
||||||
} vsi_nn_pre_process_bgra_param;
|
} vsi_nn_pre_process_bgra_param;
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,10 @@ typedef struct _vsi_nn_pre_process_nv12_param
|
||||||
vsi_nn_pre_process_nv12_lcl_data* local;
|
vsi_nn_pre_process_nv12_lcl_data* local;
|
||||||
|
|
||||||
vsi_nn_nv_type nv_type;
|
vsi_nn_nv_type nv_type;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
} vsi_nn_pre_process_nv12_param;
|
} vsi_nn_pre_process_nv12_param;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,9 @@ typedef struct _vsi_nn_pre_process_rgb_param
|
||||||
|
|
||||||
vsi_bool reverse_channel;
|
vsi_bool reverse_channel;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
/* pre process rgb layer local data structure */
|
/* pre process rgb layer local data structure */
|
||||||
vsi_nn_pre_process_rgb_lcl_data local;
|
vsi_nn_pre_process_rgb_lcl_data local;
|
||||||
} vsi_nn_pre_process_rgb_param;
|
} vsi_nn_pre_process_rgb_param;
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,15 @@ typedef struct _vsi_nn_pre_process_rgb888_planar_param
|
||||||
float g_mean;
|
float g_mean;
|
||||||
float b_mean;
|
float b_mean;
|
||||||
float scale;
|
float scale;
|
||||||
|
|
||||||
|
|
||||||
|
vsi_bool reverse_channel;
|
||||||
|
vsi_bool enable_rgb88_planar_nhwc;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
|
|
||||||
} vsi_nn_pre_process_rgb888_planar_param;
|
} vsi_nn_pre_process_rgb888_planar_param;
|
||||||
_compiler_assert(offsetof(vsi_nn_pre_process_rgb888_planar_param, local) == 0, \
|
_compiler_assert(offsetof(vsi_nn_pre_process_rgb888_planar_param, local) == 0, \
|
||||||
vsi_nn_pre_process_rgb888_planar_h );
|
vsi_nn_pre_process_rgb888_planar_h );
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,11 @@ typedef struct _vsi_nn_pre_process_yuv420_param
|
||||||
float rgb_scale;
|
float rgb_scale;
|
||||||
|
|
||||||
vsi_bool reverse_channel;
|
vsi_bool reverse_channel;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
|
|
||||||
/* local data must be the first. */
|
/* local data must be the first. */
|
||||||
vsi_nn_pre_process_yuv420_lcl_data local;
|
vsi_nn_pre_process_yuv420_lcl_data local;
|
||||||
} vsi_nn_pre_process_yuv420_param;
|
} vsi_nn_pre_process_yuv420_param;
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,10 @@ typedef struct _vsi_nn_pre_process_yuv422_param
|
||||||
float rgb_scale;
|
float rgb_scale;
|
||||||
|
|
||||||
vsi_bool reverse_channel;
|
vsi_bool reverse_channel;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
} vsi_nn_pre_process_yuv422_param;
|
} vsi_nn_pre_process_yuv422_param;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,10 @@ typedef struct _vsi_nn_pre_process_yuv444_param
|
||||||
float rgb_scale;
|
float rgb_scale;
|
||||||
|
|
||||||
vsi_bool reverse_channel;
|
vsi_bool reverse_channel;
|
||||||
|
|
||||||
|
float r_scale;
|
||||||
|
float g_scale;
|
||||||
|
float b_scale;
|
||||||
/* local data must be the first. */
|
/* local data must be the first. */
|
||||||
vsi_nn_pre_process_yuv444_lcl_data* local;
|
vsi_nn_pre_process_yuv444_lcl_data* local;
|
||||||
} vsi_nn_pre_process_yuv444_param;
|
} vsi_nn_pre_process_yuv444_param;
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@
|
||||||
*
|
*
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#ifndef _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
|
#ifndef _VSI_NN_OP_RESIZE_3D_H
|
||||||
#define _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
|
#define _VSI_NN_OP_RESIZE_3D_H
|
||||||
|
|
||||||
#include "vsi_nn_types.h"
|
#include "vsi_nn_types.h"
|
||||||
|
|
||||||
|
|
@ -31,17 +31,19 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef struct _vsi_nn_resize_3d_local_data {
|
||||||
|
vsi_bool use_internal_node;
|
||||||
|
} vsi_nn_resize_3d_local_data;
|
||||||
|
|
||||||
typedef struct _vsi_nn_bilinear_grid_sample_param
|
typedef struct _vsi_nn_resize_3d_param
|
||||||
{
|
{
|
||||||
struct _bilinear_grid_sample_local_data_t* local;
|
vsi_nn_resize_3d_local_data* lcl_data;
|
||||||
vsi_bool align_corners;
|
vsi_enum type;
|
||||||
vsi_nn_pad_mode_e padding_mode;
|
float factor;
|
||||||
int32_t const_val;
|
int32_t size[3];
|
||||||
} vsi_nn_bilinear_grid_sample_param;
|
vsi_bool align_corners;
|
||||||
|
vsi_bool half_pixel_centers;
|
||||||
_compiler_assert(offsetof(vsi_nn_bilinear_grid_sample_param, local) == 0, \
|
} vsi_nn_resize_3d_param;
|
||||||
vsi_nn_bilinear_grid_sample_h );
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
@ -33,6 +33,7 @@ extern "C" {
|
||||||
typedef struct _vsi_nn_topk_param
|
typedef struct _vsi_nn_topk_param
|
||||||
{
|
{
|
||||||
uint32_t k;
|
uint32_t k;
|
||||||
|
int32_t axis;
|
||||||
} vsi_nn_topk_param;
|
} vsi_nn_topk_param;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,9 @@ enum {
|
||||||
D_BF16 = VSI_NN_TYPE_BFLOAT16,
|
D_BF16 = VSI_NN_TYPE_BFLOAT16,
|
||||||
D_BOOL8 = VSI_NN_TYPE_BOOL8,
|
D_BOOL8 = VSI_NN_TYPE_BOOL8,
|
||||||
D_I4 = VSI_NN_TYPE_INT4,
|
D_I4 = VSI_NN_TYPE_INT4,
|
||||||
D_U4 = VSI_NN_TYPE_UINT4
|
D_U4 = VSI_NN_TYPE_UINT4,
|
||||||
|
D_F8_E4M3 = VSI_NN_TYPE_FLOAT8_E4M3,
|
||||||
|
D_F8_E5M2 = VSI_NN_TYPE_FLOAT8_E5M2
|
||||||
};
|
};
|
||||||
|
|
||||||
/* short alias for qtype */
|
/* short alias for qtype */
|
||||||
|
|
@ -63,6 +65,8 @@ enum {
|
||||||
Q_ASYM = VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC << Q_SHIFT,
|
Q_ASYM = VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC << Q_SHIFT,
|
||||||
Q_SYM_PC = VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC << Q_SHIFT,
|
Q_SYM_PC = VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC << Q_SHIFT,
|
||||||
Q_SYM = VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC << Q_SHIFT,
|
Q_SYM = VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC << Q_SHIFT,
|
||||||
|
Q_FP8 = VSI_NN_QNT_TYPE_SYMMETRIC_FLOAT8 << Q_SHIFT,
|
||||||
|
Q_FP8_PC = VSI_NN_QNT_TYPE_PERCHANNEL_SYMMETRIC_FLOAT8 << Q_SHIFT,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@
|
||||||
#include "vsi_nn_types.h"
|
#include "vsi_nn_types.h"
|
||||||
#include "vsi_nn_math.h"
|
#include "vsi_nn_math.h"
|
||||||
#include "vsi_nn_tensor.h"
|
#include "vsi_nn_tensor.h"
|
||||||
|
#include "vsi_nn_log.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
@ -78,6 +79,8 @@ static VSI_INLINE_API vsi_bool type_is_signed
|
||||||
case VSI_NN_TYPE_FLOAT32:
|
case VSI_NN_TYPE_FLOAT32:
|
||||||
case VSI_NN_TYPE_FLOAT64:
|
case VSI_NN_TYPE_FLOAT64:
|
||||||
case VSI_NN_TYPE_BFLOAT16:
|
case VSI_NN_TYPE_BFLOAT16:
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E4M3:
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E5M2:
|
||||||
ret = TRUE;
|
ret = TRUE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -93,9 +96,14 @@ static VSI_INLINE_API uint32_t type_get_bytes
|
||||||
{
|
{
|
||||||
switch( type )
|
switch( type )
|
||||||
{
|
{
|
||||||
|
case VSI_NN_TYPE_INT4:
|
||||||
|
case VSI_NN_TYPE_UINT4:
|
||||||
|
return 0;
|
||||||
case VSI_NN_TYPE_INT8:
|
case VSI_NN_TYPE_INT8:
|
||||||
case VSI_NN_TYPE_UINT8:
|
case VSI_NN_TYPE_UINT8:
|
||||||
case VSI_NN_TYPE_BOOL8:
|
case VSI_NN_TYPE_BOOL8:
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E4M3:
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E5M2:
|
||||||
return 1;
|
return 1;
|
||||||
case VSI_NN_TYPE_INT16:
|
case VSI_NN_TYPE_INT16:
|
||||||
case VSI_NN_TYPE_UINT16:
|
case VSI_NN_TYPE_UINT16:
|
||||||
|
|
@ -111,7 +119,8 @@ static VSI_INLINE_API uint32_t type_get_bytes
|
||||||
case VSI_NN_TYPE_FLOAT64:
|
case VSI_NN_TYPE_FLOAT64:
|
||||||
return 8;
|
return 8;
|
||||||
default:
|
default:
|
||||||
return 0;
|
VSILOGE("unsupported type: %d", type);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
} /* type_get_bytes() */
|
} /* type_get_bytes() */
|
||||||
|
|
||||||
|
|
@ -128,6 +137,8 @@ static VSI_INLINE_API uint32_t type_get_bits
|
||||||
case VSI_NN_TYPE_INT8:
|
case VSI_NN_TYPE_INT8:
|
||||||
case VSI_NN_TYPE_UINT8:
|
case VSI_NN_TYPE_UINT8:
|
||||||
case VSI_NN_TYPE_BOOL8:
|
case VSI_NN_TYPE_BOOL8:
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E4M3:
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E5M2:
|
||||||
return 8;
|
return 8;
|
||||||
case VSI_NN_TYPE_INT16:
|
case VSI_NN_TYPE_INT16:
|
||||||
case VSI_NN_TYPE_UINT16:
|
case VSI_NN_TYPE_UINT16:
|
||||||
|
|
@ -143,7 +154,8 @@ static VSI_INLINE_API uint32_t type_get_bits
|
||||||
case VSI_NN_TYPE_FLOAT64:
|
case VSI_NN_TYPE_FLOAT64:
|
||||||
return 64;
|
return 64;
|
||||||
default:
|
default:
|
||||||
return 0;
|
VSILOGE("unsupported type: %d", type);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
} /* type_get_bits() */
|
} /* type_get_bits() */
|
||||||
|
|
||||||
|
|
@ -236,6 +248,7 @@ static VSI_INLINE_API float affine_to_fp32
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
float data;
|
float data;
|
||||||
|
VSI_UNREFERENCED(type);
|
||||||
data = ( (float)val - zero_point ) * scale;
|
data = ( (float)val - zero_point ) * scale;
|
||||||
return data;
|
return data;
|
||||||
} /* affine_to_fp32() */
|
} /* affine_to_fp32() */
|
||||||
|
|
@ -279,6 +292,7 @@ static VSI_INLINE_API float dfp_to_fp32
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
float result;
|
float result;
|
||||||
|
VSI_UNREFERENCED(type);
|
||||||
if( fl > 0 )
|
if( fl > 0 )
|
||||||
{
|
{
|
||||||
result = (float)val * ( 1.0f / ( (float) ( (int64_t)1 << fl ) ) );
|
result = (float)val * ( 1.0f / ( (float) ( (int64_t)1 << fl ) ) );
|
||||||
|
|
@ -440,6 +454,139 @@ static VSI_INLINE_API uint16_t fp32_to_bfp16_rtne
|
||||||
return out;
|
return out;
|
||||||
} /* fp32_to_bfp16_rtne */
|
} /* fp32_to_bfp16_rtne */
|
||||||
|
|
||||||
|
#define FLOAT_BIAS_EXPONENT 127
|
||||||
|
#define FLOAT_EXPONENT_SIZE 8
|
||||||
|
#define FLOAT_MANTISSA_SIZE 23
|
||||||
|
#define FLOAT8_E4M3_BIAS_EXPONENT 7
|
||||||
|
#define FLOAT8_E4M3_EXPONENT_SIZE 4
|
||||||
|
#define FLOAT8_E4M3_MANTISSA_SIZE 3
|
||||||
|
#define FLOAT8_E5M2_BIAS_EXPONENT 15
|
||||||
|
#define FLOAT8_E5M2_EXPONENT_SIZE 5
|
||||||
|
#define FLOAT8_E5M2_MANTISSA_SIZE 2
|
||||||
|
|
||||||
|
static VSI_INLINE_API uint8_t fp32_to_fp8_e4m3(float in, const float scale) {
|
||||||
|
float fp8_f32 = in / scale;
|
||||||
|
int32_t fp8_i32 = *((int32_t*)&fp8_f32);
|
||||||
|
//int32_t mask = (int32_t)(pow(2, 32) - 1 - (pow(2, 23 - 3) - 1));
|
||||||
|
int32_t eps = 1 << (23 - 3 - 1);
|
||||||
|
fp8_i32 += eps;
|
||||||
|
//fp8_i32 &= mask;
|
||||||
|
{
|
||||||
|
int sign = (fp8_i32 >> (FLOAT_EXPONENT_SIZE + FLOAT_MANTISSA_SIZE)) & 0x1;
|
||||||
|
int exp = (fp8_i32 >> FLOAT_MANTISSA_SIZE) & 0xff;
|
||||||
|
int expShiftValue = FLOAT8_E4M3_BIAS_EXPONENT - FLOAT_BIAS_EXPONENT;
|
||||||
|
int mantissa = (fp8_i32 >> (FLOAT_MANTISSA_SIZE - FLOAT8_E4M3_MANTISSA_SIZE)) & 0x7;
|
||||||
|
|
||||||
|
exp = (exp + expShiftValue) & 0xF;
|
||||||
|
|
||||||
|
return (uint8_t)(sign << 7 | exp << 3 | mantissa);
|
||||||
|
}
|
||||||
|
} /* fp32_to_fp8_e4m3() */
|
||||||
|
|
||||||
|
static VSI_INLINE_API uint8_t fp32_to_fp8_e5m2(float in, const float scale) {
|
||||||
|
float fp8_f32 = in / scale;
|
||||||
|
int32_t fp8_i32 = *((int32_t*)&fp8_f32);
|
||||||
|
//int32_t mask = (int32_t)(pow(2, 32) - 1 - (pow(2, 23 - 2) - 1));
|
||||||
|
int32_t eps = 1 << (23 - 2 - 1);
|
||||||
|
fp8_i32 += eps;
|
||||||
|
//fp8_i32 &= mask;
|
||||||
|
{
|
||||||
|
int sign = (fp8_i32 >> (FLOAT_EXPONENT_SIZE + FLOAT_MANTISSA_SIZE)) & 0x1;
|
||||||
|
int exp = (fp8_i32 >> FLOAT_MANTISSA_SIZE) & 0xff;
|
||||||
|
int expShiftValue = FLOAT8_E5M2_BIAS_EXPONENT - FLOAT_BIAS_EXPONENT;
|
||||||
|
int mantissa = (fp8_i32 >> (FLOAT_MANTISSA_SIZE - FLOAT8_E5M2_MANTISSA_SIZE)) & 0x3;
|
||||||
|
|
||||||
|
exp = (exp + expShiftValue) & 0x1F;
|
||||||
|
|
||||||
|
return (uint8_t)(sign << 7 | exp << 2 | mantissa);
|
||||||
|
}
|
||||||
|
} /* fp32_to_fp8_e5m2() */
|
||||||
|
|
||||||
|
static VSI_INLINE_API float fp8_e4m3_to_fp32(uint8_t in, const float scale) {
|
||||||
|
float val_fp32;
|
||||||
|
|
||||||
|
uint32_t signOut = 0;
|
||||||
|
uint32_t exponentOut = 0;
|
||||||
|
uint32_t mantissaOut = 0;
|
||||||
|
uint32_t out_u = 0;
|
||||||
|
|
||||||
|
uint32_t signIn;
|
||||||
|
uint32_t exponentIn;
|
||||||
|
uint32_t mantissaIn;
|
||||||
|
int expShiftValue = FLOAT_BIAS_EXPONENT - FLOAT8_E4M3_BIAS_EXPONENT;
|
||||||
|
|
||||||
|
signIn = (in >> (FLOAT8_E4M3_EXPONENT_SIZE + FLOAT8_E4M3_MANTISSA_SIZE)) & 0x1;
|
||||||
|
exponentIn = (in >> FLOAT8_E4M3_MANTISSA_SIZE) & 0xF;
|
||||||
|
mantissaIn = in & 0x7;
|
||||||
|
|
||||||
|
signOut = signIn;
|
||||||
|
|
||||||
|
if (exponentIn == 0 && mantissaIn == 0)
|
||||||
|
{
|
||||||
|
goto final;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (exponentIn == 0xf && mantissaIn == 0x7)
|
||||||
|
{
|
||||||
|
exponentOut = 0xff;
|
||||||
|
mantissaOut = 0x400000;
|
||||||
|
goto final;
|
||||||
|
}
|
||||||
|
|
||||||
|
exponentOut = (exponentIn + expShiftValue) & 0xff;
|
||||||
|
mantissaOut = (mantissaIn << (FLOAT_MANTISSA_SIZE - FLOAT8_E4M3_MANTISSA_SIZE)) & 0x7fffff;
|
||||||
|
|
||||||
|
|
||||||
|
final:
|
||||||
|
out_u = signOut << 31 | exponentOut << 23 | mantissaOut;
|
||||||
|
val_fp32 = *((float*)&out_u);
|
||||||
|
|
||||||
|
return val_fp32 * scale;
|
||||||
|
} /* fp8_e4m3_to_fp32() */
|
||||||
|
|
||||||
|
static VSI_INLINE_API float fp8_e5m2_to_fp32(int8_t in, const float scale) {
|
||||||
|
float val_fp32;
|
||||||
|
|
||||||
|
uint32_t signOut = 0;
|
||||||
|
uint32_t exponentOut = 0;
|
||||||
|
uint32_t mantissaOut = 0;
|
||||||
|
uint32_t out_u = 0;
|
||||||
|
|
||||||
|
uint32_t signIn;
|
||||||
|
uint32_t exponentIn;
|
||||||
|
uint32_t mantissaIn;
|
||||||
|
int expShiftValue = FLOAT_BIAS_EXPONENT - FLOAT8_E5M2_BIAS_EXPONENT;
|
||||||
|
|
||||||
|
signIn = (in >> 7) & 0x1;
|
||||||
|
exponentIn = (in >> 2) & 0x1F;
|
||||||
|
mantissaIn = in & 0x3;
|
||||||
|
|
||||||
|
signOut = signIn;
|
||||||
|
|
||||||
|
if (exponentIn == 0 && mantissaIn == 0)
|
||||||
|
{
|
||||||
|
goto final;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (exponentIn == 0x1f && mantissaIn == 0x3)
|
||||||
|
{
|
||||||
|
exponentOut = 0xff;
|
||||||
|
mantissaOut = 0x400000;
|
||||||
|
goto final;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
exponentOut = (exponentIn + expShiftValue) & 0xff;
|
||||||
|
mantissaOut = (mantissaIn << (FLOAT_MANTISSA_SIZE - FLOAT8_E5M2_MANTISSA_SIZE)) & 0x7fffff;
|
||||||
|
|
||||||
|
|
||||||
|
final:
|
||||||
|
out_u = signOut << 31 | exponentOut << 23 | mantissaOut;
|
||||||
|
val_fp32 = *((float*)&out_u);
|
||||||
|
|
||||||
|
return val_fp32 * scale;
|
||||||
|
} /* fp8_e5m2_to_fp32() */
|
||||||
|
|
||||||
static VSI_INLINE_API vsi_status dtype_to_float32
|
static VSI_INLINE_API vsi_status dtype_to_float32
|
||||||
(
|
(
|
||||||
uint8_t *src,
|
uint8_t *src,
|
||||||
|
|
@ -458,6 +605,12 @@ static VSI_INLINE_API vsi_status dtype_to_float32
|
||||||
case VSI_NN_TYPE_BFLOAT16:
|
case VSI_NN_TYPE_BFLOAT16:
|
||||||
*dst = bfp16_to_fp32( *(int16_t *)src );
|
*dst = bfp16_to_fp32( *(int16_t *)src );
|
||||||
break;
|
break;
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E4M3:
|
||||||
|
*dst = fp8_e4m3_to_fp32(*(int8_t*)src, src_dtype->scale);
|
||||||
|
break;
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E5M2:
|
||||||
|
*dst = fp8_e5m2_to_fp32(*(int8_t *)src, src_dtype->scale);
|
||||||
|
break;
|
||||||
case VSI_NN_TYPE_INT4:
|
case VSI_NN_TYPE_INT4:
|
||||||
case VSI_NN_TYPE_UINT4:
|
case VSI_NN_TYPE_UINT4:
|
||||||
case VSI_NN_TYPE_INT8:
|
case VSI_NN_TYPE_INT8:
|
||||||
|
|
@ -511,6 +664,12 @@ static VSI_INLINE_API vsi_status float32_to_dtype
|
||||||
case VSI_NN_TYPE_BFLOAT16:
|
case VSI_NN_TYPE_BFLOAT16:
|
||||||
*(int16_t *)dst = fp32_to_bfp16_rtne( src );
|
*(int16_t *)dst = fp32_to_bfp16_rtne( src );
|
||||||
break;
|
break;
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E4M3:
|
||||||
|
*(int8_t *)dst = fp32_to_fp8_e4m3(src, dst_dtype->scale);
|
||||||
|
break;
|
||||||
|
case VSI_NN_TYPE_FLOAT8_E5M2:
|
||||||
|
*(int8_t *)dst = fp32_to_fp8_e5m2(src, dst_dtype->scale);
|
||||||
|
break;
|
||||||
case VSI_NN_TYPE_INT4:
|
case VSI_NN_TYPE_INT4:
|
||||||
case VSI_NN_TYPE_UINT4:
|
case VSI_NN_TYPE_UINT4:
|
||||||
case VSI_NN_TYPE_INT8:
|
case VSI_NN_TYPE_INT8:
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@
|
||||||
extern "C"{
|
extern "C"{
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define vsi_nn_LinkListInitRoot(n) do{n = NULL;} while (0);
|
#define vsi_nn_LinkListInitRoot(n) {n = NULL;}
|
||||||
|
|
||||||
typedef struct _vsi_nn_link_list
|
typedef struct _vsi_nn_link_list
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -53,12 +53,13 @@ extern "C" {
|
||||||
#define DEFINE_ARRAY_TYPE( NAME, TYPE ) \
|
#define DEFINE_ARRAY_TYPE( NAME, TYPE ) \
|
||||||
typedef struct { \
|
typedef struct { \
|
||||||
size_t size; \
|
size_t size; \
|
||||||
TYPE data[0]; \
|
TYPE *data; \
|
||||||
} vsi_##NAME##_array_t; \
|
} vsi_##NAME##_array_t; \
|
||||||
static VSI_INLINE_API vsi_##NAME##_array_t * vsi_##NAME##_array_create( size_t size ) { \
|
static VSI_INLINE_API vsi_##NAME##_array_t * vsi_##NAME##_array_create( size_t size ) { \
|
||||||
vsi_##NAME##_array_t * array = (vsi_##NAME##_array_t *)malloc( \
|
vsi_##NAME##_array_t * array = NULL; \
|
||||||
sizeof(vsi_##NAME##_array_t) + sizeof(TYPE) * size ); \
|
array = (vsi_##NAME##_array_t *)malloc( sizeof(vsi_##NAME##_array_t) + sizeof(TYPE) * size ); \
|
||||||
if (array == NULL) return NULL; \
|
if (array == NULL) return NULL; \
|
||||||
|
array->data = (TYPE *)(((TYPE**)(&(array->data))) + 1); \
|
||||||
array->size = size; \
|
array->size = size; \
|
||||||
return array; \
|
return array; \
|
||||||
} \
|
} \
|
||||||
|
|
|
||||||
|
|
@ -50,14 +50,23 @@ extern "C" {
|
||||||
free( _PTR ); _PTR = NULL; }
|
free( _PTR ); _PTR = NULL; }
|
||||||
|
|
||||||
#define vsi_safe_release_tensor(_t) if(_t){vsi_nn_ReleaseTensor(&(_t)); _t = NULL;}
|
#define vsi_safe_release_tensor(_t) if(_t){vsi_nn_ReleaseTensor(&(_t)); _t = NULL;}
|
||||||
|
#if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32))
|
||||||
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffebadcaffe)
|
#if defined(_WIN64)
|
||||||
|
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffebadcaffe)
|
||||||
|
#else
|
||||||
|
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffe)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffebadcaffe)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define FOREACH_ARGS(_args, _next, _arg_type) \
|
#define FOREACH_ARGS(_args, _next, _arg_type) \
|
||||||
while(((_arg_type)((size_t)END_OF_VARIADIC_ARGUMENTS)) != (_next = va_arg(_args, _arg_type)))
|
while(((_arg_type)((size_t)END_OF_VARIADIC_ARGUMENTS)) != (_next = va_arg(_args, _arg_type)))
|
||||||
|
|
||||||
#define BITS_PER_BYTE 8
|
#define BITS_PER_BYTE 8
|
||||||
|
|
||||||
|
#define VSI_UNREFERENCED( param ) ( ( void ) ( param ) )
|
||||||
|
|
||||||
#define VSI_NN_STRINGIZE(X) VSI_NN_DO_STRINGIZE(X)
|
#define VSI_NN_STRINGIZE(X) VSI_NN_DO_STRINGIZE(X)
|
||||||
#define VSI_NN_DO_STRINGIZE(X) #X
|
#define VSI_NN_DO_STRINGIZE(X) #X
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,7 @@ typedef struct _vsi_nn_runtime_option_t
|
||||||
int32_t enable_asymi8_to_u8;
|
int32_t enable_asymi8_to_u8;
|
||||||
int32_t enable_dataconvert_optimize;
|
int32_t enable_dataconvert_optimize;
|
||||||
int32_t enable_stream_processor;
|
int32_t enable_stream_processor;
|
||||||
|
int32_t enable_rgb88_planar_nhwc;
|
||||||
} vsi_nn_runtime_option_t;
|
} vsi_nn_runtime_option_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -31,33 +31,42 @@
|
||||||
#define VSI_ASSERT( cond ) assert(cond)
|
#define VSI_ASSERT( cond ) assert(cond)
|
||||||
|
|
||||||
#define VSI_CHECK_PTR( pointer, msg, retval ) \
|
#define VSI_CHECK_PTR( pointer, msg, retval ) \
|
||||||
do { \
|
{ \
|
||||||
if( pointer == NULL ) { \
|
if( pointer == NULL ) { \
|
||||||
VSILOGD("%s",msg); \
|
VSILOGD("%s",msg); \
|
||||||
VSI_ASSERT(FALSE); \
|
VSI_ASSERT(FALSE); \
|
||||||
} \
|
} \
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
|
|
||||||
#define CHECK_STATUS_FAIL_GOTO( stat, lbl ) do {\
|
#define CHECK_STATUS_FAIL_GOTO( stat, lbl ) {\
|
||||||
if( VSI_SUCCESS != stat ) {\
|
if( VSI_SUCCESS != stat ) {\
|
||||||
VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
|
VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
|
||||||
goto lbl;\
|
goto lbl;\
|
||||||
}\
|
}\
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
#define CHECK_STATUS( stat ) do {\
|
#define CHECK_STATUS( stat ) {\
|
||||||
if( VSI_SUCCESS != stat ) {\
|
if( VSI_SUCCESS != stat ) {\
|
||||||
VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
|
VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
|
||||||
}\
|
}\
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
#define CHECK_PTR_FAIL_GOTO( pointer, msg, lbl ) \
|
#define CHECK_PTR_FAIL_GOTO( pointer, msg, lbl ) \
|
||||||
do { \
|
{ \
|
||||||
if( pointer == NULL ) { \
|
if( pointer == NULL ) { \
|
||||||
VSILOGD("CHECK POINTER %s", msg); \
|
VSILOGD("CHECK POINTER %s", msg); \
|
||||||
goto lbl; \
|
goto lbl; \
|
||||||
} \
|
} \
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
|
#define CHECK_PTR_FAIL_GOTO_RLS_INTERNAL_NODE( pointer, node, msg, lbl ) \
|
||||||
|
{ \
|
||||||
|
if( pointer == NULL ) { \
|
||||||
|
vsi_nn_internal_release_node(&node);\
|
||||||
|
VSILOGD("CHECK POINTER %s", msg); \
|
||||||
|
goto lbl; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -1,26 +1,3 @@
|
||||||
/****************************************************************************
|
|
||||||
*
|
|
||||||
* Copyright (c) 2019 Vivante Corporation
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the Software),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
||||||
* DEALINGS IN THE SOFTWARE.
|
|
||||||
*
|
|
||||||
*****************************************************************************/
|
|
||||||
/*****Auto generated header file, Please DO NOT modify manually!*****/
|
/*****Auto generated header file, Please DO NOT modify manually!*****/
|
||||||
#ifndef _VSI_NN_FEATURE_CONFIG_H
|
#ifndef _VSI_NN_FEATURE_CONFIG_H
|
||||||
#define _VSI_NN_FEATURE_CONFIG_H
|
#define _VSI_NN_FEATURE_CONFIG_H
|
||||||
|
|
@ -42,5 +19,6 @@
|
||||||
#if defined(VX_TENSORVIEW_ON_ANY_DIM) && VX_TENSORVIEW_ON_ANY_DIM
|
#if defined(VX_TENSORVIEW_ON_ANY_DIM) && VX_TENSORVIEW_ON_ANY_DIM
|
||||||
#define VSI_CONCAT_ENHANCE_SUPPORT
|
#define VSI_CONCAT_ENHANCE_SUPPORT
|
||||||
#endif
|
#endif
|
||||||
|
#define VSI_CREATE_TENSOR_FROM_VIEW_SUPPORT
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -361,6 +361,27 @@ OVXLIB_API vsi_nn_tensor_id_t vsi_nn_AddTensorFromHandle
|
||||||
uint8_t * data
|
uint8_t * data
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a new tensor from view
|
||||||
|
* Create a new tensor from a view and add it to graph.
|
||||||
|
*
|
||||||
|
* @param[in] graph Graph handle.
|
||||||
|
* @param[in] id Required, the id of the parent tensor on which to create view.
|
||||||
|
* @param[in] start The start cooridinates for each dim, 0-based none-negative interger.
|
||||||
|
* NULL means copy from the idx 0 of each dim.
|
||||||
|
* @param[in] end The end cooridinates for each dim, 0-based none-negative interger.
|
||||||
|
* NULL means copy to the end of each dim. For the given idx, the end[idx]
|
||||||
|
* should be greater than start[idx].
|
||||||
|
* @return The new tensor id on success, or VSI_NN_TENSOR_ID_NA otheriwse.
|
||||||
|
*/
|
||||||
|
OVXLIB_API vsi_nn_tensor_id_t vsi_nn_AddTensorFromView
|
||||||
|
(
|
||||||
|
vsi_nn_graph_t* graph,
|
||||||
|
vsi_nn_tensor_id_t id,
|
||||||
|
vsi_size_t* start,
|
||||||
|
vsi_size_t* end
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attach tensor to graph
|
* Attach tensor to graph
|
||||||
* Attach an exist tensor to graph.
|
* Attach an exist tensor to graph.
|
||||||
|
|
|
||||||
|
|
@ -206,8 +206,8 @@
|
||||||
#include "ops/vsi_nn_op_maxunpool.h"
|
#include "ops/vsi_nn_op_maxunpool.h"
|
||||||
#include "ops/vsi_nn_op_reversesequence.h"
|
#include "ops/vsi_nn_op_reversesequence.h"
|
||||||
#include "ops/vsi_nn_op_grid_sample.h"
|
#include "ops/vsi_nn_op_grid_sample.h"
|
||||||
#include "ops/vsi_nn_op_bilinear_grid_sample.h"
|
|
||||||
#include "ops/vsi_nn_op_lpnorm.h"
|
#include "ops/vsi_nn_op_lpnorm.h"
|
||||||
|
#include "ops/vsi_nn_op_resize_3d.h"
|
||||||
/* custom node head define define */
|
/* custom node head define define */
|
||||||
#include "custom/vsi_nn_custom_node_type.h"
|
#include "custom/vsi_nn_custom_node_type.h"
|
||||||
#include "ops/vsi_nn_op_inverse_sigmoid.h"
|
#include "ops/vsi_nn_op_inverse_sigmoid.h"
|
||||||
|
|
@ -402,8 +402,8 @@ typedef union _vsi_nn_nn_param
|
||||||
vsi_nn_reversesequence_param reversesequence;
|
vsi_nn_reversesequence_param reversesequence;
|
||||||
vsi_nn_inverse_sigmoid_param inverse_sigmoid;
|
vsi_nn_inverse_sigmoid_param inverse_sigmoid;
|
||||||
vsi_nn_grid_sample_param gridsample;
|
vsi_nn_grid_sample_param gridsample;
|
||||||
vsi_nn_bilinear_grid_sample_param bilinear_grid_sample;
|
|
||||||
vsi_nn_lpnorm_param lpnorm;
|
vsi_nn_lpnorm_param lpnorm;
|
||||||
|
vsi_nn_resize_3d_param resize_3d;
|
||||||
void* client_param;
|
void* client_param;
|
||||||
|
|
||||||
/* custom node data struct define */
|
/* custom node data struct define */
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ typedef enum
|
||||||
VSI_NN_PREPROCESS_IMAGE_RESIZE_BILINEAR,
|
VSI_NN_PREPROCESS_IMAGE_RESIZE_BILINEAR,
|
||||||
VSI_NN_PREPROCESS_IMAGE_RESIZE_NEAREST,
|
VSI_NN_PREPROCESS_IMAGE_RESIZE_NEAREST,
|
||||||
VSI_NN_PREPROCESS_DTYPE_CONVERT,
|
VSI_NN_PREPROCESS_DTYPE_CONVERT,
|
||||||
|
VSI_NN_PREPROCESS_MEANS_AND_SCALES,
|
||||||
} vsi_nn_preprocess_type_e;
|
} vsi_nn_preprocess_type_e;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -150,8 +151,25 @@ typedef struct
|
||||||
float scale;
|
float scale;
|
||||||
}vsi_nn_process_mean_and_scale_t;
|
}vsi_nn_process_mean_and_scale_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process mean and scale parameter structure
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
/** Mean value for each channel */
|
||||||
|
float* channel_mean;
|
||||||
|
/*Channel length */
|
||||||
|
int32_t channel_len;
|
||||||
|
/** Scale value */
|
||||||
|
float* scale;
|
||||||
|
/** Scale length */
|
||||||
|
int32_t scale_len;
|
||||||
|
}vsi_nn_process_means_and_scales_t;
|
||||||
|
|
||||||
typedef vsi_nn_process_mean_and_scale_t vsi_nn_preprocess_mean_and_scale_t;
|
typedef vsi_nn_process_mean_and_scale_t vsi_nn_preprocess_mean_and_scale_t;
|
||||||
|
typedef vsi_nn_process_means_and_scales_t vsi_nn_preprocess_means_and_scales_t;
|
||||||
typedef vsi_nn_process_mean_and_scale_t vsi_nn_postprocess_mean_and_scale_t;
|
typedef vsi_nn_process_mean_and_scale_t vsi_nn_postprocess_mean_and_scale_t;
|
||||||
|
typedef vsi_nn_process_means_and_scales_t vsi_nn_postprocess_means_and_scales_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process permute parameter structure
|
* Process permute parameter structure
|
||||||
|
|
|
||||||
|
|
@ -154,7 +154,7 @@ vsi_nn_internal_tensor_t* vsi_nn_rnn_transpose_time_major
|
||||||
vsi_bool use_virtual_tensor
|
vsi_bool use_virtual_tensor
|
||||||
);
|
);
|
||||||
|
|
||||||
void vsi_nn_rnn_split_input_tensor
|
vsi_status vsi_nn_rnn_split_input_tensor
|
||||||
(
|
(
|
||||||
vsi_nn_node_t * self,
|
vsi_nn_node_t * self,
|
||||||
vsi_nn_tensor_t * input,
|
vsi_nn_tensor_t * input,
|
||||||
|
|
@ -163,7 +163,7 @@ void vsi_nn_rnn_split_input_tensor
|
||||||
vsi_bool use_virtual_tensor
|
vsi_bool use_virtual_tensor
|
||||||
);
|
);
|
||||||
|
|
||||||
void vsi_nn_rnn_data_check_aligned
|
vsi_status vsi_nn_rnn_data_check_aligned
|
||||||
(
|
(
|
||||||
vsi_nn_node_t * self,
|
vsi_nn_node_t * self,
|
||||||
vsi_nn_tensor_t ** input,
|
vsi_nn_tensor_t ** input,
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,10 @@ typedef enum
|
||||||
VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC = 0x4,
|
VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC = 0x4,
|
||||||
/** affine perchannel asymmetric */
|
/** affine perchannel asymmetric */
|
||||||
VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_ASYMMETRIC = 0x5,
|
VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_ASYMMETRIC = 0x5,
|
||||||
|
/** float8 */
|
||||||
|
VSI_NN_QNT_TYPE_SYMMETRIC_FLOAT8 = 0x6,
|
||||||
|
/** perchannel float8 */
|
||||||
|
VSI_NN_QNT_TYPE_PERCHANNEL_SYMMETRIC_FLOAT8 = 0x7,
|
||||||
/** undefined type */
|
/** undefined type */
|
||||||
VSI_NN_QNT_TYPE_NA = 0xff,
|
VSI_NN_QNT_TYPE_NA = 0xff,
|
||||||
} vsi_nn_qnt_type_e;
|
} vsi_nn_qnt_type_e;
|
||||||
|
|
|
||||||
|
|
@ -734,13 +734,15 @@ vsi_status vsi_nn_copy_tensor_veiw_patch
|
||||||
/**
|
/**
|
||||||
* OVXLIB internal tensor util api
|
* OVXLIB internal tensor util api
|
||||||
* A wrapper api for OpenVX vxCopyTensorPatch
|
* A wrapper api for OpenVX vxCopyTensorPatch
|
||||||
* Allows the application to copy whole tensor patch from/into an tensor object.
|
* Allows the application to copy partial/whole tensor patch from/into an tensor object.
|
||||||
*
|
*
|
||||||
* @param[in] tensor OpenVX Tensor handle.
|
* @param[in] tensor OpenVX Tensor handle.
|
||||||
* @param[in] attr OVXLIB Tensor attr.
|
* @param[in] attr OVXLIB Tensor attr.
|
||||||
* @param[in] user_ptr The address of the memory location where to store the requested data.
|
* @param[in] user_ptr The address of the memory location where to store the requested data.
|
||||||
* @param[in] usage This declares the effect of the copy with regard to the tensor object
|
* @param[in] usage This declares the effect of the copy with regard to the tensor object
|
||||||
* support VX_READ_ONLY or VX_WRITE_ONLY
|
* support VX_READ_ONLY or VX_WRITE_ONLY
|
||||||
|
* @param[in] start The start cooridinates for each dim. NULL means copy from the idx 0 of each dim.
|
||||||
|
* @param[in] end The end cooridinates for each dim. NULL means copy to the end of each dim.
|
||||||
* @return VSI_SUCCESS on success, or error core otherwise.
|
* @return VSI_SUCCESS on success, or error core otherwise.
|
||||||
*/
|
*/
|
||||||
vsi_status vsi_nn_copy_tensor_patch
|
vsi_status vsi_nn_copy_tensor_patch
|
||||||
|
|
@ -748,7 +750,9 @@ vsi_status vsi_nn_copy_tensor_patch
|
||||||
vx_tensor tensor,
|
vx_tensor tensor,
|
||||||
vsi_nn_tensor_attr_t *attr,
|
vsi_nn_tensor_attr_t *attr,
|
||||||
void * user_ptr,
|
void * user_ptr,
|
||||||
vsi_enum usage
|
vsi_enum usage,
|
||||||
|
vsi_size_t* start,
|
||||||
|
vsi_size_t* end
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -31,26 +31,26 @@
|
||||||
extern "C"{
|
extern "C"{
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TEST_CHECK_TENSOR_ID( id, lbl ) do {\
|
#define TEST_CHECK_TENSOR_ID( id, lbl ) {\
|
||||||
if( VSI_NN_TENSOR_ID_NA == id ) {\
|
if( VSI_NN_TENSOR_ID_NA == id ) {\
|
||||||
VSILOGE("CHECK TENSOR ID %d", __LINE__);\
|
VSILOGE("CHECK TENSOR ID %d", __LINE__);\
|
||||||
goto lbl;\
|
goto lbl;\
|
||||||
}\
|
}\
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
#define TEST_CHECK_PTR( ptr, lbl ) do {\
|
#define TEST_CHECK_PTR( ptr, lbl ) {\
|
||||||
if( NULL == ptr ) {\
|
if( NULL == ptr ) {\
|
||||||
VSILOGE("CHECK PTR %d", __LINE__);\
|
VSILOGE("CHECK PTR %d", __LINE__);\
|
||||||
goto lbl;\
|
goto lbl;\
|
||||||
}\
|
}\
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
#define TEST_CHECK_STATUS( stat, lbl ) do {\
|
#define TEST_CHECK_STATUS( stat, lbl ) {\
|
||||||
if( VSI_SUCCESS != stat ) {\
|
if( VSI_SUCCESS != stat ) {\
|
||||||
VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
|
VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
|
||||||
goto lbl;\
|
goto lbl;\
|
||||||
}\
|
}\
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -191,6 +191,16 @@ typedef enum
|
||||||
VSI_NN_TYPE_BFLOAT16 = VX_TYPE_BFLOAT16,
|
VSI_NN_TYPE_BFLOAT16 = VX_TYPE_BFLOAT16,
|
||||||
#else
|
#else
|
||||||
VSI_NN_TYPE_BFLOAT16 = 0x81A,
|
VSI_NN_TYPE_BFLOAT16 = 0x81A,
|
||||||
|
#endif
|
||||||
|
#ifdef VSI_NN_TYPE_FLOAT8_E4M3_SUPPORT
|
||||||
|
VSI_NN_TYPE_FLOAT8_E4M3 = VX_TYPE_FLOAT8_E4M3,
|
||||||
|
#else
|
||||||
|
VSI_NN_TYPE_FLOAT8_E4M3 = 0X81E,
|
||||||
|
#endif
|
||||||
|
#ifdef VSI_NN_TYPE_FLOAT8_E5M2_SUPPORT
|
||||||
|
VSI_NN_TYPE_FLOAT8_E5M2 = VX_TYPE_FLOAT8_E5M2,
|
||||||
|
#else
|
||||||
|
VSI_NN_TYPE_FLOAT8_E5M2 = 0X81F,
|
||||||
#endif
|
#endif
|
||||||
VSI_NN_TYPE_VDATA = VX_TYPE_USER_STRUCT_START + 0x1,
|
VSI_NN_TYPE_VDATA = VX_TYPE_USER_STRUCT_START + 0x1,
|
||||||
|
|
||||||
|
|
@ -268,6 +278,11 @@ typedef enum _vsi_nn_roi_align_type_e
|
||||||
VSI_NN_ROI_ALIGN
|
VSI_NN_ROI_ALIGN
|
||||||
} vsi_nn_roi_align_type_e;
|
} vsi_nn_roi_align_type_e;
|
||||||
|
|
||||||
|
typedef enum _vsi_nn_custom_warp_affine_type_e {
|
||||||
|
VSI_NN_WARP_AFFINE_TYPE_NONE = 0,
|
||||||
|
VSI_NN_WARP_AFFINE_TYPE_RGB
|
||||||
|
} vsi_nn_custom_warp_affine_type_e;
|
||||||
|
|
||||||
/** Deprecated */
|
/** Deprecated */
|
||||||
typedef uint32_t vsi_nn_size_t;
|
typedef uint32_t vsi_nn_size_t;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ extern "C"{
|
||||||
|
|
||||||
#define VSI_NN_VERSION_MAJOR 1
|
#define VSI_NN_VERSION_MAJOR 1
|
||||||
#define VSI_NN_VERSION_MINOR 1
|
#define VSI_NN_VERSION_MINOR 1
|
||||||
#define VSI_NN_VERSION_PATCH 74
|
#define VSI_NN_VERSION_PATCH 84
|
||||||
#define VSI_NN_VERSION \
|
#define VSI_NN_VERSION \
|
||||||
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
|
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,578 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
#include "vsi_nn_log.h"
|
||||||
|
#include "vsi_nn_node.h"
|
||||||
|
#include "vsi_nn_prv.h"
|
||||||
|
#include "vsi_nn_ops.h"
|
||||||
|
#include "vsi_nn_tensor.h"
|
||||||
|
#include "vsi_nn_error.h"
|
||||||
|
#include "utils/vsi_nn_util.h"
|
||||||
|
#include "kernel/vsi_nn_kernel.h"
|
||||||
|
#include "vsi_nn_internal_node.h"
|
||||||
|
#include "utils/vsi_nn_constraint_check.h"
|
||||||
|
|
||||||
|
typedef struct _custom_tiny_yolov4_postprocess_local_data_t {
|
||||||
|
vx_int32 begin_dims[6][VSI_NN_MAX_DIM_NUM];
|
||||||
|
vx_int32 end_dims[6][VSI_NN_MAX_DIM_NUM];
|
||||||
|
vx_int32 stride_dims[VSI_NN_MAX_DIM_NUM];
|
||||||
|
} custom_tiny_yolov4_postprocess_local_data_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Declare number of input and output.
|
||||||
|
*/
|
||||||
|
#define _INPUT_NUM (4)
|
||||||
|
#define _OUTPUT_NUM (2)
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_internal_tensor
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_tensor_attr_t attr;
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
|
||||||
|
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
|
||||||
|
memcpy( &attr.dtype, &input->attr.dtype, sizeof( attr.dtype ) );
|
||||||
|
attr.dim_num = VSI_NN_DIM_AUTO;
|
||||||
|
attr.vtl = TRUE;
|
||||||
|
attr.is_const = FALSE;
|
||||||
|
tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
|
||||||
|
|
||||||
|
return tensor;
|
||||||
|
} /* _create_internal_tensor() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_sigmoid_internal_tensor
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_tensor_attr_t attr;
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
|
||||||
|
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
|
||||||
|
memcpy( &attr.dtype, &input->attr.dtype, sizeof( attr.dtype ) );
|
||||||
|
if (attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC ||
|
||||||
|
attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC)
|
||||||
|
{
|
||||||
|
attr.dtype.scale = 0.00390625;
|
||||||
|
attr.dtype.zero_point = 0;
|
||||||
|
}
|
||||||
|
attr.dim_num = VSI_NN_DIM_AUTO;
|
||||||
|
attr.vtl = TRUE;
|
||||||
|
attr.is_const = FALSE;
|
||||||
|
tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
|
||||||
|
|
||||||
|
return tensor;
|
||||||
|
} /* _create_sigmoid_internal_tensor() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_output_internal_tensor
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_tensor_attr_t attr;
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
|
||||||
|
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
|
||||||
|
memcpy( &attr.dtype, &output->attr.dtype, sizeof( attr.dtype ) );
|
||||||
|
attr.dim_num = VSI_NN_DIM_AUTO;
|
||||||
|
attr.vtl = TRUE;
|
||||||
|
attr.is_const = FALSE;
|
||||||
|
tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
|
||||||
|
|
||||||
|
return tensor;
|
||||||
|
} /* _create_output_internal_tensor() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_strided_slice_op
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input,
|
||||||
|
int32_t begin_mask,
|
||||||
|
int32_t end_mask,
|
||||||
|
int32_t index
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_custom_tiny_yolov4_postprocess_param * p = NULL;
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
vsi_nn_internal_node_t* curr = NULL;
|
||||||
|
p = (vsi_nn_custom_tiny_yolov4_postprocess_param *)&(self->nn_param.custom_tiny_yolov4_postprocess);
|
||||||
|
|
||||||
|
tensor = _create_internal_tensor(self, input);
|
||||||
|
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
|
||||||
|
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_STRIDED_SLICE, 0, 0 );
|
||||||
|
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
|
||||||
|
curr->node->nn_param.strided_slice.begin_dims = p->local->begin_dims[index];
|
||||||
|
curr->node->nn_param.strided_slice.begin_dims_num = input->attr.dim_num;
|
||||||
|
curr->node->nn_param.strided_slice.end_dims = p->local->end_dims[index];
|
||||||
|
curr->node->nn_param.strided_slice.end_dims_num = input->attr.dim_num;
|
||||||
|
curr->node->nn_param.strided_slice.stride_dims = p->local->stride_dims;
|
||||||
|
curr->node->nn_param.strided_slice.stride_dims_num = input->attr.dim_num;
|
||||||
|
curr->node->nn_param.strided_slice.begin_mask = begin_mask;
|
||||||
|
curr->node->nn_param.strided_slice.end_mask = end_mask;
|
||||||
|
curr->node->nn_param.strided_slice.shrink_axis_mask = 0;
|
||||||
|
curr->node->nn_param.strided_slice.new_axis_mask = 0;
|
||||||
|
curr->inputs[0] = input;
|
||||||
|
curr->outputs[0] = tensor->t;
|
||||||
|
vsi_nn_internal_setup_node( self, curr );
|
||||||
|
|
||||||
|
final:
|
||||||
|
return tensor;
|
||||||
|
} /* _create_strided_slice() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_sigmoid_op
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
vsi_nn_internal_node_t* curr = NULL;
|
||||||
|
|
||||||
|
tensor = _create_sigmoid_internal_tensor(self, input);
|
||||||
|
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
|
||||||
|
|
||||||
|
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_SIGMOID, 0, 0 );
|
||||||
|
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
|
||||||
|
curr->inputs[0] = input;
|
||||||
|
curr->outputs[0] = tensor->t;
|
||||||
|
vsi_nn_internal_setup_node( self, curr );
|
||||||
|
|
||||||
|
final:
|
||||||
|
return tensor;
|
||||||
|
} /* _create_sigmoid_op() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_confidence_op
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input,
|
||||||
|
vsi_nn_tensor_t * output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
vsi_nn_internal_node_t* curr = NULL;
|
||||||
|
|
||||||
|
tensor = _create_output_internal_tensor(self, output);
|
||||||
|
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
|
||||||
|
|
||||||
|
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE, 0, 0 );
|
||||||
|
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
|
||||||
|
curr->inputs[0] = input;
|
||||||
|
curr->outputs[0] = tensor->t;
|
||||||
|
vsi_nn_internal_setup_node( self, curr );
|
||||||
|
|
||||||
|
final:
|
||||||
|
return tensor;
|
||||||
|
} /* _create_confidence_op() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_box_op
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input0,
|
||||||
|
vsi_nn_tensor_t * input1,
|
||||||
|
vsi_nn_tensor_t * output,
|
||||||
|
float bias0,
|
||||||
|
float bias1
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
vsi_nn_internal_node_t* curr = NULL;
|
||||||
|
|
||||||
|
tensor = _create_output_internal_tensor(self, output);
|
||||||
|
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
|
||||||
|
|
||||||
|
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX, 0, 0 );
|
||||||
|
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
|
||||||
|
curr->inputs[0] = input0;
|
||||||
|
curr->inputs[1] = input1;
|
||||||
|
curr->outputs[0] = tensor->t;
|
||||||
|
curr->node->nn_param.custom_tiny_yolov4_postprocess_box.bias_0 = bias0;
|
||||||
|
curr->node->nn_param.custom_tiny_yolov4_postprocess_box.bias_1 = bias1;
|
||||||
|
vsi_nn_internal_setup_node( self, curr );
|
||||||
|
|
||||||
|
final:
|
||||||
|
return tensor;
|
||||||
|
} /* _create_box_op() */
|
||||||
|
|
||||||
|
static vsi_nn_internal_tensor_t *_create_reshape_op
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input,
|
||||||
|
vsi_nn_tensor_t * output,
|
||||||
|
vsi_size_t width
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_internal_tensor_t * tensor = NULL;
|
||||||
|
vsi_nn_internal_node_t* curr = NULL;
|
||||||
|
vsi_size_t shape_1[] = { 1, (vsi_size_t)-1, 1 };
|
||||||
|
|
||||||
|
shape_1[0] = width;
|
||||||
|
|
||||||
|
tensor = _create_output_internal_tensor(self, output);
|
||||||
|
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
|
||||||
|
|
||||||
|
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESHAPE2, 0, 0 );
|
||||||
|
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
|
||||||
|
curr->inputs[0] = input;
|
||||||
|
curr->outputs[0] = tensor->t;
|
||||||
|
curr->node->nn_param.reshape2.size = shape_1;
|
||||||
|
curr->node->nn_param.reshape2.dim_num = 3;
|
||||||
|
vsi_nn_internal_setup_node( self, curr );
|
||||||
|
|
||||||
|
final:
|
||||||
|
return tensor;
|
||||||
|
} /* _create_reshape_op() */
|
||||||
|
|
||||||
|
static vsi_bool _create_concat_op
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t * input0,
|
||||||
|
vsi_nn_tensor_t * input1,
|
||||||
|
vsi_nn_tensor_t * input2,
|
||||||
|
vsi_nn_tensor_t * input3,
|
||||||
|
vsi_nn_tensor_t * input4,
|
||||||
|
vsi_nn_tensor_t * input5,
|
||||||
|
vsi_nn_tensor_t * output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_nn_internal_node_t* curr = NULL;
|
||||||
|
vsi_bool ret = FALSE;
|
||||||
|
|
||||||
|
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_CONCAT, 6, 1 );
|
||||||
|
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
|
||||||
|
curr->inputs[0] = input0;
|
||||||
|
curr->inputs[1] = input1;
|
||||||
|
curr->inputs[2] = input2;
|
||||||
|
curr->inputs[3] = input3;
|
||||||
|
curr->inputs[4] = input4;
|
||||||
|
curr->inputs[5] = input5;
|
||||||
|
curr->outputs[0] = output;
|
||||||
|
curr->node->nn_param.concat.axis = 1;
|
||||||
|
ret = vsi_nn_internal_setup_node( self, curr );
|
||||||
|
|
||||||
|
final:
|
||||||
|
return ret;
|
||||||
|
} /* _create_concat_op() */
|
||||||
|
|
||||||
|
static vsi_status op_compute
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t ** inputs,
|
||||||
|
vsi_nn_tensor_t ** outputs
|
||||||
|
)
|
||||||
|
{
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
|
return vsi_nn_internal_compute_node( self );
|
||||||
|
} /* op_compute() */
|
||||||
|
|
||||||
|
/* Validate the node's input/output dtypes against the single supported
 * combination: all four inputs and both outputs must be asymmetric-quantized
 * U8. Returns TRUE when the combination is accepted, FALSE otherwise. */
static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    BEGIN_IO_TYPE_DECL(CUSTOM_TINY_YOLOV4_POSTPROCESS, 4, 2)
        IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM)
    END_IO_TYPE_DECL(CUSTOM_TINY_YOLOV4_POSTPROCESS)
    if (!VALIDATE_OP_IO_TYPES(CUSTOM_TINY_YOLOV4_POSTPROCESS, self, inputs,
        self->input.num, outputs, self->output.num))
    {
        /* Describe the offending dtype combination for the error log,
         * then release the generated description string. */
        char* desc = generate_op_io_types_desc(inputs,
            self->input.num, outputs, self->output.num);
        VSILOGE("Inputs/Outputs data type not support: %s", desc);
        destroy_op_io_types_desc(desc);
        return FALSE;
    }

    return TRUE;
} /* op_check() */
|
||||||
|
|
||||||
|
static vsi_status op_optimize
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t ** inputs,
|
||||||
|
vsi_nn_tensor_t ** outputs,
|
||||||
|
vsi_nn_opt_direction_e direction
|
||||||
|
)
|
||||||
|
{
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
|
return vsi_nn_internal_optimize_node( self, direction );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Build the internal sub-graph that implements the tiny-YOLOv4 post-process:
 *   confidence path: for each of the two feature maps (inputs[1] = 26x26,
 *     inputs[0] = 13x13), take three channel slices, apply sigmoid, run the
 *     confidence kernel, reshape to [80, -1, 1], then concat all six results
 *     into outputs[0];
 *   box path: for each feature map, take three 4-channel slices, run the box
 *     kernel against the matching grid input (inputs[3] / inputs[2]) with
 *     per-scale bias pairs, reshape to [4, -1, 1], then concat into outputs[1].
 * Slice begin/end tables are pre-filled in op_init(); slice indices 0-2 are
 * the confidence chunks, 3-5 the box chunks. The `11` arguments to
 * _create_strided_slice_op are begin/end masks — presumably selecting which
 * dims use the full extent (confirm against the helper's signature).
 * Returns TRUE on success, FALSE when any internal tensor/node fails. */
static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_bool ret = FALSE;
    vsi_nn_internal_tensor_t * tensor0[12] = {NULL};  /* stages for inputs[index_0] (26x26 map) */
    vsi_nn_internal_tensor_t * tensor1[12] = {NULL};  /* stages for inputs[index_1] (13x13 map) */
    int32_t index_0 = 1;  /* 26x26 feature map */
    int32_t index_1 = 0;  /* 13x13 feature map */
    int32_t index_2 = 3;  /* grid input paired with the 26x26 map */
    int32_t index_3 = 2;  /* grid input paired with the 13x13 map */

    vsi_nn_internal_init_node_wksp( self );

    /**confidence**/
    /**input 0 chunk 0**/
    /*
    sub0:26x26x255 --> 26x26x81, begin: [0, 0, 4, 0] end: [0, 0, 85, 0] stride: [1, 1, 1, 1]
    sub1[26, 26, 80] = sigmoid(sub0)[26, 26, 0:0] * sigmoid(sub0)[26, 26, 1:81]
    sub2[80, 26, 26] = transpose(sub1)
    sub3[80, 676] = reshape(sub2)
    */
    tensor0[0] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 0);
    CHECK_PTR_FAIL_GOTO( tensor0[0], "Create internal tensor fail.", final );
    tensor0[1] = _create_sigmoid_op(self, tensor0[0]->t);
    CHECK_PTR_FAIL_GOTO( tensor0[1], "Create internal tensor fail.", final );
    tensor0[2] = _create_confidence_op(self, tensor0[1]->t, outputs[0]);
    CHECK_PTR_FAIL_GOTO( tensor0[2], "Create internal tensor fail.", final );
    tensor0[3] = _create_reshape_op(self, tensor0[2]->t, outputs[0], 80);
    CHECK_PTR_FAIL_GOTO( tensor0[3], "Create internal tensor fail.", final );
    /**chunk 1**/
    /*
    26x26x255 --> 26x26x81, begin: [0, 0, 89, 0] end: [0, 0, 170, 0] stride: [1, 1, 1, 1]
    */
    tensor0[4] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 1);
    CHECK_PTR_FAIL_GOTO( tensor0[4], "Create internal tensor fail.", final );
    tensor0[5] = _create_sigmoid_op(self, tensor0[4]->t);
    CHECK_PTR_FAIL_GOTO( tensor0[5], "Create internal tensor fail.", final );
    tensor0[6] = _create_confidence_op(self, tensor0[5]->t, outputs[0]);
    CHECK_PTR_FAIL_GOTO( tensor0[6], "Create internal tensor fail.", final );
    tensor0[7] = _create_reshape_op(self, tensor0[6]->t, outputs[0], 80);
    CHECK_PTR_FAIL_GOTO( tensor0[7], "Create internal tensor fail.", final );
    /**chunk 2**/
    /*
    26x26x255 --> 26x26x81, begin: [0, 0, 174, 0] end: [0, 0, 255, 0] stride: [1, 1, 1, 1]
    */
    tensor0[8] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 2);
    CHECK_PTR_FAIL_GOTO( tensor0[8], "Create internal tensor fail.", final );
    tensor0[9] = _create_sigmoid_op(self, tensor0[8]->t);
    CHECK_PTR_FAIL_GOTO( tensor0[9], "Create internal tensor fail.", final );
    tensor0[10] = _create_confidence_op(self, tensor0[9]->t, outputs[0]);
    CHECK_PTR_FAIL_GOTO( tensor0[10], "Create internal tensor fail.", final );
    tensor0[11] = _create_reshape_op(self, tensor0[10]->t, outputs[0], 80);
    CHECK_PTR_FAIL_GOTO( tensor0[11], "Create internal tensor fail.", final );

    /**input 1 chunk 0**/
    /*
    sub0:13x13x255 --> 26x26x81, begin: [0, 0, 4, 0] end: [0, 0, 85, 0] stride: [1, 1, 1, 1]
    sub1[13, 13, 80] = sigmoid(sub0)[13, 13, 0:0] * sigmoid(sub0)[13, 13, 1:81]
    sub2[80, 13, 13] = transpose(sub1)
    sub3[80, 169] = reshape(sub2)
    */
    tensor1[0] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 0);
    CHECK_PTR_FAIL_GOTO( tensor1[0], "Create internal tensor fail.", final );
    tensor1[1] = _create_sigmoid_op(self, tensor1[0]->t);
    CHECK_PTR_FAIL_GOTO( tensor1[1], "Create internal tensor fail.", final );
    tensor1[2] = _create_confidence_op(self, tensor1[1]->t, outputs[0]);
    CHECK_PTR_FAIL_GOTO( tensor1[2], "Create internal tensor fail.", final );
    tensor1[3] = _create_reshape_op(self, tensor1[2]->t, outputs[0], 80);
    CHECK_PTR_FAIL_GOTO( tensor1[3], "Create internal tensor fail.", final );
    /**chunk 1**/
    /*
    13x13x255 --> 13x13x81, begin: [0, 0, 89, 0] end: [0, 0, 170, 0] stride: [1, 1, 1, 1]
    */
    tensor1[4] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 1);
    CHECK_PTR_FAIL_GOTO( tensor1[4], "Create internal tensor fail.", final );
    tensor1[5] = _create_sigmoid_op(self, tensor1[4]->t);
    CHECK_PTR_FAIL_GOTO( tensor1[5], "Create internal tensor fail.", final );
    tensor1[6] = _create_confidence_op(self, tensor1[5]->t, outputs[0]);
    CHECK_PTR_FAIL_GOTO( tensor1[6], "Create internal tensor fail.", final );
    tensor1[7] = _create_reshape_op(self, tensor1[6]->t, outputs[0], 80);
    CHECK_PTR_FAIL_GOTO( tensor1[7], "Create internal tensor fail.", final );
    /**chunk 2**/
    /*
    13x13x255 --> 13x13x81, begin: [0, 0, 174, 0] end: [0, 0, 255, 0] stride: [1, 1, 1, 1]
    */
    tensor1[8] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 2);
    CHECK_PTR_FAIL_GOTO( tensor1[8], "Create internal tensor fail.", final );
    tensor1[9] = _create_sigmoid_op(self, tensor1[8]->t);
    CHECK_PTR_FAIL_GOTO( tensor1[9], "Create internal tensor fail.", final );
    tensor1[10] = _create_confidence_op(self, tensor1[9]->t, outputs[0]);
    CHECK_PTR_FAIL_GOTO( tensor1[10], "Create internal tensor fail.", final );
    tensor1[11] = _create_reshape_op(self, tensor1[10]->t, outputs[0], 80);
    CHECK_PTR_FAIL_GOTO( tensor1[11], "Create internal tensor fail.", final );

    /* Join the six reshaped confidence chunks into outputs[0]. */
    ret = _create_concat_op(self, tensor0[3]->t, tensor0[7]->t, tensor0[11]->t,
        tensor1[3]->t, tensor1[7]->t, tensor1[11]->t, outputs[0]);
    if (ret == FALSE)
    {
        VSILOGE("Create concat operation fail");
        goto final;
    }

    /* Reset for the box path; tensor0/tensor1 slots are reused below. */
    ret = FALSE;
    /**box**/
    /*
    26x26x255 --> 26x26x4, begin: [0, 0, 0, 0] end: [0, 0, 4, 0] stride: [1, 1, 1, 1]
    */
    tensor0[0] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 3);
    CHECK_PTR_FAIL_GOTO( tensor0[0], "Create internal tensor fail.", final );
    tensor0[1] = _create_box_op(self, tensor0[0]->t, inputs[index_2], outputs[1], 23, 27);
    CHECK_PTR_FAIL_GOTO( tensor0[1], "Create internal tensor fail.", final );
    tensor0[2] = _create_reshape_op(self, tensor0[1]->t, outputs[1], 4);
    CHECK_PTR_FAIL_GOTO( tensor0[2], "Create internal tensor fail.", final );
    /*
    26x26x255 --> 26x26x4, begin: [0, 0, 85, 0] end: [0, 0, 89, 0] stride: [1, 1, 1, 1]
    */
    tensor0[3] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 4);
    CHECK_PTR_FAIL_GOTO( tensor0[3], "Create internal tensor fail.", final );
    tensor0[4] = _create_box_op(self, tensor0[3]->t, inputs[index_2], outputs[1], 37, 58);
    CHECK_PTR_FAIL_GOTO( tensor0[4], "Create internal tensor fail.", final );
    tensor0[5] = _create_reshape_op(self, tensor0[4]->t, outputs[1], 4);
    CHECK_PTR_FAIL_GOTO( tensor0[5], "Create internal tensor fail.", final );
    /*
    26x26x255 --> 26x26x4, begin: [0, 0, 85, 0] end: [0, 0, 89, 0] stride: [1, 1, 1, 1]
    */
    tensor0[6] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 5);
    CHECK_PTR_FAIL_GOTO( tensor0[6], "Create internal tensor fail.", final );
    tensor0[7] = _create_box_op(self, tensor0[6]->t, inputs[index_2], outputs[1], 81, 82);
    CHECK_PTR_FAIL_GOTO( tensor0[7], "Create internal tensor fail.", final );
    tensor0[8] = _create_reshape_op(self, tensor0[7]->t, outputs[1], 4);
    CHECK_PTR_FAIL_GOTO( tensor0[8], "Create internal tensor fail.", final );

    /*
    13x13x255 --> 13x13x4, begin: [0, 0, 0, 0] end: [0, 0, 4, 0] stride: [1, 1, 1, 1]
    */
    tensor1[0] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 3);
    CHECK_PTR_FAIL_GOTO( tensor1[0], "Create internal tensor fail.", final );
    tensor1[1] = _create_box_op(self, tensor1[0]->t, inputs[index_3], outputs[1], 81, 82);
    CHECK_PTR_FAIL_GOTO( tensor1[1], "Create internal tensor fail.", final );
    tensor1[2] = _create_reshape_op(self, tensor1[1]->t, outputs[1], 4);
    CHECK_PTR_FAIL_GOTO( tensor1[2], "Create internal tensor fail.", final );
    /*
    13x13x255 --> 13x13x4, begin: [0, 0, 85, 0] end: [0, 0, 89, 0] stride: [1, 1, 1, 1]
    */
    tensor1[3] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 4);
    CHECK_PTR_FAIL_GOTO( tensor1[3], "Create internal tensor fail.", final );
    tensor1[4] = _create_box_op(self, tensor1[3]->t, inputs[index_3], outputs[1], 135, 169);
    CHECK_PTR_FAIL_GOTO( tensor1[4], "Create internal tensor fail.", final );
    tensor1[5] = _create_reshape_op(self, tensor1[4]->t, outputs[1], 4);
    CHECK_PTR_FAIL_GOTO( tensor1[5], "Create internal tensor fail.", final );
    /*
    13x13x255 --> 13x13x4, begin: [0, 0, 170, 0] end: [0, 0, 174, 0] stride: [1, 1, 1, 1]
    */
    tensor1[6] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 5);
    CHECK_PTR_FAIL_GOTO( tensor1[6], "Create internal tensor fail.", final );
    tensor1[7] = _create_box_op(self, tensor1[6]->t, inputs[index_3], outputs[1], 344, 319);
    CHECK_PTR_FAIL_GOTO( tensor1[7], "Create internal tensor fail.", final );
    tensor1[8] = _create_reshape_op(self, tensor1[7]->t, outputs[1], 4);
    CHECK_PTR_FAIL_GOTO( tensor1[8], "Create internal tensor fail.", final );

    /* Join the six reshaped box chunks into outputs[1]. */
    ret = _create_concat_op(self, tensor0[2]->t, tensor0[5]->t, tensor0[8]->t,
        tensor1[2]->t, tensor1[5]->t, tensor1[8]->t, outputs[1]);
    if (ret == FALSE)
    {
        VSILOGE("Create concat operation fail");
        goto final;
    }

final:
    return ret;
} /* op_setup() */
|
||||||
|
|
||||||
|
static vsi_status op_init
|
||||||
|
(
|
||||||
|
vsi_nn_node_t* self
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int32_t i = 0;
|
||||||
|
vsi_nn_custom_tiny_yolov4_postprocess_param *p = &self->nn_param.custom_tiny_yolov4_postprocess;
|
||||||
|
p->local = \
|
||||||
|
(custom_tiny_yolov4_postprocess_local_data_t*)malloc(sizeof(custom_tiny_yolov4_postprocess_local_data_t));
|
||||||
|
CHECK_PTR_FAIL_GOTO(p->local, "create buffer fail", final);
|
||||||
|
memset(p->local, 0, sizeof(custom_tiny_yolov4_postprocess_local_data_t));
|
||||||
|
for ( i = 0; i < VSI_NN_MAX_DIM_NUM; i++ )
|
||||||
|
{
|
||||||
|
p->local->stride_dims[i] = 1;
|
||||||
|
}
|
||||||
|
p->local->begin_dims[0][2] = 4;
|
||||||
|
p->local->end_dims[0][2] = 85;
|
||||||
|
|
||||||
|
p->local->begin_dims[1][2] = 89;
|
||||||
|
p->local->end_dims[1][2] = 170;
|
||||||
|
|
||||||
|
p->local->begin_dims[2][2] = 174;
|
||||||
|
p->local->end_dims[2][2] = 255;
|
||||||
|
|
||||||
|
p->local->begin_dims[3][2] = 0;
|
||||||
|
p->local->end_dims[3][2] = 4;
|
||||||
|
|
||||||
|
p->local->begin_dims[4][2] = 85;
|
||||||
|
p->local->end_dims[4][2] = 89;
|
||||||
|
|
||||||
|
p->local->begin_dims[5][2] = 170;
|
||||||
|
p->local->end_dims[5][2] = 174;
|
||||||
|
final:
|
||||||
|
return VSI_SUCCESS;
|
||||||
|
} /* op_init() */
|
||||||
|
|
||||||
|
/* Release op-local state: common node teardown first, then the slice tables
 * allocated in op_init() (vsi_nn_safe_free also NULLs the pointer), and
 * finally the internal-node workspace built in op_setup().
 * Returns the status of the common deinit. */
static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_op_common_deinit(self);

    vsi_nn_safe_free(self->nn_param.custom_tiny_yolov4_postprocess.local);
    vsi_nn_internal_deinit_node_wksp( self );

    return status;
} /* op_deinit() */
|
||||||
|
|
||||||
|
__BEGIN_DECLS

/* Registrar: publish the composite CUSTOM_TINY_YOLOV4_POSTPROCESS op
 * (built from internal strided-slice/sigmoid/confidence/box/reshape/concat
 * nodes) into the global op table. */
DEF_OP_REG
    (
    /* op_name    */ CUSTOM_TINY_YOLOV4_POSTPROCESS,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ op_optimize,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS
|
||||||
|
|
||||||
|
|
@ -35,9 +35,9 @@
|
||||||
#include "utils/vsi_nn_util.h"
|
#include "utils/vsi_nn_util.h"
|
||||||
#include "kernel/vsi_nn_kernel.h"
|
#include "kernel/vsi_nn_kernel.h"
|
||||||
|
|
||||||
typedef struct _bilinear_grid_sample_local_data_t {
|
typedef struct _custom_tiny_yolov4_postprocess_box_local_data_t {
|
||||||
int32_t placeholder;
|
int32_t placeholder;
|
||||||
} bilinear_grid_sample_local_data_t;
|
} custom_tiny_yolov4_postprocess_box_local_data_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Declare number of input and output.
|
Declare number of input and output.
|
||||||
|
|
@ -53,27 +53,25 @@ static vsi_status op_compute
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
vsi_nn_kernel_param_t * param = NULL;
|
||||||
vsi_nn_kernel_param_t* param = NULL;
|
float bias_0 = self->nn_param.custom_tiny_yolov4_postprocess_box.bias_0;
|
||||||
int32_t align_corners = self->nn_param.bilinear_grid_sample.align_corners;
|
float bias_1 = self->nn_param.custom_tiny_yolov4_postprocess_box.bias_1;
|
||||||
vsi_nn_kernel_node_t n;
|
|
||||||
|
|
||||||
param = vsi_nn_kernel_param_create();
|
param = vsi_nn_kernel_param_create();
|
||||||
|
|
||||||
vsi_nn_kernel_param_add_int32(param, "align_corners", align_corners);
|
vsi_nn_kernel_param_add_float32( param, "bias_0", bias_0 );
|
||||||
n = vsi_nn_kernel_selector(
|
vsi_nn_kernel_param_add_float32( param, "bias_1", bias_1 );
|
||||||
self->graph, "bilinear_grid_sample", inputs, 2, outputs, 1, param);
|
|
||||||
if (n == NULL) {
|
self->n = vsi_nn_kernel_selector( self->graph, "tiny_yolov4_postprocess_box",
|
||||||
vsi_nn_kernel_param_release(¶m);
|
inputs, _INPUT_NUM, outputs, _OUTPUT_NUM, param );
|
||||||
status = VSI_FAILURE;
|
|
||||||
return status;
|
if ( self->n )
|
||||||
}
|
{
|
||||||
self->n = (vx_node)n;
|
|
||||||
vsi_nn_kernel_param_release(¶m);
|
|
||||||
if (self->n) {
|
|
||||||
status = VSI_SUCCESS;
|
status = VSI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vsi_nn_kernel_param_release( ¶m );
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
} /* op_compute() */
|
} /* op_compute() */
|
||||||
|
|
||||||
|
|
@ -85,6 +83,9 @@ static vsi_bool op_check
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
/*TODO: Check tensor shapes. */
|
/*TODO: Check tensor shapes. */
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_check() */
|
} /* op_check() */
|
||||||
|
|
||||||
|
|
@ -95,61 +96,36 @@ static vsi_bool op_setup
|
||||||
vsi_nn_tensor_t ** outputs
|
vsi_nn_tensor_t ** outputs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
if (NULL == self) {
|
uint32_t rank = inputs[0]->attr.dim_num;
|
||||||
return FALSE;
|
vsi_bool ret = TRUE;
|
||||||
}
|
|
||||||
|
|
||||||
if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num) {
|
VSI_UNREFERENCED(self);
|
||||||
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
|
|
||||||
outputs[0]->attr.size[0] = inputs[1]->attr.size[1];
|
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
|
||||||
outputs[0]->attr.size[1] = inputs[1]->attr.size[2];
|
{
|
||||||
outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
|
outputs[0]->attr.dim_num = rank;
|
||||||
if (4 == inputs[0]->attr.dim_num) {
|
outputs[0]->attr.size[0] = inputs[0]->attr.size[2];
|
||||||
outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
|
outputs[0]->attr.size[1] = inputs[0]->attr.size[0];
|
||||||
|
outputs[0]->attr.size[2] = inputs[0]->attr.size[1];
|
||||||
|
if (rank > 3)
|
||||||
|
{
|
||||||
|
memcpy( &outputs[0]->attr.size[3], &inputs[0]->attr.size[3], (rank - 3) * sizeof(vsi_size_t) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return TRUE;
|
return ret;
|
||||||
} /* op_setup() */
|
} /* op_setup() */
|
||||||
|
|
||||||
static vsi_status op_init
|
|
||||||
(
|
|
||||||
vsi_nn_node_t* self
|
|
||||||
)
|
|
||||||
{
|
|
||||||
/* TODO
|
|
||||||
//self->nn_param.bilinear_grid_sample.local = \
|
|
||||||
// (bilinear_grid_sample_local_data_t*)malloc(sizeof(bilinear_grid_sample_local_data_t));
|
|
||||||
*/
|
|
||||||
|
|
||||||
return VSI_SUCCESS;
|
|
||||||
} /* op_init() */
|
|
||||||
|
|
||||||
static vsi_status op_deinit
|
|
||||||
(
|
|
||||||
vsi_nn_node_t* self
|
|
||||||
)
|
|
||||||
{
|
|
||||||
vsi_status status = VSI_SUCCESS;
|
|
||||||
|
|
||||||
status = vsi_nn_op_common_deinit(self);
|
|
||||||
|
|
||||||
/* TODO
|
|
||||||
//vsi_nn_safe_free(self->nn_param.bilinear_grid_sample.local);
|
|
||||||
*/
|
|
||||||
|
|
||||||
return status;
|
|
||||||
} /* op_deinit() */
|
|
||||||
|
|
||||||
__BEGIN_DECLS
|
__BEGIN_DECLS
|
||||||
|
|
||||||
/* Registrar */
|
/* Registrar */
|
||||||
DEF_OP_REG
|
DEF_OP_REG
|
||||||
(
|
(
|
||||||
/* op_name */ BILINEAR_GRID_SAMPLE,
|
/* op_name */ CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX,
|
||||||
/* init */ op_init,
|
/* init */ NULL,
|
||||||
/* compute */ op_compute,
|
/* compute */ op_compute,
|
||||||
/* deinit */ op_deinit,
|
/* deinit */ vsi_nn_op_common_deinit,
|
||||||
/* check */ op_check,
|
/* check */ op_check,
|
||||||
/* setup */ op_setup,
|
/* setup */ op_setup,
|
||||||
/* optimize */ NULL,
|
/* optimize */ NULL,
|
||||||
|
|
@ -0,0 +1,127 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
#include "vsi_nn_log.h"
|
||||||
|
#include "vsi_nn_node.h"
|
||||||
|
#include "vsi_nn_prv.h"
|
||||||
|
#include "vsi_nn_ops.h"
|
||||||
|
#include "vsi_nn_tensor.h"
|
||||||
|
#include "utils/vsi_nn_util.h"
|
||||||
|
#include "kernel/vsi_nn_kernel.h"
|
||||||
|
|
||||||
|
/* Op-local state for the confidence kernel. Currently unused: the struct
 * only holds a placeholder so the type exists for future extension. */
typedef struct _tiny_yolov4_postprocess_confidence_local_data_t {
    int32_t placeholder;
} tiny_yolov4_postprocess_confidence_local_data_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Declare number of input and output.
|
||||||
|
*/
|
||||||
|
#define _INPUT_NUM (1)
|
||||||
|
#define _OUTPUT_NUM (1)
|
||||||
|
|
||||||
|
static vsi_status op_compute
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t ** inputs,
|
||||||
|
vsi_nn_tensor_t ** outputs
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
|
||||||
|
self->n = vsi_nn_kernel_selector( self->graph, "tiny_yolov4_postprocess_confidence",
|
||||||
|
inputs, 1, outputs, 1, NULL );
|
||||||
|
|
||||||
|
if ( self->n )
|
||||||
|
{
|
||||||
|
status = VSI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
} /* op_compute() */
|
||||||
|
|
||||||
|
/* Shape/dtype validation hook; currently accepts everything. */
static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    /*TODO: Check tensor shapes. */
    VSI_UNREFERENCED(self);
    VSI_UNREFERENCED(inputs);
    VSI_UNREFERENCED(outputs);
    return TRUE;
} /* op_check() */
|
||||||
|
|
||||||
|
static vsi_bool op_setup
|
||||||
|
(
|
||||||
|
vsi_nn_node_t * self,
|
||||||
|
vsi_nn_tensor_t ** inputs,
|
||||||
|
vsi_nn_tensor_t ** outputs
|
||||||
|
)
|
||||||
|
{
|
||||||
|
uint32_t rank = inputs[0]->attr.dim_num;
|
||||||
|
vsi_bool ret = TRUE;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
|
||||||
|
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
|
||||||
|
{
|
||||||
|
outputs[0]->attr.dim_num = rank;
|
||||||
|
outputs[0]->attr.size[0] = inputs[0]->attr.size[2] - 1;
|
||||||
|
outputs[0]->attr.size[1] = inputs[0]->attr.size[0];
|
||||||
|
outputs[0]->attr.size[2] = inputs[0]->attr.size[1];
|
||||||
|
if (rank > 3)
|
||||||
|
{
|
||||||
|
memcpy( &outputs[0]->attr.size[3], &inputs[0]->attr.size[3], (rank - 3) * sizeof(vsi_size_t) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
} /* op_setup() */
|
||||||
|
|
||||||
|
|
||||||
|
__BEGIN_DECLS

/* Registrar: publish the standalone confidence kernel op. No op-specific
 * init, and teardown falls back to the common deinit helper. */
DEF_OP_REG
    (
    /* op_name    */ CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE,
    /* init       */ NULL,
    /* compute    */ op_compute,
    /* deinit     */ vsi_nn_op_common_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ NULL,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS
|
||||||
|
|
||||||
|
|
@ -54,20 +54,26 @@ DEF_KERNEL_EXECUTOR(_softmax_compute)
|
||||||
size_t param_size
|
size_t param_size
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VX_SUCCESS;
|
vsi_status status = VSI_FAILURE;
|
||||||
float *buffer[_CPU_IO_NUM] = {NULL};
|
float *buffer[_CPU_IO_NUM] = {NULL};
|
||||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = {NULL};
|
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = {NULL};
|
||||||
vsi_nn_kernel_tensor_attr_t *attr[_CPU_IO_NUM] = {NULL};
|
vsi_nn_kernel_tensor_attr_t *attr[_CPU_IO_NUM] = {NULL};
|
||||||
uint32_t i = 0, out_elements = 0;
|
uint32_t i = 0, out_elements = 0;
|
||||||
int32_t axis;
|
int32_t axis;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0]; // input0
|
tensors[0] = (vsi_nn_kernel_tensor_t)param[0]; // input0
|
||||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1]; // input1
|
tensors[1] = (vsi_nn_kernel_tensor_t)param[1]; // input1
|
||||||
tensors[2] = (vsi_nn_kernel_tensor_t)param[2]; // output
|
tensors[2] = (vsi_nn_kernel_tensor_t)param[2]; // output
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create(tensors[0]);
|
attr[0] = vsi_nn_kernel_tensor_attr_create(tensors[0]);
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create(tensors[1]);
|
attr[1] = vsi_nn_kernel_tensor_attr_create(tensors[1]);
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
attr[2] = vsi_nn_kernel_tensor_attr_create(tensors[2]);
|
attr[2] = vsi_nn_kernel_tensor_attr_create(tensors[2]);
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
|
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
|
||||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
|
@ -133,6 +139,8 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_t* kernel
|
vsi_nn_kernel_t* kernel
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||||
return VSI_SUCCESS;
|
return VSI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
@ -153,6 +161,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
int32_t axis = 0;
|
int32_t axis = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||||
status = _query_kernel(inputs, outputs, kernel);
|
status = _query_kernel(inputs, outputs, kernel);
|
||||||
if(status != VSI_SUCCESS)
|
if(status != VSI_SUCCESS)
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ DEF_KERNEL_EXECUTOR(_softmax_exec)
|
||||||
size_t param_size
|
size_t param_size
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VX_SUCCESS;
|
vsi_status status = VSI_FAILURE;
|
||||||
float* buffer[_CPU_IO_NUM] = { NULL };
|
float* buffer[_CPU_IO_NUM] = { NULL };
|
||||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
||||||
vsi_nn_kernel_tensor_attr_t* attr[_CPU_IO_NUM] = { NULL };
|
vsi_nn_kernel_tensor_attr_t* attr[_CPU_IO_NUM] = { NULL };
|
||||||
|
|
@ -64,11 +64,16 @@ DEF_KERNEL_EXECUTOR(_softmax_exec)
|
||||||
float fMax = 0.0;
|
float fMax = 0.0;
|
||||||
float fProbSum = 0.0f;
|
float fProbSum = 0.0f;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
||||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &sf_axis);
|
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &sf_axis);
|
||||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
|
@ -141,6 +146,8 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_t* kernel
|
vsi_nn_kernel_t* kernel
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||||
return VSI_SUCCESS;
|
return VSI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
@ -161,6 +168,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
int32_t axis = 0;
|
int32_t axis = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||||
|
|
||||||
status = _query_kernel( inputs, outputs, kernel );
|
status = _query_kernel( inputs, outputs, kernel );
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,7 @@ static vx_param_description_t _custom_warp_affine_kernel_param_def[] =
|
||||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
// Add kererl parameters here
|
// Add kererl parameters here
|
||||||
};
|
};
|
||||||
#define _CUSTOM_WARP_AFFINE_PARAM_NUM _cnt_of_array( _custom_warp_affine_kernel_param_def )
|
#define _CUSTOM_WARP_AFFINE_PARAM_NUM _cnt_of_array( _custom_warp_affine_kernel_param_def )
|
||||||
|
|
@ -97,7 +98,7 @@ static vsi_bool _read_pixel
|
||||||
|
|
||||||
if (out_of_bounds)
|
if (out_of_bounds)
|
||||||
{
|
{
|
||||||
*pixel = 205.0f;
|
*pixel = 0.0f;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -125,6 +126,7 @@ DEF_KERNEL_EXECUTOR(_compute)
|
||||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
||||||
vsi_nn_kernel_tensor_attr_t* attr[_CPU_IO_NUM] = { NULL };
|
vsi_nn_kernel_tensor_attr_t* attr[_CPU_IO_NUM] = { NULL };
|
||||||
int32_t type = 0;
|
int32_t type = 0;
|
||||||
|
int32_t rgb_type = 0;
|
||||||
float matrix[6] = {0};
|
float matrix[6] = {0};
|
||||||
vsi_size_t i = 0;
|
vsi_size_t i = 0;
|
||||||
vsi_size_t b = 0;
|
vsi_size_t b = 0;
|
||||||
|
|
@ -135,11 +137,16 @@ DEF_KERNEL_EXECUTOR(_compute)
|
||||||
vsi_size_t height = 0;
|
vsi_size_t height = 0;
|
||||||
vsi_size_t outer_size = 1;
|
vsi_size_t outer_size = 1;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
||||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
|
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
|
||||||
|
|
||||||
|
|
@ -153,6 +160,7 @@ DEF_KERNEL_EXECUTOR(_compute)
|
||||||
|
|
||||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_TYPE],
|
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_TYPE],
|
||||||
&type);
|
&type);
|
||||||
|
status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[9], &rgb_type);
|
||||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
for (i = 0; i < 6; i++)
|
for (i = 0; i < 6; i++)
|
||||||
{
|
{
|
||||||
|
|
@ -172,34 +180,95 @@ DEF_KERNEL_EXECUTOR(_compute)
|
||||||
{
|
{
|
||||||
float *src_base = buffer[0] + b * attr[0]->shape->data[0] * attr[0]->shape->data[1];
|
float *src_base = buffer[0] + b * attr[0]->shape->data[0] * attr[0]->shape->data[1];
|
||||||
float *dst_base = buffer[1] + b * width * height;
|
float *dst_base = buffer[1] + b * width * height;
|
||||||
for (y = 0; y < height; y++)
|
|
||||||
|
if ( rgb_type == VSI_NN_WARP_AFFINE_TYPE_RGB )
|
||||||
{
|
{
|
||||||
for (x = 0; x < width; x++)
|
width = width / 3;
|
||||||
|
for (y = 0; y < height; y++)
|
||||||
{
|
{
|
||||||
float xf = 0;
|
for (x = 0; x < width; x++)
|
||||||
float yf = 0;
|
|
||||||
float dst = 0;
|
|
||||||
|
|
||||||
_transform_affine(x, y, matrix, &xf, &yf);
|
|
||||||
if (type == VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR)
|
|
||||||
{
|
{
|
||||||
_read_pixel(src_base, attr[0], xf, yf, &dst);
|
float xf = 0;
|
||||||
dst_base[y * width + x] = dst;
|
float yf = 0;
|
||||||
|
float dst = 0;
|
||||||
|
|
||||||
|
_transform_affine(x, y, matrix, &xf, &yf);
|
||||||
|
|
||||||
|
if (type == VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR)
|
||||||
|
{
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf), floorf(yf), &dst);
|
||||||
|
dst_base[y * 3 * width + 3 * x] = dst;
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf) + 1, floorf(yf), &dst);
|
||||||
|
dst_base[y * 3 * width + 3 * x + 1] = dst;
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf) + 2, floorf(yf), &dst);
|
||||||
|
dst_base[y * 3 * width + 3 * x + 2] = dst;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
float tl = 0, tr = 0, bl = 0, br = 0;
|
||||||
|
float ar = xf - floorf(xf);
|
||||||
|
float ab = yf - floorf(yf);
|
||||||
|
float al = 1.0f - ar;
|
||||||
|
float at = 1.0f - ab;
|
||||||
|
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf), floorf(yf), &tl);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1), floorf(yf), &tr);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf), floorf(yf) + 1, &bl);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1), floorf(yf) + 1, &br);
|
||||||
|
|
||||||
|
dst_base[y * 3 * width + 3 * x] =
|
||||||
|
tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
|
||||||
|
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf) + 1, floorf(yf), &tl);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 1, floorf(yf), &tr);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf) + 1, floorf(yf) + 1, &bl);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 1, floorf(yf) + 1, &br);
|
||||||
|
|
||||||
|
dst_base[y * 3 * width + 3 * x + 1] =
|
||||||
|
tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
|
||||||
|
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf) + 2, floorf(yf), &tl);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 2, floorf(yf), &tr);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * floorf(xf) + 2, floorf(yf) + 1, &bl);
|
||||||
|
_read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 2, floorf(yf) + 1, &br);
|
||||||
|
|
||||||
|
dst_base[y * 3 * width + 3 * x + 2] =
|
||||||
|
tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (y = 0; y < height; y++)
|
||||||
|
{
|
||||||
|
for (x = 0; x < width; x++)
|
||||||
{
|
{
|
||||||
float tl = 0, tr = 0, bl = 0, br = 0;
|
float xf = 0;
|
||||||
float ar = xf - floorf(xf);
|
float yf = 0;
|
||||||
float ab = yf - floorf(yf);
|
float dst = 0;
|
||||||
float al = 1.0f - ar;
|
|
||||||
float at = 1.0f - ab;
|
|
||||||
|
|
||||||
_read_pixel(src_base, attr[0], floorf(xf), floorf(yf), &tl);
|
_transform_affine(x, y, matrix, &xf, &yf);
|
||||||
_read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf), &tr);
|
if (type == VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR)
|
||||||
_read_pixel(src_base, attr[0], floorf(xf), floorf(yf) + 1, &bl);
|
{
|
||||||
_read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf) + 1, &br);
|
_read_pixel(src_base, attr[0], xf, yf, &dst);
|
||||||
|
dst_base[y * width + x] = dst;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
float tl = 0, tr = 0, bl = 0, br = 0;
|
||||||
|
float ar = xf - floorf(xf);
|
||||||
|
float ab = yf - floorf(yf);
|
||||||
|
float al = 1.0f - ar;
|
||||||
|
float at = 1.0f - ab;
|
||||||
|
|
||||||
dst_base[y * width + x] = tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
|
_read_pixel(src_base, attr[0], floorf(xf), floorf(yf), &tl);
|
||||||
|
_read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf), &tr);
|
||||||
|
_read_pixel(src_base, attr[0], floorf(xf), floorf(yf) + 1, &bl);
|
||||||
|
_read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf) + 1, &br);
|
||||||
|
|
||||||
|
dst_base[y * width + x] = tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -233,6 +302,8 @@ static vsi_status _query_kernel
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||||
kernel->info.function = _compute;
|
kernel->info.function = _compute;
|
||||||
kernel->info.parameters = _custom_warp_affine_kernel_param_def;
|
kernel->info.parameters = _custom_warp_affine_kernel_param_def;
|
||||||
|
|
@ -260,6 +331,7 @@ static vsi_nn_kernel_node_t _setup
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
size_t buffer_size = 0;
|
size_t buffer_size = 0;
|
||||||
int32_t type = vsi_nn_kernel_param_get_int32( params, "type");
|
int32_t type = vsi_nn_kernel_param_get_int32( params, "type");
|
||||||
|
int32_t rgb_type = vsi_nn_kernel_param_get_int32( params, "rgb_type");
|
||||||
float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size );
|
float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size );
|
||||||
|
|
||||||
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
|
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
|
||||||
|
|
@ -278,6 +350,8 @@ static vsi_nn_kernel_node_t _setup
|
||||||
node_params[SCALAR_MATRIX_OFFSET + i] = vsi_nn_kernel_scalar_create(
|
node_params[SCALAR_MATRIX_OFFSET + i] = vsi_nn_kernel_scalar_create(
|
||||||
graph, F32, &buffer[i] );
|
graph, F32, &buffer[i] );
|
||||||
}
|
}
|
||||||
|
node_params[9] = vsi_nn_kernel_scalar_create(
|
||||||
|
graph, I32, &rgb_type );
|
||||||
|
|
||||||
/* Pass parameters to node. */
|
/* Pass parameters to node. */
|
||||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _CUSTOM_WARP_AFFINE_PARAM_NUM );
|
status = vsi_nn_kernel_node_pass_param( node, node_params, _CUSTOM_WARP_AFFINE_PARAM_NUM );
|
||||||
|
|
@ -286,6 +360,7 @@ static vsi_nn_kernel_node_t _setup
|
||||||
{
|
{
|
||||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] );
|
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] );
|
||||||
}
|
}
|
||||||
|
vsi_nn_kernel_scalar_release( &node_params[9] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return node;
|
return node;
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ static vsi_bool _read_pixel
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_size_t width = attr->shape->data[0];
|
vsi_size_t width = attr->shape->data[0];
|
||||||
vsi_size_t height = attr->shape->data[1];
|
vsi_size_t height = attr->shape->size > 1 ? attr->shape->data[1] : 1;
|
||||||
vsi_bool out_of_bounds = (x < 0 || y < 0 || x >= width || y >= height);
|
vsi_bool out_of_bounds = (x < 0 || y < 0 || x >= width || y >= height);
|
||||||
vsi_size_t bx = 0, by = 0;
|
vsi_size_t bx = 0, by = 0;
|
||||||
|
|
||||||
|
|
@ -139,11 +139,16 @@ DEF_KERNEL_EXECUTOR(_compute)
|
||||||
vsi_size_t height = 0;
|
vsi_size_t height = 0;
|
||||||
vsi_size_t outer_size = 1;
|
vsi_size_t outer_size = 1;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
||||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
|
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
|
||||||
|
|
||||||
|
|
@ -237,6 +242,8 @@ static vsi_status _query_kernel
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||||
kernel->info.function = _compute;
|
kernel->info.function = _compute;
|
||||||
kernel->info.parameters = _custom_warp_perspective_kernel_param_def;
|
kernel->info.parameters = _custom_warp_perspective_kernel_param_def;
|
||||||
|
|
|
||||||
|
|
@ -73,6 +73,8 @@ DEF_KERNEL_INITIALIZER(_softmax_initializer)
|
||||||
{0, 0, 0}, // local_size: local group size in thread
|
{0, 0, 0}, // local_size: local group size in thread
|
||||||
{0, 0, 0}}; // global_size: image size in thread
|
{0, 0, 0}}; // global_size: image size in thread
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
if (!attr)
|
if (!attr)
|
||||||
{
|
{
|
||||||
|
|
@ -144,6 +146,8 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_t* kernel
|
vsi_nn_kernel_t* kernel
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||||
|
|
||||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
||||||
|
|
@ -170,6 +174,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
int32_t axis = 0;
|
int32_t axis = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||||
|
|
||||||
status = _query_kernel( inputs, outputs, kernel );
|
status = _query_kernel( inputs, outputs, kernel );
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,357 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
#include "vsi_nn_tensor.h"
|
||||||
|
#include "vsi_nn_graph.h"
|
||||||
|
#include "vsi_nn_log.h"
|
||||||
|
#include "vsi_nn_error.h"
|
||||||
|
#include "vsi_nn_prv.h"
|
||||||
|
#include "vsi_nn_tensor_util.h"
|
||||||
|
#include "utils/vsi_nn_util.h"
|
||||||
|
#include "kernel/vsi_nn_kernel.h"
|
||||||
|
|
||||||
|
__BEGIN_DECLS
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define kernel meta.
|
||||||
|
*/
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
INTERNAL_KERNEL_TINY_YOLOV4_POSTPROCESS_BOX,
|
||||||
|
} _internal_kernel_e;
|
||||||
|
|
||||||
|
#define _SOURCE "tiny_yolov4_postprocess_box"
|
||||||
|
#define _KERNEL_NAME CVIVANTE_NAMESPACE("evis.tiny_yolov4_postprocess_box_U8_U8toU8")
|
||||||
|
|
||||||
|
// Add kernel hashtable here
|
||||||
|
#define TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
||||||
|
(( IN0_DTYPE ) | ( IN1_DTYPE << 8 ) | ( OUT_DTYPE << 16 ))
|
||||||
|
#define PACK_KERNEL_MAP( IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
||||||
|
{ TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ), \
|
||||||
|
_KERNEL_NAME, _SOURCE }
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint32_t key;
|
||||||
|
char * function_name;
|
||||||
|
const char * source_name;
|
||||||
|
} _kernel_map_type;
|
||||||
|
|
||||||
|
static const _kernel_map_type _tiny_yolov4_postprocess_box_kernel_map[] =
|
||||||
|
{
|
||||||
|
// Register kernel here
|
||||||
|
PACK_KERNEL_MAP( U8, U8, U8 ),
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Kernel params
|
||||||
|
*/
|
||||||
|
static vx_param_description_t _tiny_yolov4_postprocess_box_kernel_param_def[] =
|
||||||
|
{
|
||||||
|
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
// Add kererl parameters here
|
||||||
|
};
|
||||||
|
#define _TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM _cnt_of_array( _tiny_yolov4_postprocess_box_kernel_param_def )
|
||||||
|
#define SCALAR_BIAS_0_VALUE (3)
|
||||||
|
#define SCALAR_BIAS_1_VALUE (4)
|
||||||
|
/*
|
||||||
|
* Kernel initializer
|
||||||
|
*/
|
||||||
|
DEF_KERNEL_INITIALIZER(_tiny_yolov4_postprocess_box_initializer)
|
||||||
|
(
|
||||||
|
vsi_nn_kernel_node_t node,
|
||||||
|
const vsi_nn_kernel_node_param_t * param,
|
||||||
|
size_t param_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
gpu_param_t gpu_param = {
|
||||||
|
3,
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0}
|
||||||
|
};
|
||||||
|
vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
|
||||||
|
float CONST2 = 16.0f;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
attr[2] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
// Add initializer
|
||||||
|
gpu_param.dim = 2;
|
||||||
|
gpu_param.global_scale[0] = 4;
|
||||||
|
gpu_param.global_scale[1] = 1;
|
||||||
|
gpu_param.global_size[0] = gpu_align_p2(
|
||||||
|
(attr[0]->shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||||
|
/ gpu_param.global_scale[0], 8);
|
||||||
|
gpu_param.global_size[1] = 1;
|
||||||
|
|
||||||
|
if (attr[0]->shape->data[0] == 13 * 13)
|
||||||
|
{
|
||||||
|
CONST2 = 32.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (attr[0]->dtype == U8 && attr[1]->dtype == U8 && attr[2]->dtype == U8)
|
||||||
|
{
|
||||||
|
float input0_scale = attr[0]->scale;
|
||||||
|
float input0_tail = 0 - (float)attr[0]->zero_point * input0_scale;
|
||||||
|
float input1_scale = attr[1]->scale;
|
||||||
|
float input1_tail = 0 - (float)attr[1]->zero_point * input1_scale;
|
||||||
|
float output_scale = 1.0f / attr[2]->scale;
|
||||||
|
float output_zp = (float)attr[2]->zero_point;
|
||||||
|
gpu_dp_inst_t uniExtract8Data_2x8 = {{
|
||||||
|
0x33333333, // TCfg
|
||||||
|
0x11110000, // ASelt
|
||||||
|
0x03020100, 0x03020100, // ABin
|
||||||
|
0x00000000, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00002400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniDatatoFloat32_0_4x4 = {{
|
||||||
|
0x01010101, // TCfg
|
||||||
|
0x00000000, // ASelt
|
||||||
|
0x00010000, 0x00030002, // ABin
|
||||||
|
0x02020202, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00000400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000001, 0x00000000, 0x00000001, 0x00000000,
|
||||||
|
0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniDatatoFloat32_1_4x4 = {{
|
||||||
|
0x01010101, // TCfg
|
||||||
|
0x00000000, // ASelt
|
||||||
|
0x00050004, 0x00070006, // ABin
|
||||||
|
0x02020202, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00000400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000001, 0x00000000, 0x00000001, 0x00000000,
|
||||||
|
0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniDataTranspose_0_2x8 = {{
|
||||||
|
0x11111111, // TCfg
|
||||||
|
0x00000000, // ASelt
|
||||||
|
0x0c080400, 0x0d090501, // ABin
|
||||||
|
0x22222222, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00000400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000001, 0x00000001, 0x00000001, 0x00000001,
|
||||||
|
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniDataTranspose_1_2x8 = {{
|
||||||
|
0x11111111, // TCfg
|
||||||
|
0x00000000, // ASelt
|
||||||
|
0x0e0a0602, 0x0f0b0703, // ABin
|
||||||
|
0x22222222, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00000400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000001, 0x00000001, 0x00000001, 0x00000001,
|
||||||
|
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
|
||||||
|
status = vsi_nn_kernel_gpu_add_param( node, "uniDatatoFloat32_0_4x4", &uniDatatoFloat32_0_4x4);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniDatatoFloat32_1_4x4", &uniDatatoFloat32_1_4x4);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniExtract8Data_2x8", &uniExtract8Data_2x8);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniDataTranspose_0_2x8", &uniDataTranspose_0_2x8);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniDataTranspose_1_2x8", &uniDataTranspose_1_2x8);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "input0_scale", &input0_scale);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "input0_tail", &input0_tail);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "input1_scale", &input1_scale);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "input1_tail", &input1_tail);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "output_scale", &output_scale);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "output_zp", &output_zp);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "CONST2", &CONST2);
|
||||||
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
}
|
||||||
|
|
||||||
|
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||||
|
|
||||||
|
final:
|
||||||
|
if (attr[0])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[0] );
|
||||||
|
}
|
||||||
|
if (attr[1])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[1] );
|
||||||
|
}
|
||||||
|
if (attr[2])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[2] );
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
} /* _tiny_yolov4_postprocess_box_initializer() */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Query kernel
|
||||||
|
*/
|
||||||
|
static vsi_status _query_kernel
|
||||||
|
(
|
||||||
|
vsi_nn_kernel_t * kernel,
|
||||||
|
vsi_nn_tensor_t * const * const inputs,
|
||||||
|
vsi_nn_tensor_t * const * const outputs
|
||||||
|
/* Add extra params */
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
vsi_nn_kernel_dtype_e in0_dtype;
|
||||||
|
vsi_nn_kernel_dtype_e in1_dtype;
|
||||||
|
vsi_nn_kernel_dtype_e out_dtype;
|
||||||
|
const _kernel_map_type * kernel_map = _tiny_yolov4_postprocess_box_kernel_map;
|
||||||
|
size_t kernel_map_size = _cnt_of_array( _tiny_yolov4_postprocess_box_kernel_map );
|
||||||
|
vx_param_description_t * param_def = _tiny_yolov4_postprocess_box_kernel_param_def;
|
||||||
|
vx_kernel_initialize_f initializer = _tiny_yolov4_postprocess_box_initializer;
|
||||||
|
|
||||||
|
uint32_t key;
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
|
in1_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
|
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
||||||
|
key = TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( in0_dtype, in1_dtype, out_dtype );
|
||||||
|
|
||||||
|
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||||
|
{
|
||||||
|
if ( kernel_map[i].key == key )
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( i < (uint32_t)kernel_map_size )
|
||||||
|
{
|
||||||
|
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||||
|
kernel->info.parameters = param_def;
|
||||||
|
kernel->info.numParams = _cnt_of_array( _tiny_yolov4_postprocess_box_kernel_param_def );
|
||||||
|
kernel->info.initialize = initializer;
|
||||||
|
// Register code source
|
||||||
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
||||||
|
"vsi_nn_kernel_header",
|
||||||
|
kernel_map[i].source_name );
|
||||||
|
// Register binary source
|
||||||
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||||
|
kernel_map[i].source_name );
|
||||||
|
status = VSI_SUCCESS;
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
} /* _query_kernel() */
|
||||||
|
|
||||||
|
|
||||||
|
static vsi_nn_kernel_node_t _setup
|
||||||
|
(
|
||||||
|
vsi_nn_graph_t * graph,
|
||||||
|
vsi_nn_tensor_t ** inputs,
|
||||||
|
size_t input_num,
|
||||||
|
vsi_nn_tensor_t ** outputs,
|
||||||
|
size_t output_num,
|
||||||
|
const vsi_nn_kernel_param_t * params,
|
||||||
|
vsi_nn_kernel_t * kernel
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
vsi_nn_kernel_node_param_t node_params[_TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM];
|
||||||
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
|
vsi_size_t shape[3][VSI_NN_MAX_DIM_NUM] = { 0 };
|
||||||
|
vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
|
||||||
|
float bias_0 = vsi_nn_kernel_param_get_float32( params, "bias_0" );
|
||||||
|
float bias_1 = vsi_nn_kernel_param_get_float32( params, "bias_1" );
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
|
memcpy(shape[0], inputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
|
||||||
|
shape[0][0] = shape[0][0] * shape[0][1];
|
||||||
|
shape[0][1] = shape[0][2];
|
||||||
|
shape[0][2] = 1;
|
||||||
|
|
||||||
|
memcpy(shape[1], inputs[1]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
|
||||||
|
shape[1][0] = shape[1][0] * shape[1][1];
|
||||||
|
shape[1][1] = shape[1][2];
|
||||||
|
shape[1][2] = 1;
|
||||||
|
|
||||||
|
memcpy(shape[2], outputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
|
||||||
|
shape[2][0] = shape[2][0];
|
||||||
|
shape[2][1] = shape[2][2] * shape[2][1];
|
||||||
|
shape[2][2] = 1;
|
||||||
|
|
||||||
|
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||||
|
inputs[0], shape[0], inputs[0]->attr.dim_num );
|
||||||
|
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
|
||||||
|
inputs[1], shape[1], inputs[1]->attr.dim_num );
|
||||||
|
reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
|
||||||
|
outputs[0], shape[2], outputs[0]->attr.dim_num );
|
||||||
|
|
||||||
|
if ( !vsi_nn_kernel_gpu_check_shape(
|
||||||
|
reshape_tensors[0]->attr.size, reshape_tensors[0]->attr.dim_num ) )
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
|
||||||
|
if ( VSI_SUCCESS == status)
|
||||||
|
{
|
||||||
|
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||||
|
if ( node )
|
||||||
|
{
|
||||||
|
/* Set inputs and outputs */
|
||||||
|
vsi_nn_kernel_node_pack_io( node_params, _TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM,
|
||||||
|
reshape_tensors, input_num, &reshape_tensors[2], output_num );
|
||||||
|
/* Pass parameters to node. */
|
||||||
|
node_params[SCALAR_BIAS_0_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &bias_0 );
|
||||||
|
node_params[SCALAR_BIAS_1_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &bias_1 );
|
||||||
|
status = vsi_nn_kernel_node_pass_param( node, node_params, _TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM );
|
||||||
|
vsi_nn_kernel_scalar_release( &node_params[SCALAR_BIAS_0_VALUE] );
|
||||||
|
vsi_nn_kernel_scalar_release( &node_params[SCALAR_BIAS_1_VALUE] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vsi_safe_release_tensor( reshape_tensors[0] );
|
||||||
|
vsi_safe_release_tensor( reshape_tensors[1] );
|
||||||
|
vsi_safe_release_tensor( reshape_tensors[2] );
|
||||||
|
|
||||||
|
return node;
|
||||||
|
} /* _setup() */
|
||||||
|
|
||||||
|
__END_DECLS
|
||||||
|
|
||||||
|
REGISTER_BACKEND_EVIS( tiny_yolov4_postprocess_box, _setup )
|
||||||
|
|
||||||
|
|
@ -0,0 +1,320 @@
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Vivante Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "vsi_nn_types.h"
|
||||||
|
#include "vsi_nn_tensor.h"
|
||||||
|
#include "vsi_nn_graph.h"
|
||||||
|
#include "vsi_nn_log.h"
|
||||||
|
#include "vsi_nn_error.h"
|
||||||
|
#include "vsi_nn_prv.h"
|
||||||
|
#include "vsi_nn_tensor_util.h"
|
||||||
|
#include "utils/vsi_nn_util.h"
|
||||||
|
#include "kernel/vsi_nn_kernel.h"
|
||||||
|
|
||||||
|
__BEGIN_DECLS
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define kernel meta.
|
||||||
|
*/
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
INTERNAL_KERNEL_TINY_YOLOV4_POSTPROCESS_CONFIDENCE,
|
||||||
|
} _internal_kernel_e;
|
||||||
|
|
||||||
|
#define _SOURCE "tiny_yolov4_postprocess_confidence"
|
||||||
|
#define _KERNEL_NAME CVIVANTE_NAMESPACE("evis.tiny_yolov4_postprocess_conf_U8toU8")
|
||||||
|
|
||||||
|
// Add kernel hashtable here
|
||||||
|
#define _CONFIDENCE_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
|
||||||
|
(( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
|
||||||
|
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
|
||||||
|
{ _CONFIDENCE_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
|
||||||
|
_KERNEL_NAME, _SOURCE }
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint32_t key;
|
||||||
|
char * function_name;
|
||||||
|
const char * source_name;
|
||||||
|
} _kernel_map_type;
|
||||||
|
|
||||||
|
static const _kernel_map_type _tiny_yolov4_postprocess_confidence_kernel_map[] =
|
||||||
|
{
|
||||||
|
// Register kernel here
|
||||||
|
PACK_KERNEL_MAP( U8, U8 ),
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Kernel params
|
||||||
|
*/
|
||||||
|
static vx_param_description_t _tiny_yolov4_postprocess_confidence_kernel_param_def[] =
|
||||||
|
{
|
||||||
|
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||||
|
// Add kererl parameters here
|
||||||
|
};
|
||||||
|
#define _TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM \
|
||||||
|
_cnt_of_array( _tiny_yolov4_postprocess_confidence_kernel_param_def )
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Kernel initializer
|
||||||
|
*/
|
||||||
|
DEF_KERNEL_INITIALIZER(_tiny_yolov4_postprocess_confidence_initializer)
|
||||||
|
(
|
||||||
|
vsi_nn_kernel_node_t node,
|
||||||
|
const vsi_nn_kernel_node_param_t * param,
|
||||||
|
size_t param_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
gpu_param_t gpu_param = {
|
||||||
|
3,
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0}
|
||||||
|
};
|
||||||
|
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
gpu_param.dim = 2;
|
||||||
|
gpu_param.global_scale[0] = 4;
|
||||||
|
gpu_param.global_scale[1] = 4;
|
||||||
|
gpu_param.global_size[0] = gpu_align_p2(
|
||||||
|
(attr[0]->shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||||
|
/ gpu_param.global_scale[0], 4);
|
||||||
|
gpu_param.global_size[1] = (
|
||||||
|
(attr[1]->shape->data[0] + gpu_param.global_scale[1] - 1)
|
||||||
|
/ gpu_param.global_scale[1]);
|
||||||
|
|
||||||
|
if (attr[0]->dtype == U8 && attr[1]->dtype == U8)
|
||||||
|
{
|
||||||
|
float output_scale = attr[0]->scale * attr[0]->scale / attr[1]->scale;
|
||||||
|
int output_zp = attr[1]->zero_point;
|
||||||
|
uint16_t M0 = 0;
|
||||||
|
int32_t postShift = 0;
|
||||||
|
int32_t i = 0;
|
||||||
|
|
||||||
|
gpu_dp_inst_t uniU8TimesU8_0_4x4 = {{
|
||||||
|
0x01010101, // TCfg
|
||||||
|
0x00000000, // ASelt
|
||||||
|
0x00010000, 0x00030002, // ABin
|
||||||
|
0x01010101, // BSelt
|
||||||
|
0x00010000, 0x00030002, // BBin
|
||||||
|
0x00000400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniU16TimesMultiplier_PostShift_2x8 = {{
|
||||||
|
0x11111111, // TCfg
|
||||||
|
0x00000000, // ASelt
|
||||||
|
0x03020100, 0x07060504, // ABin
|
||||||
|
0x22222222, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00000600, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000001, 0x00000001, 0x00000001, 0x00000001,
|
||||||
|
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniU8PlusU8_trans_0_2x8 = {{
|
||||||
|
0xffffffff, // TCfg
|
||||||
|
0x44444444, // ASelt
|
||||||
|
0x0c080400, 0x0d090501, // ABin
|
||||||
|
0x00000000, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00007400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
gpu_dp_inst_t uniU8PlusU8_trans_1_2x8 = {{
|
||||||
|
0xffffffff, // TCfg
|
||||||
|
0x44444444, // ASelt
|
||||||
|
0x0e0a0602, 0x0f0b0703, // ABin
|
||||||
|
0x00000000, // BSelt
|
||||||
|
0x00000000, 0x00000000, // BBin
|
||||||
|
0x00007400, // AccumType, ConstantType, and PostShift
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||||
|
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
|
||||||
|
}, GPU_DP_TYPE_16};
|
||||||
|
|
||||||
|
gpu_quantize_multiplier_16bit((double)output_scale, &M0, &postShift);
|
||||||
|
|
||||||
|
uniU16TimesMultiplier_PostShift_2x8.data[7] |= (postShift & 0x1F);
|
||||||
|
for ( i = 8; i < 16; i++ )
|
||||||
|
{
|
||||||
|
uniU16TimesMultiplier_PostShift_2x8.data[i] = M0;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = vsi_nn_kernel_gpu_add_param( node, "uniU8TimesU8_0_4x4", &uniU8TimesU8_0_4x4);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniU16TimesMultiplier_PostShift_2x8",
|
||||||
|
&uniU16TimesMultiplier_PostShift_2x8);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniU8PlusU8_trans_0_2x8", &uniU8PlusU8_trans_0_2x8);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "uniU8PlusU8_trans_1_2x8", &uniU8PlusU8_trans_1_2x8);
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "output_zp", &output_zp);
|
||||||
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
}
|
||||||
|
|
||||||
|
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||||
|
|
||||||
|
final:
|
||||||
|
if (attr[0])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[0] );
|
||||||
|
}
|
||||||
|
if (attr[1])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[1] );
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
} /* _tiny_yolov4_postprocess_confidence_initializer() */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Query kernel
|
||||||
|
*/
|
||||||
|
static vsi_status _query_kernel
|
||||||
|
(
|
||||||
|
vsi_nn_kernel_t * kernel,
|
||||||
|
vsi_nn_tensor_t * const * const inputs,
|
||||||
|
vsi_nn_tensor_t * const * const outputs
|
||||||
|
/* Add extra params */
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
vsi_nn_kernel_dtype_e in_dtype;
|
||||||
|
vsi_nn_kernel_dtype_e out_dtype;
|
||||||
|
const _kernel_map_type * kernel_map = _tiny_yolov4_postprocess_confidence_kernel_map;
|
||||||
|
size_t kernel_map_size = _cnt_of_array( _tiny_yolov4_postprocess_confidence_kernel_map );
|
||||||
|
vx_param_description_t * param_def = _tiny_yolov4_postprocess_confidence_kernel_param_def;
|
||||||
|
vx_kernel_initialize_f initializer = _tiny_yolov4_postprocess_confidence_initializer;
|
||||||
|
|
||||||
|
uint32_t key;
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
|
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
||||||
|
key = _CONFIDENCE_HASH_KEY( in_dtype, out_dtype );
|
||||||
|
|
||||||
|
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||||
|
{
|
||||||
|
if ( kernel_map[i].key == key )
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( i < (uint32_t)kernel_map_size )
|
||||||
|
{
|
||||||
|
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||||
|
kernel->info.parameters = param_def;
|
||||||
|
kernel->info.numParams = _cnt_of_array( _tiny_yolov4_postprocess_confidence_kernel_param_def );
|
||||||
|
kernel->info.initialize = initializer;
|
||||||
|
// Register code source
|
||||||
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
|
||||||
|
kernel_map[i].source_name );
|
||||||
|
// Register binary source
|
||||||
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||||
|
kernel_map[i].source_name );
|
||||||
|
status = VSI_SUCCESS;
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
} /* _query_kernel() */
|
||||||
|
|
||||||
|
|
||||||
|
static vsi_nn_kernel_node_t _setup
|
||||||
|
(
|
||||||
|
vsi_nn_graph_t * graph,
|
||||||
|
vsi_nn_tensor_t ** inputs,
|
||||||
|
size_t input_num,
|
||||||
|
vsi_nn_tensor_t ** outputs,
|
||||||
|
size_t output_num,
|
||||||
|
const vsi_nn_kernel_param_t * params,
|
||||||
|
vsi_nn_kernel_t * kernel
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
vsi_nn_kernel_node_param_t node_params[_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM];
|
||||||
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
|
vsi_size_t shape[2][VSI_NN_MAX_DIM_NUM] = { 0 };
|
||||||
|
vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
|
memcpy(shape[0], inputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
|
||||||
|
shape[0][0] = shape[0][0] * shape[0][1];
|
||||||
|
shape[0][1] = shape[0][2];
|
||||||
|
shape[0][2] = 1;
|
||||||
|
|
||||||
|
memcpy(shape[1], outputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
|
||||||
|
shape[1][0] = shape[1][0];
|
||||||
|
shape[1][1] = shape[1][2] * shape[1][1];
|
||||||
|
shape[1][2] = 1;
|
||||||
|
|
||||||
|
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
|
||||||
|
inputs[0], shape[0], inputs[0]->attr.dim_num );
|
||||||
|
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
|
||||||
|
outputs[0], shape[1], outputs[0]->attr.dim_num );
|
||||||
|
|
||||||
|
if ( !vsi_nn_kernel_gpu_check_shape(
|
||||||
|
reshape_tensors[0]->attr.size, reshape_tensors[0]->attr.dim_num ) )
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = _query_kernel( kernel, inputs, outputs );
|
||||||
|
if ( VSI_SUCCESS == status)
|
||||||
|
{
|
||||||
|
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||||
|
if ( node )
|
||||||
|
{
|
||||||
|
/* Set inputs and outputs */
|
||||||
|
vsi_nn_kernel_node_pack_io( node_params, _TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM,
|
||||||
|
reshape_tensors, input_num, &reshape_tensors[1], output_num );
|
||||||
|
/* Pass parameters to node. */
|
||||||
|
status = vsi_nn_kernel_node_pass_param( node, node_params,
|
||||||
|
_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vsi_safe_release_tensor(reshape_tensors[0]);
|
||||||
|
vsi_safe_release_tensor(reshape_tensors[1]);
|
||||||
|
|
||||||
|
return node;
|
||||||
|
} /* _setup() */
|
||||||
|
|
||||||
|
__END_DECLS
|
||||||
|
|
||||||
|
REGISTER_BACKEND_EVIS( tiny_yolov4_postprocess_confidence, _setup )
|
||||||
|
|
||||||
|
|
@ -50,18 +50,27 @@ typedef enum _custom_warp_affine_type_e
|
||||||
}custom_warp_affine_type_e;
|
}custom_warp_affine_type_e;
|
||||||
|
|
||||||
#define _CUSTOM_WARP_AFFINE_KERNEL_SOURCE "custom_warp_affine"
|
#define _CUSTOM_WARP_AFFINE_KERNEL_SOURCE "custom_warp_affine"
|
||||||
|
#define _CUSTOM_WARP_AFFINE_RGB_KERNEL_SOURCE "custom_warp_affine_rgb"
|
||||||
|
|
||||||
// Add kernel hashtable here
|
// Add kernel hashtable here
|
||||||
#define CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, IMG_2D ) \
|
#define CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, IMG_2D, RGB_TYPE ) \
|
||||||
(( IN_DTYPE ) | ( OUT_DTYPE << 8 ) | (TYPE << 16) | (IMG_2D << 20))
|
(( IN_DTYPE ) | ( OUT_DTYPE << 8 ) | (TYPE << 16) | (IMG_2D << 20) | (RGB_TYPE << 24))
|
||||||
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
|
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
|
||||||
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 0 ), \
|
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 0, 0 ), \
|
||||||
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE), \
|
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE), \
|
||||||
_CUSTOM_WARP_AFFINE_KERNEL_SOURCE }
|
_CUSTOM_WARP_AFFINE_KERNEL_SOURCE }
|
||||||
#define PACK_2D_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
|
#define PACK_2D_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
|
||||||
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 1 ), \
|
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 1, 0 ), \
|
||||||
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_2D"), \
|
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_2D"), \
|
||||||
_CUSTOM_WARP_AFFINE_KERNEL_SOURCE }
|
_CUSTOM_WARP_AFFINE_KERNEL_SOURCE }
|
||||||
|
#define PACK_RGB_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
|
||||||
|
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 0, 1 ), \
|
||||||
|
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_rgb"), \
|
||||||
|
_CUSTOM_WARP_AFFINE_RGB_KERNEL_SOURCE }
|
||||||
|
#define PACK_RGB_2D_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
|
||||||
|
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 1, 1 ), \
|
||||||
|
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_rgb_2D"), \
|
||||||
|
_CUSTOM_WARP_AFFINE_RGB_KERNEL_SOURCE }
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
|
@ -78,6 +87,12 @@ static const _kernel_map_type _custom_warp_affine_kernel_map[] =
|
||||||
|
|
||||||
PACK_2D_KERNEL_MAP( U8, U8, nearest_neighbor ),
|
PACK_2D_KERNEL_MAP( U8, U8, nearest_neighbor ),
|
||||||
PACK_2D_KERNEL_MAP( U8, U8, bilinear ),
|
PACK_2D_KERNEL_MAP( U8, U8, bilinear ),
|
||||||
|
|
||||||
|
PACK_RGB_KERNEL_MAP( U8, U8, nearest_neighbor ),
|
||||||
|
PACK_RGB_KERNEL_MAP( U8, U8, bilinear ),
|
||||||
|
|
||||||
|
PACK_RGB_2D_KERNEL_MAP( U8, U8, nearest_neighbor ),
|
||||||
|
PACK_RGB_2D_KERNEL_MAP( U8, U8, bilinear ),
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -124,6 +139,8 @@ DEF_KERNEL_INITIALIZER(_custom_warp_affine_initializer)
|
||||||
float matrix4[4] = {0};
|
float matrix4[4] = {0};
|
||||||
int32_t i = 0;
|
int32_t i = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -178,7 +195,81 @@ final:
|
||||||
return status;
|
return status;
|
||||||
} /* _custom_warp_affine_initializer() */
|
} /* _custom_warp_affine_initializer() */
|
||||||
|
|
||||||
|
DEF_KERNEL_INITIALIZER(_custom_warp_affine_rgb_initializer)
|
||||||
|
(
|
||||||
|
vsi_nn_kernel_node_t node,
|
||||||
|
const vsi_nn_kernel_node_param_t * param,
|
||||||
|
size_t param_size
|
||||||
|
)
|
||||||
|
{
|
||||||
|
vsi_status status = VSI_FAILURE;
|
||||||
|
gpu_param_t gpu_param = {
|
||||||
|
3,
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0},
|
||||||
|
{0, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
vsi_nn_kernel_tensor_attr_t* attr[2] = {NULL, NULL};
|
||||||
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
float m[6] = {0};
|
||||||
|
float matrix0[4] = {0};
|
||||||
|
float matrix1[4] = {0};
|
||||||
|
int32_t i = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
for (i = 0; i < 6; i++)
|
||||||
|
{
|
||||||
|
status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MATRIX_OFFSET + i],
|
||||||
|
&m[i]);
|
||||||
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
}
|
||||||
|
|
||||||
|
matrix0[0] = m[0]; matrix0[1] = m[1]; matrix0[2] = m[2]; matrix0[3] = m[3];
|
||||||
|
matrix1[0] = m[4]; matrix1[1] = m[5];
|
||||||
|
|
||||||
|
out_shape = attr[1]->shape;
|
||||||
|
|
||||||
|
gpu_param.global_scale[0] = 2;
|
||||||
|
gpu_param.global_scale[1] = 1;
|
||||||
|
gpu_param.global_scale[2] = 1;
|
||||||
|
gpu_param.global_size[0] = (
|
||||||
|
(out_shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||||
|
/ (3 * gpu_param.global_scale[0]));
|
||||||
|
gpu_param.global_size[1] = (
|
||||||
|
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
|
||||||
|
/ gpu_param.global_scale[1]);
|
||||||
|
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
|
||||||
|
|
||||||
|
status = vsi_nn_kernel_gpu_add_param( node,
|
||||||
|
"matrix0", &matrix0 );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node,
|
||||||
|
"matrix1", &matrix1 );
|
||||||
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
|
||||||
|
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||||
|
|
||||||
|
final:
|
||||||
|
if (attr[0])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[0] );
|
||||||
|
attr[0] = NULL;
|
||||||
|
}
|
||||||
|
if (attr[1])
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_tensor_attr_release( &attr[1] );
|
||||||
|
attr[1] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
} /* _custom_warp_affine_rgb_initializer() */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Query kernel
|
* Query kernel
|
||||||
|
|
@ -188,7 +279,8 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_t * kernel,
|
vsi_nn_kernel_t * kernel,
|
||||||
vsi_nn_tensor_t * const * const inputs,
|
vsi_nn_tensor_t * const * const inputs,
|
||||||
vsi_nn_tensor_t * const * const outputs,
|
vsi_nn_tensor_t * const * const outputs,
|
||||||
int32_t type
|
int32_t type,
|
||||||
|
int32_t rgb_type
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
|
@ -205,8 +297,11 @@ static vsi_status _query_kernel
|
||||||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
||||||
key = CUSTOM_WARP_AFFINE_HASH_KEY( in_dtype, out_dtype, type, is_2d_img );
|
key = CUSTOM_WARP_AFFINE_HASH_KEY( in_dtype, out_dtype, type, is_2d_img, rgb_type );
|
||||||
|
if (rgb_type == 1)
|
||||||
|
{
|
||||||
|
initializer = _custom_warp_affine_rgb_initializer;
|
||||||
|
}
|
||||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||||
{
|
{
|
||||||
if ( kernel_map[i].key == key )
|
if ( kernel_map[i].key == key )
|
||||||
|
|
@ -251,6 +346,7 @@ static vsi_nn_kernel_node_t _setup
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
size_t buffer_size = 0;
|
size_t buffer_size = 0;
|
||||||
int32_t type = vsi_nn_kernel_param_get_int32( params, "type");
|
int32_t type = vsi_nn_kernel_param_get_int32( params, "type");
|
||||||
|
int32_t rgb_type = vsi_nn_kernel_param_get_int32( params, "rgb_type");
|
||||||
float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size );
|
float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size );
|
||||||
|
|
||||||
if (vsi_nn_DtypeCompare(&inputs[0]->attr.dtype, &outputs[0]->attr.dtype) == FALSE)
|
if (vsi_nn_DtypeCompare(&inputs[0]->attr.dtype, &outputs[0]->attr.dtype) == FALSE)
|
||||||
|
|
@ -258,7 +354,7 @@ static vsi_nn_kernel_node_t _setup
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
status = _query_kernel( kernel, inputs, outputs, type );
|
status = _query_kernel( kernel, inputs, outputs, type, rgb_type );
|
||||||
if ( VSI_SUCCESS == status)
|
if ( VSI_SUCCESS == status)
|
||||||
{
|
{
|
||||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||||
|
|
@ -282,7 +378,7 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] );
|
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] );
|
||||||
}
|
}
|
||||||
// Set default border mode.
|
// Set default border mode.
|
||||||
border.constant_value.U32 = 0xcdcdcdcd;
|
border.constant_value.U32 = 0x00000000;
|
||||||
status = vxSetNodeAttribute( (vx_node)node, VX_NODE_BORDER, &border, sizeof(border) );
|
status = vxSetNodeAttribute( (vx_node)node, VX_NODE_BORDER, &border, sizeof(border) );
|
||||||
CHECK_STATUS(status);
|
CHECK_STATUS(status);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -127,6 +127,8 @@ DEF_KERNEL_INITIALIZER(_custom_warp_perspective_initializer)
|
||||||
float matrix4[4] = {0};
|
float matrix4[4] = {0};
|
||||||
int32_t i = 0;
|
int32_t i = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,9 @@ static vsi_status op_compute
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_SUCCESS;
|
vsi_status status = VSI_SUCCESS;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
|
|
||||||
#if defined(VX_DENOISE_POSTPROCESS_SUPPORT) && VX_DENOISE_POSTPROCESS_SUPPORT
|
#if defined(VX_DENOISE_POSTPROCESS_SUPPORT) && VX_DENOISE_POSTPROCESS_SUPPORT
|
||||||
self->n = vxDenoisePostProcesslayer(
|
self->n = vxDenoisePostProcesslayer(
|
||||||
self->graph->g,
|
self->graph->g,
|
||||||
|
|
@ -83,6 +86,9 @@ static vsi_bool op_check
|
||||||
vsi_nn_tensor_t ** outputs
|
vsi_nn_tensor_t ** outputs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_check() */
|
} /* op_check() */
|
||||||
|
|
||||||
|
|
@ -93,6 +99,9 @@ static vsi_bool op_setup
|
||||||
vsi_nn_tensor_t ** outputs
|
vsi_nn_tensor_t ** outputs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_setup() */
|
} /* op_setup() */
|
||||||
|
|
||||||
|
|
@ -101,6 +110,7 @@ static vsi_status op_init
|
||||||
vsi_nn_node_t* self
|
vsi_nn_node_t* self
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
return VSI_SUCCESS;
|
return VSI_SUCCESS;
|
||||||
} /* op_init() */
|
} /* op_init() */
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,9 @@ static vsi_bool op_check
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
/*TODO: Check params. */
|
/*TODO: Check params. */
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_check() */
|
} /* op_check() */
|
||||||
|
|
||||||
|
|
@ -73,6 +76,7 @@ static vsi_bool op_setup
|
||||||
vsi_nn_tensor_t ** outputs
|
vsi_nn_tensor_t ** outputs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num)
|
if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num)
|
||||||
{
|
{
|
||||||
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
|
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,9 @@ static vsi_bool op_check
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
/*TODO: Check params. */
|
/*TODO: Check params. */
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_check() */
|
} /* op_check() */
|
||||||
|
|
||||||
|
|
@ -72,6 +75,7 @@ static vsi_bool op_setup
|
||||||
vsi_nn_tensor_t ** outputs
|
vsi_nn_tensor_t ** outputs
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
if( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
|
if( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
|
||||||
{
|
{
|
||||||
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
|
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,7 @@ static vsi_status op_compute
|
||||||
param = vsi_nn_kernel_param_create();
|
param = vsi_nn_kernel_param_create();
|
||||||
vsi_nn_kernel_param_add_const_buffer( param, "matrix", p->matrix, 6 );
|
vsi_nn_kernel_param_add_const_buffer( param, "matrix", p->matrix, 6 );
|
||||||
vsi_nn_kernel_param_add_int32( param, "type", p->type);
|
vsi_nn_kernel_param_add_int32( param, "type", p->type);
|
||||||
|
vsi_nn_kernel_param_add_int32( param, "rgb_type", p->rgb_type);
|
||||||
|
|
||||||
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
|
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
|
||||||
"custom_warp_affine",
|
"custom_warp_affine",
|
||||||
|
|
@ -78,6 +79,9 @@ static vsi_bool op_check
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
/*TODO: Check tensor shapes. */
|
/*TODO: Check tensor shapes. */
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_check() */
|
} /* op_check() */
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,9 @@ static vsi_bool op_check
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
/*TODO: Check tensor shapes. */
|
/*TODO: Check tensor shapes. */
|
||||||
|
VSI_UNREFERENCED(self);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
} /* op_check() */
|
} /* op_check() */
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
|
||||||
size_t param_size
|
size_t param_size
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VX_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
// Alignment with a power of two value.
|
// Alignment with a power of two value.
|
||||||
gpu_param_t gpu_param = {
|
gpu_param_t gpu_param = {
|
||||||
2,
|
2,
|
||||||
|
|
@ -113,6 +113,8 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t *input0_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t *input0_attr = NULL;
|
||||||
vsi_size_array_t *input_shape = NULL;
|
vsi_size_array_t *input_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
|
input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
|
||||||
CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||||
input_shape = input0_attr->shape;
|
input_shape = input0_attr->shape;
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,8 @@ DEF_KERNEL_INITIALIZER(_argmax_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -183,7 +185,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e output_dtype;
|
vsi_nn_kernel_dtype_e output_dtype;
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
uint32_t key;
|
uint32_t key;
|
||||||
int32_t i;
|
size_t i;
|
||||||
|
|
||||||
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -240,6 +242,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
int32_t axis = 0;
|
int32_t axis = 0;
|
||||||
vsi_size_t axis_size = 0;
|
vsi_size_t axis_size = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||||
|
|
||||||
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||||
|
|
|
||||||
|
|
@ -143,6 +143,8 @@ DEF_KERNEL_INITIALIZER(_argmin_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -183,7 +185,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e output_dtype;
|
vsi_nn_kernel_dtype_e output_dtype;
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
uint32_t key;
|
uint32_t key;
|
||||||
int32_t i;
|
size_t i;
|
||||||
|
|
||||||
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -240,6 +242,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
int32_t axis = 0;
|
int32_t axis = 0;
|
||||||
size_t axis_size = 0;
|
size_t axis_size = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||||
|
|
||||||
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||||
|
|
|
||||||
|
|
@ -129,6 +129,8 @@ DEF_KERNEL_INITIALIZER(_avg_pool3d_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||||
vsi_size_array_t *output_shape = NULL;
|
vsi_size_array_t *output_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
vxReadScalarValue(depth_out, &depth_out_value);
|
vxReadScalarValue(depth_out, &depth_out_value);
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,8 @@ DEF_KERNEL_INITIALIZER(_log_softmax_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
|
||||||
vsi_size_array_t * in_shape = NULL;
|
vsi_size_array_t * in_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
@ -170,7 +172,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e output_dtype;
|
vsi_nn_kernel_dtype_e output_dtype;
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
uint32_t key;
|
uint32_t key;
|
||||||
int i;
|
size_t i;
|
||||||
|
|
||||||
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -238,6 +240,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f;
|
float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f;
|
||||||
float eps = vsi_nn_kernel_param_get_float32(params, "eps");
|
float eps = vsi_nn_kernel_param_get_float32(params, "eps");
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
if ( (inputs[1]->attr.is_const && inputs[2]->attr.is_const)
|
if ( (inputs[1]->attr.is_const && inputs[2]->attr.is_const)
|
||||||
|| ( inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT16
|
|| ( inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT16
|
||||||
&& inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT32 )
|
&& inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT32 )
|
||||||
|
|
|
||||||
|
|
@ -128,6 +128,8 @@ DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t* output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t* output_attr = NULL;
|
||||||
vsi_size_array_t* out_shape = NULL;
|
vsi_size_array_t* out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
output_attr =
|
output_attr =
|
||||||
vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]);
|
vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]);
|
||||||
CHECK_PTR_FAIL_GOTO(output_attr, "Create tensor attr buffer fail.", final);
|
CHECK_PTR_FAIL_GOTO(output_attr, "Create tensor attr buffer fail.", final);
|
||||||
|
|
@ -140,9 +142,8 @@ DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
|
||||||
|
|
||||||
gpu_param.dim = 2;
|
gpu_param.dim = 2;
|
||||||
gpu_param.global_size[0] =
|
gpu_param.global_size[0] =
|
||||||
gpu_align_p2((out_shape->data[0] + gpu_param.global_scale[0] - 1) /
|
(out_shape->data[0] + gpu_param.global_scale[0] - 1) /
|
||||||
gpu_param.global_scale[0],
|
gpu_param.global_scale[0];
|
||||||
4);
|
|
||||||
gpu_param.global_size[1] =
|
gpu_param.global_size[1] =
|
||||||
((out_shape->data[1] + gpu_param.global_scale[1] - 1) /
|
((out_shape->data[1] + gpu_param.global_scale[1] - 1) /
|
||||||
gpu_param.global_scale[1]);
|
gpu_param.global_scale[1]);
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,8 @@ DEF_KERNEL_INITIALIZER(_bucketize_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -128,6 +128,8 @@ DEF_KERNEL_INITIALIZER(_cast_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
@ -251,6 +253,8 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_bool image_2d = FALSE;
|
vsi_bool image_2d = FALSE;
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||||
inputs[0]->attr.dim_num ) )
|
inputs[0]->attr.dim_num ) )
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -128,6 +128,8 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -229,6 +229,8 @@ DEF_KERNEL_INITIALIZER(_comparisons_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -285,7 +287,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e output_dtype;
|
vsi_nn_kernel_dtype_e output_dtype;
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
uint32_t key;
|
uint32_t key;
|
||||||
int i;
|
size_t i;
|
||||||
|
|
||||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||||
|
|
@ -347,6 +349,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
|
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
|
||||||
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale;
|
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
ret = vsi_nn_kernel_optimize_eltwise_shape(
|
ret = vsi_nn_kernel_optimize_eltwise_shape(
|
||||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||||
inputs[1]->attr.size, inputs[1]->attr.dim_num,
|
inputs[1]->attr.size, inputs[1]->attr.dim_num,
|
||||||
|
|
@ -363,11 +368,11 @@ static vsi_nn_kernel_node_t _setup
|
||||||
outputs[0], shapes[2], new_rank );
|
outputs[0], shapes[2], new_rank );
|
||||||
|
|
||||||
#define _swap_tensor(a, b, tmp) \
|
#define _swap_tensor(a, b, tmp) \
|
||||||
do { \
|
{ \
|
||||||
tmp = a; \
|
tmp = a; \
|
||||||
a = b; \
|
a = b; \
|
||||||
b = tmp; \
|
b = tmp; \
|
||||||
} while(0)
|
}
|
||||||
|
|
||||||
if (shapes[1][3] > shapes[0][3] && new_rank == 4)
|
if (shapes[1][3] > shapes[0][3] && new_rank == 4)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,8 @@ DEF_KERNEL_INITIALIZER(_cumsum_initializer)
|
||||||
int32_t c = 1;
|
int32_t c = 1;
|
||||||
uint32_t dim = 1;
|
uint32_t dim = 1;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
@ -203,7 +205,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e input0_dtype = U8;
|
vsi_nn_kernel_dtype_e input0_dtype = U8;
|
||||||
vsi_nn_kernel_dtype_e output_dtype = U8;
|
vsi_nn_kernel_dtype_e output_dtype = U8;
|
||||||
uint32_t key = 0;
|
uint32_t key = 0;
|
||||||
int i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -288,11 +290,28 @@ static vsi_nn_kernel_node_t _setup
|
||||||
int32_t width = 0;
|
int32_t width = 0;
|
||||||
int32_t height = 0;
|
int32_t height = 0;
|
||||||
int32_t channel = 1;
|
int32_t channel = 1;
|
||||||
int32_t i = 0;
|
uint32_t i = 0;
|
||||||
|
|
||||||
vsi_nn_kernel_optimize_softmax_shape(
|
VSI_UNREFERENCED(input_num);
|
||||||
inputs[0]->attr.size, inputs[0]->attr.dim_num, axis,
|
VSI_UNREFERENCED(output_num);
|
||||||
shapes[0], &rs_dim, &axis_new);
|
|
||||||
|
if (axis < 0)
|
||||||
|
{
|
||||||
|
axis_new = 0;
|
||||||
|
shapes[0][0] = 1;
|
||||||
|
shapes[0][1] = 1;
|
||||||
|
for (i = 0; i < inputs[0]->attr.dim_num; i++)
|
||||||
|
{
|
||||||
|
shapes[0][0] *= inputs[0]->attr.size[i];
|
||||||
|
}
|
||||||
|
rs_dim = 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vsi_nn_kernel_optimize_softmax_shape(
|
||||||
|
inputs[0]->attr.size, inputs[0]->attr.dim_num, axis,
|
||||||
|
shapes[0], &rs_dim, &axis_new);
|
||||||
|
}
|
||||||
if (rs_dim > 3)
|
if (rs_dim > 3)
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,8 @@ DEF_KERNEL_INITIALIZER(_depth2space_crd_initializer)
|
||||||
int32_t output_height = 0;
|
int32_t output_height = 0;
|
||||||
int32_t output_chn = 0;
|
int32_t output_chn = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
@ -145,7 +147,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e input0_dtype = U8;
|
vsi_nn_kernel_dtype_e input0_dtype = U8;
|
||||||
vsi_nn_kernel_dtype_e output_dtype = U8;
|
vsi_nn_kernel_dtype_e output_dtype = U8;
|
||||||
uint32_t key = 0;
|
uint32_t key = 0;
|
||||||
int i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -195,6 +197,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
|
int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||||
outputs[0]->attr.dim_num ) )
|
outputs[0]->attr.dim_num ) )
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,9 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * input_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t * input_attr = NULL;
|
||||||
vsi_size_array_t * in_shape = NULL;
|
vsi_size_array_t * in_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
|
||||||
input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );
|
||||||
in_shape = input_attr->shape;
|
in_shape = input_attr->shape;
|
||||||
|
|
|
||||||
|
|
@ -181,6 +181,14 @@ static vsi_nn_kernel_node_t _setup
|
||||||
{
|
{
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(graph);
|
||||||
|
VSI_UNREFERENCED(inputs);
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(outputs);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
VSI_UNREFERENCED(kernel);
|
||||||
|
|
||||||
return node;
|
return node;
|
||||||
} /* _setup() */
|
} /* _setup() */
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -211,6 +211,9 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -253,7 +256,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e output_dtype;
|
vsi_nn_kernel_dtype_e output_dtype;
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
uint32_t key;
|
uint32_t key;
|
||||||
int i;
|
size_t i;
|
||||||
|
|
||||||
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -327,6 +330,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
|
float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
|
||||||
float beta = vsi_nn_kernel_param_get_float32( params, "beta" );
|
float beta = vsi_nn_kernel_param_get_float32( params, "beta" );
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
if (unary_type == UNARY_SELU)
|
if (unary_type == UNARY_SELU)
|
||||||
{
|
{
|
||||||
alpha = alpha * beta;
|
alpha = alpha * beta;
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,9 @@ DEF_KERNEL_INITIALIZER(_erf_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -243,6 +246,10 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||||
float outputZP = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f;
|
float outputZP = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
ret = vsi_nn_kernel_optimize_element_shape(
|
ret = vsi_nn_kernel_optimize_element_shape(
|
||||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||||
shape, &new_rank );
|
shape, &new_rank );
|
||||||
|
|
|
||||||
|
|
@ -122,11 +122,14 @@ DEF_KERNEL_INITIALIZER(_floordiv_initializer)
|
||||||
{0, 0, 0},
|
{0, 0, 0},
|
||||||
{0, 0, 0}
|
{0, 0, 0}
|
||||||
};
|
};
|
||||||
vx_status status = VX_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
vx_tensor output = (vx_tensor)param[2];
|
vx_tensor output = (vx_tensor)param[2];
|
||||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||||
vsi_size_array_t *output_shape = NULL;
|
vsi_size_array_t *output_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||||
|
|
||||||
|
|
@ -258,6 +261,8 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
|
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
|
||||||
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]);
|
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]);
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
outputScale = 1.0f / outputScale;
|
outputScale = 1.0f / outputScale;
|
||||||
input0Tail = -(input0Tail * input0Scale);
|
input0Tail = -(input0Tail * input0Scale);
|
||||||
input1Tail = -(input1Tail * input1Scale);
|
input1Tail = -(input1Tail * input1Scale);
|
||||||
|
|
|
||||||
|
|
@ -205,6 +205,9 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
|
||||||
size_t input_dims1 = 0;
|
size_t input_dims1 = 0;
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -264,7 +267,7 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e input0_dtype = U8;
|
vsi_nn_kernel_dtype_e input0_dtype = U8;
|
||||||
vsi_nn_kernel_dtype_e output_dtype = U8;
|
vsi_nn_kernel_dtype_e output_dtype = U8;
|
||||||
uint32_t key = 0;
|
uint32_t key = 0;
|
||||||
int i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -334,6 +337,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
int32_t is_array = block_size >= GPU_TENSOR_MAX_WIDTH ? 1 : 0;
|
int32_t is_array = block_size >= GPU_TENSOR_MAX_WIDTH ? 1 : 0;
|
||||||
int32_t i = 0;
|
int32_t i = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0, &is_array);
|
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0, &is_array);
|
||||||
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1, &is_array);
|
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1, &is_array);
|
||||||
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0, &is_array);
|
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0, &is_array);
|
||||||
|
|
|
||||||
|
|
@ -51,18 +51,30 @@ typedef enum
|
||||||
|
|
||||||
#define STR(a) #a
|
#define STR(a) #a
|
||||||
// Add kernel hashtable here
|
// Add kernel hashtable here
|
||||||
#define GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, IMG_2D ) \
|
#define GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, IMG_2D, BEYOND_MAXWIDTH ) \
|
||||||
(( AXIS ) | ( IN0_DTYPE << 2 ) | ( IN1_DTYPE << 10 ) | ( OUT_DTYPE << 18 ) | ( IMG_2D << 26 ))
|
(( AXIS ) | ( IN0_DTYPE << 2 ) | ( IN1_DTYPE << 10 ) | ( OUT_DTYPE << 18 ) | ( IMG_2D << 26 ) | \
|
||||||
|
(BEYOND_MAXWIDTH << 28))
|
||||||
#define PACK_KERNEL_3D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
#define PACK_KERNEL_3D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
||||||
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 ), \
|
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 , 0), \
|
||||||
CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)), \
|
CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)), \
|
||||||
_GATHER_ELEMENTS_KERNEL_SOURCE}
|
_GATHER_ELEMENTS_KERNEL_SOURCE}
|
||||||
|
|
||||||
#define PACK_KERNEL_2D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
#define PACK_KERNEL_2D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
||||||
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 ), \
|
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 , 0), \
|
||||||
CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)"_2D"), \
|
CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)"_2D"), \
|
||||||
_GATHER_ELEMENTS_KERNEL_SOURCE}
|
_GATHER_ELEMENTS_KERNEL_SOURCE}
|
||||||
|
|
||||||
|
#define PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
||||||
|
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 , 1), \
|
||||||
|
CVIVANTE_NAMESPACE("cl.gather_elements_beyond_maxwidth_axis"STR(AXIS)"_"STR(IN0_DTYPE)\
|
||||||
|
"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)), \
|
||||||
|
_GATHER_ELEMENTS_KERNEL_SOURCE}
|
||||||
|
|
||||||
|
#define PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
|
||||||
|
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 , 1), \
|
||||||
|
CVIVANTE_NAMESPACE("cl.gather_elements_beyond_maxwidth_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)\
|
||||||
|
"to"STR(OUT_DTYPE)"_2D"), _GATHER_ELEMENTS_KERNEL_SOURCE}
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
uint32_t key;
|
uint32_t key;
|
||||||
|
|
@ -89,6 +101,44 @@ static const _kernel_map_type _gather_elements_kernel_map[] =
|
||||||
PACK_KERNEL_2D_MAP( 1, F32, I32, F32 ),
|
PACK_KERNEL_2D_MAP( 1, F32, I32, F32 ),
|
||||||
PACK_KERNEL_2D_MAP( 1, I32, I32, I32 ),
|
PACK_KERNEL_2D_MAP( 1, I32, I32, I32 ),
|
||||||
PACK_KERNEL_2D_MAP( 1, U32, I32, U32 ),
|
PACK_KERNEL_2D_MAP( 1, U32, I32, U32 ),
|
||||||
|
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, F32, I32, F32),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, F16, I32, F16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, I32, I32, I32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, I16, I32, I16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, I8, I32, I8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, U8, I32, U8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, F32, I32, F32),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, F16, I32, F16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, I32, I32, I32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, I16, I32, I16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, I8, I32, I8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, U8, I32, U8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, F32, I32, F32),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, F16, I32, F16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, I32, I32, I32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, I16, I32, I16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, I8, I32, I8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, U8, I32, U8 ),
|
||||||
|
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, F32, I32, F32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, F16, I32, F16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, I32, I32, I32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, I16, I32, I16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, I8, I32, I8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, U8, I32, U8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, F32, I32, F32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, F16, I32, F16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, I32, I32, I32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, I16, I32, I16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, I8, I32, I8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, U8, I32, U8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, F32, I32, F32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, F16, I32, F16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, I32, I32, I32 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, I16, I32, I16 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, I8, I32, I8 ),
|
||||||
|
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, U8, I32, U8 ),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -126,12 +176,38 @@ DEF_KERNEL_INITIALIZER(_gather_elements_initializer)
|
||||||
{0, 0, 0},
|
{0, 0, 0},
|
||||||
{0, 0, 0}
|
{0, 0, 0}
|
||||||
};
|
};
|
||||||
|
vsi_nn_kernel_tensor_attr_t * input_attr0 = NULL;
|
||||||
|
vsi_nn_kernel_tensor_attr_t * input_attr1 = NULL;
|
||||||
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
||||||
vsi_size_array_t * out_shape = NULL;
|
vsi_size_array_t * out_shape = NULL;
|
||||||
|
uint32_t width0 = 0;
|
||||||
|
uint32_t height0 = 0;
|
||||||
|
uint32_t width1 = 0;
|
||||||
|
uint32_t height1 = 0;
|
||||||
|
uint32_t width_out = 0;
|
||||||
|
uint32_t height_out = 0;
|
||||||
|
uint32_t depth0 = 0;
|
||||||
|
uint32_t depth1 = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
|
input_attr0 = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( input_attr0, "Create tensor attr buffer fail.", final );
|
||||||
|
input_attr1 = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
CHECK_PTR_FAIL_GOTO( input_attr1, "Create tensor attr buffer fail.", final );
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
width0 = (uint32_t)input_attr0->shape->data[0];
|
||||||
|
height0 = (uint32_t)input_attr0->shape->data[1];
|
||||||
|
depth0 = input_attr0->shape->size > 2 ? (uint32_t)input_attr0->shape->data[2] : 1;
|
||||||
|
width1 = (uint32_t)input_attr1->shape->data[0];
|
||||||
|
height1 = (uint32_t)input_attr1->shape->data[1];
|
||||||
|
depth1 = input_attr1->shape->size > 2 ? (uint32_t)input_attr1->shape->data[2] : 1;
|
||||||
|
width_out = (uint32_t)output_attr->shape->data[0];
|
||||||
|
height_out = (uint32_t)output_attr->shape->data[1];
|
||||||
|
|
||||||
out_shape = output_attr->shape;
|
out_shape = output_attr->shape;
|
||||||
|
|
||||||
gpu_param.global_scale[0] = 1;
|
gpu_param.global_scale[0] = 1;
|
||||||
|
|
@ -146,7 +222,25 @@ DEF_KERNEL_INITIALIZER(_gather_elements_initializer)
|
||||||
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
|
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
|
||||||
/ gpu_param.global_scale[1]);
|
/ gpu_param.global_scale[1]);
|
||||||
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
|
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
|
||||||
|
|
||||||
|
if (width0 >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
width1 >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
height0 >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
height1 >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
depth0 >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
depth1 >= GPU_TENSOR_MAX_WIDTH)
|
||||||
|
{
|
||||||
|
gpu_param.global_scale[0] = 1;
|
||||||
|
gpu_param.global_size[0] = out_shape->data[0];
|
||||||
|
}
|
||||||
|
|
||||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "width0", &width0 );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "height0", &height0 );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "width1", &width1 );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "height1", &height1 );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "width_out", &width_out );
|
||||||
|
status |= vsi_nn_kernel_gpu_add_param( node, "height_out", &height_out );
|
||||||
|
|
||||||
final:
|
final:
|
||||||
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
|
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
|
||||||
|
|
@ -178,32 +272,52 @@ static vsi_status _query_kernel
|
||||||
int32_t img_2d = (outputs[0]->attr.dim_num < 3 || outputs[0]->attr.size[2] == 1) ? 1 : 0;
|
int32_t img_2d = (outputs[0]->attr.dim_num < 3 || outputs[0]->attr.size[2] == 1) ? 1 : 0;
|
||||||
uint32_t key = 0;
|
uint32_t key = 0;
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
|
int32_t beyond_maxwidth = 0;
|
||||||
|
vsi_size_t depth0 = inputs[0]->attr.dim_num > 2 ? inputs[0]->attr.size[2] : 1;
|
||||||
|
vsi_size_t depth1 = inputs[1]->attr.dim_num > 2 ? inputs[1]->attr.size[2] : 1;
|
||||||
|
|
||||||
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
||||||
|
if (inputs[0]->attr.size[0] >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
inputs[0]->attr.size[1] >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
inputs[1]->attr.size[0] >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
inputs[1]->attr.size[1] >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
depth0 >= GPU_TENSOR_MAX_WIDTH ||
|
||||||
|
depth1 >= GPU_TENSOR_MAX_WIDTH)
|
||||||
|
{
|
||||||
|
beyond_maxwidth = 1;
|
||||||
|
}
|
||||||
|
|
||||||
#define _PACK_SELECT_KEY( in0_type, out_type ) \
|
#define _PACK_SELECT_KEY( in0_type, out_type ) \
|
||||||
( ( in0_type ) | ( out_type << 8 ))
|
( ( in0_type ) | ( out_type << 8 ))
|
||||||
|
|
||||||
switch (_PACK_SELECT_KEY(in0_dtype, out_dtype))
|
if (beyond_maxwidth == 0)
|
||||||
{
|
{
|
||||||
case _PACK_SELECT_KEY(F32, F32):
|
switch (_PACK_SELECT_KEY(in0_dtype, out_dtype))
|
||||||
case _PACK_SELECT_KEY(F16, F16):
|
{
|
||||||
key = GATHER_ELEMENTS_HASH_KEY( axis, F32, in1_dtype, F32, img_2d );
|
case _PACK_SELECT_KEY(F32, F32):
|
||||||
break;
|
case _PACK_SELECT_KEY(F16, F16):
|
||||||
case _PACK_SELECT_KEY(U32, U32):
|
key = GATHER_ELEMENTS_HASH_KEY( axis, F32, in1_dtype, F32, img_2d, 0 );
|
||||||
case _PACK_SELECT_KEY(U16, U16):
|
break;
|
||||||
case _PACK_SELECT_KEY(U8, U8):
|
case _PACK_SELECT_KEY(U32, U32):
|
||||||
key = GATHER_ELEMENTS_HASH_KEY( axis, U32, in1_dtype, U32, img_2d );
|
case _PACK_SELECT_KEY(U16, U16):
|
||||||
break;
|
case _PACK_SELECT_KEY(U8, U8):
|
||||||
case _PACK_SELECT_KEY(I32, I32):
|
key = GATHER_ELEMENTS_HASH_KEY( axis, U32, in1_dtype, U32, img_2d, 0 );
|
||||||
case _PACK_SELECT_KEY(I16, I16):
|
break;
|
||||||
case _PACK_SELECT_KEY(I8, I8):
|
case _PACK_SELECT_KEY(I32, I32):
|
||||||
key = GATHER_ELEMENTS_HASH_KEY( axis, I32, in1_dtype, I32, img_2d );
|
case _PACK_SELECT_KEY(I16, I16):
|
||||||
break;
|
case _PACK_SELECT_KEY(I8, I8):
|
||||||
default:
|
key = GATHER_ELEMENTS_HASH_KEY( axis, I32, in1_dtype, I32, img_2d, 0 );
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
key = GATHER_ELEMENTS_HASH_KEY( axis, in0_dtype, in1_dtype, out_dtype, img_2d, 1 );
|
||||||
}
|
}
|
||||||
#undef _PACK_SELECT_KEY
|
#undef _PACK_SELECT_KEY
|
||||||
|
|
||||||
|
|
@ -221,7 +335,8 @@ static vsi_status _query_kernel
|
||||||
kernel->info.numParams = _cnt_of_array( _gather_elements_kernel_param_def );
|
kernel->info.numParams = _cnt_of_array( _gather_elements_kernel_param_def );
|
||||||
kernel->info.initialize = initializer;
|
kernel->info.initialize = initializer;
|
||||||
// Register code source
|
// Register code source
|
||||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
|
||||||
|
"eltwise_ops_helper",
|
||||||
kernel_map[i].source_name );
|
kernel_map[i].source_name );
|
||||||
// Register binary source
|
// Register binary source
|
||||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||||
|
|
|
||||||
|
|
@ -119,7 +119,7 @@ static vsi_status cal_gather_nd_tensor_reshape_size
|
||||||
uint32_t block_size,
|
uint32_t block_size,
|
||||||
uint32_t coordDim,
|
uint32_t coordDim,
|
||||||
int32_t* newDim,
|
int32_t* newDim,
|
||||||
int32_t batch_dims
|
uint32_t batch_dims
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
|
@ -146,17 +146,23 @@ static vsi_status cal_gather_nd_tensor_reshape_size
|
||||||
|
|
||||||
if (batch_dims)
|
if (batch_dims)
|
||||||
{
|
{
|
||||||
|
int32_t rank = 1;
|
||||||
for (i = 0; i < offset; i++)
|
for (i = 0; i < offset; i++)
|
||||||
{
|
{
|
||||||
sizes[0] *= input_size[i];
|
sizes[0] *= input_size[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < coordDim; i++)
|
for (i = 0; i < coordDim - 1; i++)
|
||||||
{
|
{
|
||||||
sizes[i + 1] = input_size[i + offset];
|
sizes[rank++] = input_size[i + offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
newDim[0] = coordDim == 1 ? 2 : 3;
|
for (i = 0; i < batch_dims; i++)
|
||||||
|
{
|
||||||
|
sizes[rank] *= input_size[dims_num - i - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
newDim[0] = rank + 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -186,13 +192,27 @@ static vsi_status cal_gather_nd_tensor_reshape_size
|
||||||
}
|
}
|
||||||
else // indices&output reshape
|
else // indices&output reshape
|
||||||
{
|
{
|
||||||
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
|
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH && batch_dims == 0)
|
||||||
{
|
{
|
||||||
sizes[0] = block_size;
|
sizes[0] = block_size;
|
||||||
sizes[1] = elementCnt / block_size;
|
sizes[1] = elementCnt / block_size;
|
||||||
status = VSI_SUCCESS;
|
status = VSI_SUCCESS;
|
||||||
newDim[0] = 2;
|
newDim[0] = 2;
|
||||||
}
|
}
|
||||||
|
else if (batch_dims > 0)
|
||||||
|
{
|
||||||
|
vsi_size_t batch_cnt = 1;
|
||||||
|
for (i = 0; i < batch_dims; ++i)
|
||||||
|
{
|
||||||
|
batch_cnt *= input_size[dims_num - i - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
sizes[0] = block_size;
|
||||||
|
sizes[1] = (elementCnt / block_size) / batch_cnt;
|
||||||
|
sizes[2] = batch_cnt;
|
||||||
|
status = VSI_SUCCESS;
|
||||||
|
newDim[0] = 3;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#undef VSI_NN_MAX_IMAGE_WIDTH
|
#undef VSI_NN_MAX_IMAGE_WIDTH
|
||||||
|
|
||||||
|
|
@ -220,7 +240,11 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)
|
||||||
|
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
|
||||||
int32_t block_size = 0;
|
int32_t block_size = 0;
|
||||||
vsi_ssize_t indices_num = 1;
|
vsi_size_t indices_num = 1;
|
||||||
|
vsi_size_t batch_num = 1;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
|
@ -229,6 +253,7 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)
|
||||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||||
|
|
||||||
indices_num = attr[0]->shape->data[1];
|
indices_num = attr[0]->shape->data[1];
|
||||||
|
batch_num = (attr[0]->shape->size > 2 ? attr[0]->shape->data[2] : 1);
|
||||||
|
|
||||||
gpu_param.global_scale[0] = 1;
|
gpu_param.global_scale[0] = 1;
|
||||||
gpu_param.global_scale[1] = 1;
|
gpu_param.global_scale[1] = 1;
|
||||||
|
|
@ -237,7 +262,7 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)
|
||||||
gpu_param.global_size[0] = gpu_align_p2((block_size + gpu_param.global_scale[0] - 1)
|
gpu_param.global_size[0] = gpu_align_p2((block_size + gpu_param.global_scale[0] - 1)
|
||||||
/ gpu_param.global_scale[0], 4);
|
/ gpu_param.global_scale[0], 4);
|
||||||
gpu_param.global_size[1] = indices_num;
|
gpu_param.global_size[1] = indices_num;
|
||||||
gpu_param.global_size[2] = 1;
|
gpu_param.global_size[2] = batch_num;
|
||||||
|
|
||||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||||
CHECK_STATUS_FAIL_GOTO(status, final);
|
CHECK_STATUS_FAIL_GOTO(status, final);
|
||||||
|
|
@ -265,7 +290,8 @@ static vsi_status _query_kernel
|
||||||
vsi_nn_kernel_dtype_e output_dtype = U8;
|
vsi_nn_kernel_dtype_e output_dtype = U8;
|
||||||
vsi_nn_kernel_coord_type_e coord_type = _error;
|
vsi_nn_kernel_coord_type_e coord_type = _error;
|
||||||
uint32_t key = 0;
|
uint32_t key = 0;
|
||||||
int i = 0;
|
int32_t batch_flg = batch_dims > 0 ? 1 : 0;
|
||||||
|
size_t i = 0;
|
||||||
|
|
||||||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||||
|
|
@ -301,7 +327,7 @@ static vsi_status _query_kernel
|
||||||
coord_type = _3D;
|
coord_type = _3D;
|
||||||
}
|
}
|
||||||
|
|
||||||
key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_dims );
|
key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_flg );
|
||||||
|
|
||||||
for ( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
|
for ( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
|
||||||
{
|
{
|
||||||
|
|
@ -348,6 +374,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
|
int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
|
||||||
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
|
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim, batch_dims);
|
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim, batch_dims);
|
||||||
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim, batch_dims);
|
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim, batch_dims);
|
||||||
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim, batch_dims);
|
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim, batch_dims);
|
||||||
|
|
|
||||||
|
|
@ -108,6 +108,9 @@ DEF_KERNEL_INITIALIZER(_globallppool_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
|
||||||
vsi_size_array_t *output_shape = NULL;
|
vsi_size_array_t *output_shape = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
|
||||||
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
|
||||||
output_shape = output_attr->shape;
|
output_shape = output_attr->shape;
|
||||||
|
|
|
||||||
|
|
@ -220,6 +220,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_sum_sqr_initializer)
|
||||||
vsi_ssize_t width = 0;
|
vsi_ssize_t width = 0;
|
||||||
vsi_ssize_t chn = 0;
|
vsi_ssize_t chn = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -275,6 +278,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_mean_vari_initializer)
|
||||||
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
|
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
|
||||||
vsi_ssize_t chn = 0;
|
vsi_ssize_t chn = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
|
|
||||||
|
|
@ -325,6 +331,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_initializer)
|
||||||
vsi_ssize_t chn = 0;
|
vsi_ssize_t chn = 0;
|
||||||
int32_t is2D = 0;
|
int32_t is2D = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
|
||||||
|
|
@ -489,6 +498,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float rSpaceOrg = 1.0f / (width * height);
|
float rSpaceOrg = 1.0f / (width * height);
|
||||||
float group_ratio = 1.0f / (inputs[0]->attr.size[0] * inputs[0]->attr.size[1] * group_size);
|
float group_ratio = 1.0f / (inputs[0]->attr.size[0] * inputs[0]->attr.size[1] * group_size);
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||||
outputs[0]->attr.dim_num ) )
|
outputs[0]->attr.dim_num ) )
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,9 @@ DEF_KERNEL_INITIALIZER(_grucell_activation_initializer)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
// vsi_nn_kernel_tensor_attr * attr[2] = { NULL };
|
// vsi_nn_kernel_tensor_attr * attr[2] = { NULL };
|
||||||
// attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
// attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
// attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
// attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -172,6 +175,8 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_PARAM_NUM] = {NULL};
|
vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_PARAM_NUM] = {NULL};
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// Check if gpu can support the size
|
// Check if gpu can support the size
|
||||||
if( !vsi_nn_kernel_gpu_check_shape(
|
if( !vsi_nn_kernel_gpu_check_shape(
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,10 @@ DEF_KERNEL_INITIALIZER(_grucell_activation_sma_initializer)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
vsi_status status = VSI_FAILURE;
|
vsi_status status = VSI_FAILURE;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(node);
|
||||||
|
VSI_UNREFERENCED(param);
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
// vsi_nn_kernel_tensor_attr * attr[2] = { NULL };
|
// vsi_nn_kernel_tensor_attr * attr[2] = { NULL };
|
||||||
// attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
// attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
// attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
// attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -172,6 +176,8 @@ static vsi_nn_kernel_node_t _setup
|
||||||
vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_SMA_PARAM_NUM] = {NULL};
|
vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_SMA_PARAM_NUM] = {NULL};
|
||||||
vsi_nn_kernel_node_t node = NULL;
|
vsi_nn_kernel_node_t node = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(params);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// Check if gpu can support the size
|
// Check if gpu can support the size
|
||||||
if( !vsi_nn_kernel_gpu_check_shape(
|
if( !vsi_nn_kernel_gpu_check_shape(
|
||||||
|
|
|
||||||
|
|
@ -118,6 +118,8 @@ DEF_KERNEL_INITIALIZER(_grucell_activation_z_h_initializer)
|
||||||
vsi_nn_kernel_tensor_t input = NULL;
|
vsi_nn_kernel_tensor_t input = NULL;
|
||||||
vsi_nn_kernel_tensor_attr_t* input_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t* input_attr = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
input = (vsi_nn_kernel_tensor_t)param[GRUCELL_ACT_Z_H_HSTATE];
|
input = (vsi_nn_kernel_tensor_t)param[GRUCELL_ACT_Z_H_HSTATE];
|
||||||
|
|
||||||
input_attr = vsi_nn_kernel_tensor_attr_create( input );
|
input_attr = vsi_nn_kernel_tensor_attr_create( input );
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,8 @@ DEF_KERNEL_INITIALIZER(_grucell_h_times_activation_r_initializer)
|
||||||
vsi_nn_kernel_tensor_t output = NULL;
|
vsi_nn_kernel_tensor_t output = NULL;
|
||||||
vsi_nn_kernel_tensor_attr_t* output_attr;
|
vsi_nn_kernel_tensor_attr_t* output_attr;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
output = (vsi_nn_kernel_tensor_t)param[3];
|
output = (vsi_nn_kernel_tensor_t)param[3];
|
||||||
|
|
||||||
output_attr = vsi_nn_kernel_tensor_attr_create( output );
|
output_attr = vsi_nn_kernel_tensor_attr_create( output );
|
||||||
|
|
|
||||||
|
|
@ -120,6 +120,8 @@ DEF_KERNEL_INITIALIZER(_grucell_reset_after_activation_initializer)
|
||||||
vsi_nn_kernel_tensor_t input = NULL;
|
vsi_nn_kernel_tensor_t input = NULL;
|
||||||
vsi_nn_kernel_tensor_attr_t* input_attr = NULL;
|
vsi_nn_kernel_tensor_attr_t* input_attr = NULL;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
input = (vsi_nn_kernel_tensor_t)param[GRUCELL_ACT_H_STATE];
|
input = (vsi_nn_kernel_tensor_t)param[GRUCELL_ACT_H_STATE];
|
||||||
|
|
||||||
input_attr = vsi_nn_kernel_tensor_attr_create( input );
|
input_attr = vsi_nn_kernel_tensor_attr_create( input );
|
||||||
|
|
|
||||||
|
|
@ -188,6 +188,8 @@ DEF_KERNEL_INITIALIZER(_instancenorm_sums_initializer)
|
||||||
vsi_ssize_t height = 0;
|
vsi_ssize_t height = 0;
|
||||||
vsi_ssize_t chn = 0;
|
vsi_ssize_t chn = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
|
||||||
|
|
@ -255,6 +257,8 @@ DEF_KERNEL_INITIALIZER(_instancenorm_initializer)
|
||||||
vsi_ssize_t height = 0;
|
vsi_ssize_t height = 0;
|
||||||
vsi_ssize_t chn = 0;
|
vsi_ssize_t chn = 0;
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(param_size);
|
||||||
|
|
||||||
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
|
||||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||||
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
|
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
|
||||||
|
|
@ -405,6 +409,9 @@ static vsi_nn_kernel_node_t _setup
|
||||||
float output_scale = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
|
float output_scale = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
|
||||||
float inv_multiplier = (float)1.0 / (float)(width * height);
|
float inv_multiplier = (float)1.0 / (float)(width * height);
|
||||||
|
|
||||||
|
VSI_UNREFERENCED(input_num);
|
||||||
|
VSI_UNREFERENCED(output_num);
|
||||||
|
|
||||||
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
|
||||||
outputs[0]->attr.dim_num ) )
|
outputs[0]->attr.dim_num ) )
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue