Update prebuilt && internal for 23Q2 release (#617)

* Update prebuilt-sdk to 6.4.15 release

Type: Code Improvement
Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>

* Update internal to 1.1.84 rel

Update internal to SHA 1e591108dddcbf6dd88d5eef97a7d8b3ffc19ce3

Type: Code Improvement
Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>

---------

Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
Chen Feiyue 2023-07-08 23:38:17 +08:00 committed by GitHub
parent 02d6d72946
commit 32c5a61601
475 changed files with 26346 additions and 4350 deletions

View File

@@ -1 +1 @@
-6.4.14_CL650117A_D650117_A648302_R647402_T648811_O646970
+6.4.15_CL690884A_D690855_A690484_R690194_T690259_O688896

View File

@@ -1340,6 +1340,21 @@ VX_API_ENTRY vx_status VX_API_CALL vxAssignNodeCallback(vx_node node, vx_nodecom
*/
VX_API_ENTRY vx_nodecomplete_f VX_API_CALL vxRetrieveNodeCallback(vx_node node);
/*! \brief Assigns a callback to a node.
* If a callback already exists in this node, this function must return an error
* and the user may clear the callback by passing a NULL pointer as the callback.
* \param [in] node The reference to the node.
* \param [in] callback The callback to associate with completion of this
* specific node.
* \warning This must be used with <b><i>extreme</i></b> caution as it can \e ruin
* optimizations in the power/performance efficiency of a graph.
* \return A <tt>\ref vx_status_e</tt> enumeration.
* \retval VX_SUCCESS Callback assigned; any other value indicates failure.
* \retval VX_ERROR_INVALID_REFERENCE node is not a valid <tt>\ref vx_node</tt> reference.
* \ingroup group_node_callback
*/
VX_API_ENTRY vx_status VX_API_CALL vxAssignNodeQueryCallback(vx_node node, vx_nodequery_f callback);
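A minimal usage sketch of the new entry point (illustrative only, not part of the commit: my_query_cb stands for a user-supplied callback of type vx_nodequery_f):

/* Sketch: my_query_cb is a hypothetical callback of type vx_nodequery_f. */
extern vx_nodequery_f my_query_cb;
static vx_status attach_and_clear(vx_node node)
{
    vx_status status = vxAssignNodeQueryCallback(node, my_query_cb);
    if (status != VX_SUCCESS)
        return status; /* a callback already existed, or the node is invalid */
    return vxAssignNodeQueryCallback(node, NULL); /* NULL clears the callback */
}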
/*! \brief Sets the node target to the provided value. A success invalidates the graph
* that the node belongs to (<tt>\ref vxVerifyGraph</tt> must be called before the next execution)
* \param [in] node The reference to the <tt>\ref vx_node</tt> object.

View File

@@ -503,6 +503,40 @@ enum vx_kernel_e {
VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x33,
VX_KERNEL_NN_FUSED_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x34,
VX_KERNEL_NN_CONVOLUTION_RELU_POOLING_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x35,
VX_KERNEL_NN_LAYER_NORMALIZATION_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x36,
VX_KERNEL_NN_INSTANCE_NORMALIZATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x37,
VX_KERNEL_NN_GROUP_NORMALIZATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x38,
VX_KERNEL_NN_LOGICAL_OPS_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x39,
VX_KERNEL_NN_LOGICAL_NOT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x40,
VX_KERNEL_NN_RELATIONAL_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x41,
VX_KERNEL_NN_TENSOR_REDUCE_MAX = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x42,
VX_KERNEL_NN_MAXIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x43,
VX_KERNEL_NN_MINIMUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x44,
VX_KERNEL_NN_TENSOR_SELECT_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x45,
VX_KERNEL_NN_REDUCE_SUM_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x46,
VX_KERNEL_NN_GRU_CELL_ACTIVATION_Z_H_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x47,
VX_KERNEL_NN_GRU_CELL_H_TIMES_ACTIVATION_R_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x48,
VX_KERNEL_NN_GRU_CELL_RESET_AFTER_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x49,
VX_KERNEL_NN_LSTM_ACTIVATION_SP_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x50,
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
};

View File

@@ -214,7 +214,7 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
1: support
*/
#ifndef VX_STREAM_PROCESSOR_SUPPORT
-#define VX_STREAM_PROCESSOR_SUPPORT 0
+#define VX_STREAM_PROCESSOR_SUPPORT 1
#endif
/*
@@ -258,5 +258,144 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
#define VX_ACTIVATION_EXT2_SUPPORT 1
#endif
/*
VX_TENSORVIEW_ON_ANY_DIM is used to declare that ovxlib can optimize all concat nodes (on any dimension, not only channel) to tensor views when possible.
[value]
0: disable
1: enable
*/
#ifndef VX_TENSORVIEW_ON_ANY_DIM
#define VX_TENSORVIEW_ON_ANY_DIM 0
#endif
/*
VX_DEPTH2SPACE_CRD_MODE_SUPPORT is used to declare that DEPTH2SPACE can support CRD mode.
[value]
0: not support
1: support
*/
#ifndef VX_DEPTH2SPACE_CRD_MODE_SUPPORT
#define VX_DEPTH2SPACE_CRD_MODE_SUPPORT 1
#endif
/*
VX_LAYER_NORMALIZATION_VX_SUPPORT is used to declare that the driver supports the layer normalization layer.
[value]
0: not support
1: support
*/
#ifndef VX_LAYER_NORMALIZATION_VX_SUPPORT
#define VX_LAYER_NORMALIZATION_VX_SUPPORT 1
#endif
/*
VX_INSTANCE_NORMALIZATION_VX_SUPPORT is used to declare that the driver supports the instance normalization layer.
[value]
0: not support
1: support
*/
#ifndef VX_INSTANCE_NORMALIZATION_VX_SUPPORT
#define VX_INSTANCE_NORMALIZATION_VX_SUPPORT 1
#endif
/*
VX_GROUP_NORMALIZATION_VX_SUPPORT is used to declare that the driver supports the group normalization layer.
[value]
0: not support
1: support
*/
#ifndef VX_GROUP_NORMALIZATION_VX_SUPPORT
#define VX_GROUP_NORMALIZATION_VX_SUPPORT 1
#endif
/*
VX_LOGICAL_VX_SUPPORT is used to declare that the driver supports the logical-ops layers.
[value]
0: not support
1: support
*/
#ifndef VX_LOGICAL_VX_SUPPORT
#define VX_LOGICAL_VX_SUPPORT 1
#endif
/*
VX_RELATIONAL_OPS_VX_SUPPORT is used to declare that the driver supports the relational-ops layers.
[value]
0: not support
1: support
*/
#ifndef VX_RELATIONAL_OPS_VX_SUPPORT
#define VX_RELATIONAL_OPS_VX_SUPPORT 1
#endif
/*
VX_REDUCE_MAX_VX_SUPPORT is used to declare that the driver supports the reduce max layer.
[value]
0: not support
1: support
*/
#ifndef VX_REDUCE_MAX_VX_SUPPORT
#define VX_REDUCE_MAX_VX_SUPPORT 1
#endif
/*
VX_REDUCE_MEAN_VX_SUPPORT is used to declare that the driver supports the reduce mean layer.
[value]
0: not support
1: support
*/
#ifndef VX_REDUCE_MEAN_VX_SUPPORT
#define VX_REDUCE_MEAN_VX_SUPPORT 1
#endif
/*
VX_REDUCE_SUM_VX_SUPPORT is used to declare that the driver supports the reduce sum layer.
[value]
0: not support
1: support
*/
#ifndef VX_REDUCE_SUM_VX_SUPPORT
#define VX_REDUCE_SUM_VX_SUPPORT 1
#endif
/*
VX_MAX_MIN_IMUM_VX_SUPPORT is used to declare that the driver supports the maximum and minimum layers.
[value]
0: not support
1: support
*/
#ifndef VX_MAX_MIN_IMUM_VX_SUPPORT
#define VX_MAX_MIN_IMUM_VX_SUPPORT 1
#endif
/*
VX_TENSOR_SELECT_VX_SUPPORT is used to declare that the driver supports the tensor select layer.
[value]
0: not support
1: support
*/
#ifndef VX_TENSOR_SELECT_VX_SUPPORT
#define VX_TENSOR_SELECT_VX_SUPPORT 1
#endif
/*
VX_GRU_CELL_VX_SUPPORT is used to declare that the driver supports the gru cell layer.
[value]
0: not support
1: support
*/
#ifndef VX_GRU_CELL_VX_SUPPORT
#define VX_GRU_CELL_VX_SUPPORT 1
#endif
/*
VX_LSTM_ACTIVATION_SUPPORT is used to declare that the driver supports the lstm activation layer.
[value]
0: not support
1: support
*/
#ifndef VX_LSTM_ACTIVATION_SUPPORT
#define VX_LSTM_ACTIVATION_SUPPORT 1
#endif
#endif /* __VX_KHR_COMPATIBLE_H__ */

View File

@@ -395,6 +395,17 @@ enum vx_tensor_lifetime_type_e
VX_TENSOR_LIFE_TIME_DYNAMIC,
};
/*! \brief Specifies the depth-to-space mode
* \ingroup group_cnn
*/
enum vx_nn_depth_to_space_mode_e
{
/*! \brief DCR(default) for depth-column-row order re-arrangement */
VX_NN_DEPTH_TO_SPACE_DCR = 0x0,
/*! \brief CRD for column-row-depth order re-arrangement */
VX_NN_DEPTH_TO_SPACE_CRD,
};
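A sketch of the difference between the two modes, assuming the usual ONNX DepthToSpace semantics (which this diff does not spell out): for block size block and C_out output channels, output channel c at block offset (by, bx) reads from the input channel computed below.

/* Sketch only; DCR/CRD semantics assumed to follow ONNX DepthToSpace. */
static int src_channel_dcr(int c, int by, int bx, int c_out, int block)
{
    return (by * block + bx) * c_out + c;       /* depth-column-row */
}
static int src_channel_crd(int c, int by, int bx, int block)
{
    return c * block * block + by * block + bx; /* column-row-depth */
}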
typedef struct _vx_nn_convolution_3d_params_t
{
vx_int32 padding_w_left; /*!< \brief Number of elements added at each side in the left of w dimension of the input. */
@@ -972,6 +983,16 @@ typedef struct _vx_nn_mean_params_t
vx_int32 keep_dims; /*!< \brief Keep dims, if positive, retains reduced dims with length 1 */
} vx_nn_mean_params_t;
/*! \brief Input parameter for reducesum layer
* \ingroup group_cnn
*\version 0.5
*/
typedef struct _vx_nn_sum_params_t
{
vx_tensor axis; /*!< \brief 1D axis tensor of reduce dims */
vx_int32 keep_dims; /*!< \brief Keep dims, if positive, retains reduced dims with length 1 */
} vx_nn_sum_params_t;
/*! \brief Input parameter for tensor squeeze layer
* \ingroup group_cnn
*\version 0.5
@@ -1254,6 +1275,12 @@ typedef struct _vsi_nn_reorg_params_ext2_t
vx_int32 *axis;
} vx_nn_reorg_params_ext2_t;
typedef struct _vx_nn_reorg_params_ext3_t
{
vx_nn_reorg_params_ext2_t base; /*!< \brief vx_nn_reorg_params <tt>\ref vx_nn_reorg_params_t</tt> */
vx_enum mode; /*!< \brief [Optional] Only for DEPTH2SPACE */
} vx_nn_reorg_params_ext3_t;
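A hypothetical fill-in sketch for the new ext3 params (the node-creation call that consumes this struct is unchanged and not shown in this hunk; memset needs <string.h>):

/* Sketch: select CRD re-arrangement for a DEPTH to SPACE reorg. */
static void fill_reorg_params(vx_nn_reorg_params_ext3_t * p)
{
    memset(p, 0, sizeof(*p));
    /* ... populate p->base exactly as for vx_nn_reorg_params_ext2_t ... */
    p->mode = VX_NN_DEPTH_TO_SPACE_CRD;
}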
/*! \brief [Graph] Creates a Reorgnization Layer Node, Enhancement of vxReorgLayer, Support both DEPTH to SPACE and SPACE to DEPTH.
* \param [in] graph The reference to the parent graph.
* \param [in] input The input tensor data to reorg.
@@ -1911,6 +1938,21 @@ VX_API_ENTRY vx_node VX_API_CALL vxRPNLayer(
vx_tensor score_output
);
/*! \brief Input parameters for a lstm activation operation.
* \ingroup group_cnn
* \version 0.3
*/
typedef struct _vx_nn_lstm_activation_params_t
{
vx_int32 is_ln;
vx_int32 is_cifg;
vx_int32 is_proj;
vx_int32 is_hybrid;
vx_int32 is_peephole;
vx_int32 recurrent_activation;
vx_float32 forget_bias;
} vx_nn_lstm_activation_params_t;
/*! \brief Input parameters for a lstm operation.
* \ingroup group_cnn
* \version 0.3
@@ -2115,6 +2157,28 @@ VX_API_ENTRY vx_node VX_API_CALL vxTensorMeanNode(
vx_size size_of_mean_param,
vx_tensor outputs);
/*! \brief [Graph] Creates sum layer node.
* \details
* Computes the sum of elements across dimensions of a tensor.
*
* \param [in] graph The handle to the graph.
* \param [in] input An n-D tensor, specifying the input.
* \param [in] sum_params Parameters <tt>\ref vx_nn_sum_params_t </tt>.
* \param [in] size_of_sum_param [static] The size of the vx_nn_sum_params_t.
* \param [out] output An n-D tensor of the same type as input.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_tensor
* \version 0.5
*/
VX_API_ENTRY vx_node VX_API_CALL vxReduceSumNode(
vx_graph graph,
vx_tensor inputs,
const vx_nn_sum_params_t *sum_params,
vx_size size_of_sum_param,
vx_tensor outputs);
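A minimal creation sketch (assumes the graph and tensors already exist; error handling elided):

/* Sketch: axis_tensor is a 1-D tensor listing the dims to reduce. */
static vx_node make_reduce_sum(vx_graph graph, vx_tensor input,
                               vx_tensor axis_tensor, vx_tensor output)
{
    vx_nn_sum_params_t sum_params;
    sum_params.axis = axis_tensor;
    sum_params.keep_dims = 1; /* keep reduced dims with length 1 */
    return vxReduceSumNode(graph, input, &sum_params,
                           sizeof(sum_params), output);
}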
/*! \brief [Graph] Creates squeeze layer node.
* \details
* Remove dimensions of size 1 from the input tensor.
@@ -2287,6 +2351,282 @@ VX_API_ENTRY vx_node VX_API_CALL vxConv3dLayer(vx_graph graph, vx_tensor inputs,
*/
VX_API_ENTRY vx_node VX_API_CALL vxDeconv3dLayer(vx_graph graph, vx_tensor inputs, vx_tensor weights, vx_tensor biases, const vx_nn_deconvolution_3d_params_t *convolution_params, vx_size size_of_deconv_params, vx_tensor outputs);
/*! \brief [Graph] Creates a layer normalization Node.
* \details Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
* \param [in] graph The handle to the graph.
* \param [in] eps [static] Float 32. Small value added to the variance estimate to avoid division by zero (default is 1e-5).
* \param [in] axis [static] The axis along which to normalize.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxLayerNormalizationLayer(
vx_graph graph,
vx_float32 eps,
vx_int32 axis,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
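A usage sketch; note that the composition of input_list is an assumption here ({input, gamma, beta} is common for layer norm), since the doc comment above only gives the count:

static vx_node make_layer_norm(vx_graph graph, vx_tensor input,
                               vx_tensor gamma, vx_tensor beta, vx_tensor output)
{
    vx_tensor input_list[3] = { input, gamma, beta }; /* layout assumed */
    return vxLayerNormalizationLayer(graph, 1e-5f /* eps */, 0 /* axis */,
                                     input_list, 3, output);
}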
/*! \brief [Graph] Creates an instance normalization layer Node.
* \details Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
* \param [in] graph The handle to the graph.
* \param [in] eps [static] Float 32. Small value added to the variance estimate to avoid division by zero (default is 1e-5).
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxInstanceNormalizationLayer(
vx_graph graph,
vx_float32 eps,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Creates a group normalization layer Node.
* \details Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1.
* \param [in] graph The handle to the graph.
* \param [in] eps [static] Float 32. Small value added to the variance estimate to avoid division by zero (default is 1e-5).
* \param [in] group_num [static] Int 32. Number of groups for GN
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxGroupNormalizationLayer(
vx_graph graph,
vx_float32 eps,
vx_int32 group_num,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Creates a layer logical ops Node.
* \details Return the truth value of x AND, OR, or XOR y, element-wise.
* \param [in] graph The handle to the graph.
* \param [in] ops_type [static] Int 32. Operation Type
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxLogicalOpsLayer(
vx_graph graph,
vx_int32 ops_type,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Creates a layer logical not Node.
* \details Return the truth value of NOT x, element-wise.
* \param [in] graph The handle to the graph.
* \param [in] input [static] The input tensor data.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxLogicalNotLayer(
vx_graph graph,
vx_tensor input,
vx_tensor output
);
/*! \brief [Graph] Creates a relational layer Node.
* \param [in] graph The handle to the graph.
* \param [in] ops_type [static] Int 32. Operation Type
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxRelationalLayer(
vx_graph graph,
vx_int32 ops_type,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Computes the max of elements across dimensions of input tensor.
* \param [in] graph The handle to the graph.
* \param [in] inputs input tensor data.
* \param [in] axis [static] used to determine the max across which dimension (dimension 0 means width, etc). If not given, compute the max across all dimensions.
* \param [in] keep_dims [static] whether to keep the dimension count.
* \param [out] outputs output tensor data.
* \ingroup group_tensor
* \return <tt> vx_node</tt>.
* \retval 0 Node could not be created.
* \retval * Node handle.
* \version 0.3
*/
VX_API_ENTRY vx_node VX_API_CALL vxTensorReduceMaxNode(
vx_graph graph,
vx_tensor inputs,
vx_tensor axis,
vx_bool keep_dims,
vx_tensor outputs);
/*! \brief [Graph] Creates a minimum layer Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxMinimumLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Creates a maximum layer Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxMaximumLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Creates a tensor select layer Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [out] output [static] The output tensor data.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxTensorSelectLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor output
);
/*! \brief [Graph] Creates a layer gru cell activation z h Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [in] recurrent_activation [static] recurrent activation type.
* \param [in] activation [static] activation type.
* \param [out] output_list [static] The output tensor data.
* \param [out] output_count [static] The output tensor number.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxGruCellActivationZHLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_int32 recurrent_activation,
vx_int32 activation,
vx_tensor* output_list,
vx_uint32 output_count
);
/*! \brief [Graph] Creates a layer gru cell h times activation r Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [in] recurrent_activation [static] recurrent activation type.
* \param [out] output_list [static] The output tensor data.
* \param [out] output_count [static] The output tensor number.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxGruCellHTimeActivationRLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_int32 recurrent_activation,
vx_tensor* output_list,
vx_uint32 output_count
);
/*! \brief [Graph] Creates a layer gru cell reset after activation Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [in] recurrent_activation [static] recurrent activation type.
* \param [in] activation [static] activation type.
* \param [out] output_list [static] The output tensor data.
* \param [out] output_count [static] The output tensor number.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxGruCellResetAfterActivationLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_int32 recurrent_activation,
vx_int32 activation,
vx_tensor* output_list,
vx_uint32 output_count
);
/*! \brief [Graph] Creates a layer lstm activation Node.
* \param [in] graph The handle to the graph.
* \param [in] input_list [static] The input tensor data.
* \param [in] input_count [static] The input tensor number.
* \param [in] lstm_activation_param <tt>\ref vx_nn_lstm_activation_params_t </tt>.
* \param [out] output_list [static] The output tensor data.
* \param [out] output_count [static] The output tensor number.
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_cnn
*/
VX_API_ENTRY vx_node VX_API_CALL vxLSTMActivationLayer(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
const vx_nn_lstm_activation_params_t * lstm_activation_param,
vx_tensor* output_list,
vx_uint32 output_count
);
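A fill-in sketch for the activation params (the flags select the LSTM variant per the field names; the activation enum values are driver-defined, so 0 below is only a placeholder):

static vx_node make_lstm_activation(vx_graph graph,
                                    vx_tensor * in_list, vx_uint32 in_count,
                                    vx_tensor * out_list, vx_uint32 out_count)
{
    vx_nn_lstm_activation_params_t p = { 0 };
    p.is_cifg = 1;              /* coupled input-forget-gate variant */
    p.forget_bias = 1.0f;
    p.recurrent_activation = 0; /* placeholder: driver-defined enum */
    return vxLSTMActivationLayer(graph, in_list, in_count, &p,
                                 out_list, out_count);
}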
#ifdef __cplusplus
}
#endif

View File

@@ -242,6 +242,48 @@ typedef struct _vx_nn_convolution_relu_pooling_params_ext7_t
vx_bool isSub;
} vx_nn_convolution_relu_pooling_params_ext7_t, * vx_nn_convolution_relu_pooling_params_ext7;
typedef struct _vx_nn_fused_sp_params_t
{
vx_enum multi_sp_kernel_type;
/*!<for mul>*/
vx_scalar mul_scale;
/*!<for sp>*/
union
{
struct
{
vx_scalar linear_a, linear_b;
} linear;
struct
{
vx_scalar tanh_a, tanh_b;
float a_v, b_v;
} tanh_linear;
struct
{
vx_scalar hsigmoid_a, hsigmoid_b;
} hsigmoid;
struct
{
vx_scalar clip_a, clip_b;
} clip;
struct
{
vx_scalar scalar_a, scalar_b, scalar_c, scalar_d;
} params;
} scalar_params;
/*!<for other kernel>*/
} vx_nn_fused_sp_params_t, * vx_nn_fused_sp_params;
typedef struct _vx_nn_convolution_relu_pooling_params_sp_ext_t
{
vx_nn_convolution_relu_pooling_params_ext4_t ext4; /*!< \brief convolution relu pooling params <tt>\ref vx_nn_convolution_relu_pooling_params_ext_t</tt> */
vx_object_array inputs_list;
vx_object_array outputs_list;
vx_nn_fused_sp_params_t sp_param;
} vx_nn_convolution_relu_pooling_params_sp_ext_t, * vx_nn_convolution_relu_pooling_params_sp_ext;
/*! \brief [Graph] Creates a Convolutional Network Convolution and Activation(Relu) and Pooling Layer Node, this fucntion match kronos NN Extension 1.2 verion.
* \details This function implement Convolutional Network Convolution and Activation(Relu) and Pooling layer.
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of the accumulator bits are implementation defined,
@@ -1129,6 +1171,48 @@ VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer(vx_graph graph,
const vx_nn_gemm_relu_pooling_params merge_param,
vx_tensor output);
/*! \brief Create a fuse stream process node.
* \param [in] graph The handle to the graph.
* \param [in] input_list input tensor list.
* \param [in] input_count input tensor number.
* \param [in] output_list output tensor list.
* \param [in] output_count output tensor number.
* \param [in] params the parameters for multi streamprocessor merging.
* \return <tt>\ref vx_node</tt>.
* \retval vx_node A node reference. Any possible errors preventing a successful creation
* should be checked using <tt>\ref vxGetStatus</tt>
* \ingroup group_vision_function_sp
*/
VX_API_ENTRY vx_node VX_API_CALL vxFusedSpNode(
vx_graph graph,
vx_tensor* input_list,
vx_uint32 input_count,
vx_tensor* output_list,
vx_uint32 output_count,
const vx_nn_fused_sp_params_t * params
);
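A creation sketch with illustrative values (the stream-processor kernel-type enum is driver-defined and not listed in this diff):

static vx_node make_fused_sp(vx_graph graph,
                             vx_tensor * in_list, vx_uint32 in_count,
                             vx_tensor * out_list, vx_uint32 out_count,
                             vx_scalar scale_scalar)
{
    vx_nn_fused_sp_params_t sp_params = { 0 };
    sp_params.multi_sp_kernel_type = 0; /* placeholder: driver-defined enum */
    sp_params.mul_scale = scale_scalar; /* vx_scalar created elsewhere */
    return vxFusedSpNode(graph, in_list, in_count,
                         out_list, out_count, &sp_params);
}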
/*! \brief Create a conv fuse stream process node.
* \param [in] graph The handle to the graph.
* \param [in] inputs input tensor.
* \param [in] weights_biases [static] Point to WeightBiasesParameter data, vx_weights_biases_parameter is an opaque reference.
* \param [in] convolution_relu_pooling_params [static] Pointer to parameters of type <tt>\ref vx_nn_convolution_relu_pooling_params_t</tt>
* \param [in] size_of_convolution_relu_pooling_params [static] Size in bytes of convolution_relu_pooling_params.
* \param [in] outputs output tensor.
* \return <tt>\ref vx_node</tt>.
* \retval vx_node A node reference. Any possible errors preventing a successful creation
* should be checked using <tt>\ref vxGetStatus</tt>
* \ingroup group_vision_function_sp
*/
VX_API_ENTRY vx_node VX_API_CALL vxConvSpNode(
vx_graph graph,
vx_tensor inputs,
vx_weights_biases_parameter weights_biases,
const vx_nn_convolution_relu_pooling_params_t * convolution_relu_pooling_params,
vx_size size_of_convolution_relu_pooling_params,
vx_tensor outputs
);
#ifdef __cplusplus
}
#endif

View File

@@ -345,16 +345,6 @@ VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINST(
vx_context context
);
-/*! \brief Creates an internal reference to a spinst data.
- * \param [in] context The reference to the implementation context.
- * \return A spinst data reference.
- * \Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
- * \ingroup group_object_spinst
- */
-VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINSTInternal(
-    vx_context context
-);
/*! \brief Releases a reference to a external spinst object.
* The object may not be garbage collected until its total reference count is zero.
* \param [in] spinst_obj The pointer to the spinst data to release.
@@ -368,19 +358,6 @@ VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINST(
vx_spinst *spinst_obj
);
-/*! \brief Releases a reference to a internal spinst object.
- * The object may not be garbage collected until its total reference count is zero.
- * \param [in] spinst_obj The pointer to the spinst data to release.
- * \post After returning from this function the reference is zeroed.
- * \return A <tt>\ref vx_status_e</tt> enumeration.
- * \retval VX_SUCCESS No errors; all other values indicate failure
- * \retval * An error occurred. See <tt>\ref vx_status_e</tt>.
- * \ingroup group_object_spinst
- */
-VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINSTInternal(
-    vx_spinst *spinst_obj
-);
/*! \brief Add a instruction to spinst object.
* \param [in] spinst_obj The reference to the spinst object.
* \param [in] inst_unit_array The units of one instruction. Use a <tt>\ref vx_spinst_unit_param</tt>.

View File

@@ -477,6 +477,8 @@ enum vx_type_e {
VX_TYPE_SPINST = 0x81B,/*!< \brief A <tt>\ref vx_spinst</tt>. */
VX_TYPE_INT4 = 0x81C,/*!< \brief A <tt>\ref signed 4bits tensor.</tt>. */
VX_TYPE_UINT4 = 0x81D,/*!< \brief A <tt>\ref unsigned 4bits tensor.</tt>. */
VX_TYPE_FLOAT8_E4M3 = 0x81E,/*!< \brief A <tt>\ref vx_float8_e4m3</tt>. */
VX_TYPE_FLOAT8_E5M2 = 0x81F,/*!< \brief A <tt>\ref vx_float8_e5m2</tt>. */
};
/*! \brief The enumeration of all status codes.
@@ -803,6 +805,8 @@ enum vx_convert_policy_e {
VX_CONVERT_POLICY_WRAP = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CONVERT_POLICY) + 0x0,
/*! \brief Results are saturated to the bit depth of the output operand. */
VX_CONVERT_POLICY_SATURATE = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CONVERT_POLICY) + 0x1,
/*! \brief Results preserve infinity and NaN values. */
VX_CONVERT_POLICY_INF = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_CONVERT_POLICY) + 0x0,
};
/*! \brief Based on the VX_DF_IMAGE definition.

View File

@@ -6,3 +6,6 @@ DEF_NODE_TYPE(custom_ainr_denoise_postprocess)
DEF_NODE_TYPE(custom_warp_affine)
DEF_NODE_TYPE(custom_warp_perspective)
DEF_NODE_TYPE(custom_sample)
DEF_NODE_TYPE(custom_tiny_yolov4_postprocess)
DEF_NODE_TYPE(custom_tiny_yolov4_postprocess_confidence)
DEF_NODE_TYPE(custom_tiny_yolov4_postprocess_box)

View File

@@ -6,3 +6,6 @@ DEF_OP(CUSTOM_AINR_DENOISE_POSTPROCESS)
DEF_OP(CUSTOM_WARP_AFFINE)
DEF_OP(CUSTOM_WARP_PERSPECTIVE)
DEF_OP(CUSTOM_SAMPLE)
DEF_OP(CUSTOM_TINY_YOLOV4_POSTPROCESS)
DEF_OP(CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE)
DEF_OP(CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX)

View File

@@ -0,0 +1,47 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_H
#define _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_custom_tiny_yolov4_postprocess_param
{
struct _custom_tiny_yolov4_postprocess_local_data_t* local;
// Add parameters here
} vsi_nn_custom_tiny_yolov4_postprocess_param;
_compiler_assert(offsetof(vsi_nn_custom_tiny_yolov4_postprocess_param, local) == 0, \
vsi_nn_custom_tiny_yolov4_postprocess_h );
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,49 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX_H
#define _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_custom_tiny_yolov4_postprocess_box_param
{
struct _custom_tiny_yolov4_postprocess_box_local_data_t* local;
// Add parameters here
float bias_0;
float bias_1;
} vsi_nn_custom_tiny_yolov4_postprocess_box_param;
_compiler_assert(offsetof(vsi_nn_custom_tiny_yolov4_postprocess_box_param, local) == 0, \
vsi_nn_custom_tiny_yolov4_postprocess_box_h );
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,47 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_H
#define _VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_custom_tiny_yolov4_postprocess_confidence_param
{
struct _custom_tiny_yolov4_postprocess_confidence_local_data_t* local;
// Add parameters here
} vsi_nn_custom_tiny_yolov4_postprocess_confidence_param;
_compiler_assert(offsetof(vsi_nn_custom_tiny_yolov4_postprocess_confidence_param, local) == 0, \
vsi_nn_custom_tiny_yolov4_postprocess_confidence_h );
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -38,6 +38,7 @@ typedef struct _vsi_nn_custom_warp_affine_param
const float *matrix;
vsi_enum type;
int32_t size[2];
vsi_enum rgb_type;
} vsi_nn_custom_warp_affine_param;
_compiler_assert(offsetof(vsi_nn_custom_warp_affine_param, local) == 0, \
vsi_nn_custom_warp_affine_h );

View File

@@ -31,5 +31,8 @@
#include "custom/ops/vsi_nn_op_custom_warp_affine.h"
#include "custom/ops/vsi_nn_op_custom_warp_perspective.h"
#include "custom/ops/vsi_nn_op_custom_sample.h"
#include "custom/ops/vsi_nn_op_custom_tiny_yolov4_postprocess.h"
#include "custom/ops/vsi_nn_op_custom_tiny_yolov4_postprocess_confidence.h"
#include "custom/ops/vsi_nn_op_custom_tiny_yolov4_postprocess_box.h"
#endif

View File

@@ -193,3 +193,4 @@ DEF_OP(REVERSESEQUENCE)
DEF_OP(INVERSE_SIGMOID)
DEF_OP(GRID_SAMPLE)
DEF_OP(LPNORM)
DEF_OP(RESIZE_3D)

src/tim/vx/internal/include/internal/internal_ops.def Executable file → Normal file
View File

@@ -20,4 +20,3 @@ DEF_OP(SPACE2DEPTH_INTERNAL)
DEF_OP(GRUCELL_H_TIMES_ACTIVATION_R)
DEF_OP(GRUCELL_ACTIVATION_Z_H)
DEF_OP(REDUCE_MEAN_INTERNAL)
-DEF_OP(BILINEAR_GRID_SAMPLE)

View File

@@ -79,6 +79,8 @@ typedef enum
BOOL8,
I4,
U4,
FP8_E4M3,
FP8_E5M2,
} VSI_PUBLIC_TYPE vsi_nn_kernel_dtype_e;
typedef enum
@@ -89,6 +91,8 @@ typedef enum
VSI_NN_KERNEL_QUANT_ASYMM_PERCHANNEL,
VSI_NN_KERNEL_QUANT_SYMM,
VSI_NN_KERNEL_QUANT_SYMM_PERCHANNEL,
VSI_NN_KERNEL_QUANT_FLOAT8,
VSI_NN_KERNEL_QUANT_FLOAT8_PERCHANNEL,
VSI_NN_KERNEL_QUANT_TYPE_NUM
} vsi_nn_kernel_quant_type_e;
@@ -522,6 +526,10 @@ static VSI_INLINE_API vsi_nn_kernel_dtype_e vsi_nn_kernel_map_dtype
return BF16;
case VSI_NN_TYPE_FLOAT32:
return F32;
case VSI_NN_TYPE_FLOAT8_E4M3:
return FP8_E4M3;
case VSI_NN_TYPE_FLOAT8_E5M2:
return FP8_E5M2;
default:
VSILOGE("error data type %d", dtype);
break;
@@ -579,6 +587,8 @@ static VSI_INLINE_API size_t vsi_nn_kernel_dtype_get_bytes
case I8:
case U8:
case BOOL8:
case FP8_E4M3:
case FP8_E5M2:
return sizeof(int8_t);
case I16:
case U16:
@@ -611,6 +621,8 @@ static VSI_INLINE_API vsi_size_t vsi_nn_kernel_dtype_get_bits
case I8:
case U8:
case BOOL8:
case FP8_E4M3:
case FP8_E5M2:
return 8;
case I16:
case U16:
@@ -879,7 +891,7 @@ static VSI_INLINE_API void vsi_nn_kernel_tensor_attr_get_stride
shape = attr->shape->data;
type_bits = vsi_nn_kernel_dtype_get_bits( attr->dtype );
-if ( type_bits < BITS_PER_BYTE )
+if ( type_bits < BITS_PER_BYTE && type_bits != 0)
{
vsi_size_t i;

View File

@@ -91,4 +91,21 @@ vsi_bool vsi_nn_kernel_optimize_scatter_elements_shape
vsi_size_t* out_shape_x, uint32_t* out_rank_x, int32_t* out_axis, vsi_size_t max_size
);
vsi_bool vsi_nn_kernel_optimize_matrixmul_broadcast_shape
(
const vsi_size_t * shape_x,
const vsi_size_t * shape_y,
const vsi_size_t * shape_output,
vsi_size_t rank_x,
vsi_size_t rank_y,
vsi_size_t rank_out,
vsi_size_t* out_shape_x,
vsi_size_t* out_shape_y,
vsi_size_t* out_shape_output,
uint32_t* new_rank,
uint32_t* cross_flg,
uint32_t* size_axis_inner_outer,
uint32_t* strides_axis_inner_outer
);
#endif

View File

@@ -82,6 +82,12 @@ typedef struct _vsi_nn_pre_process_param
vsi_nn_pre_process_type_e type;
struct
{
float mean[3];
float scale[3];
} norm2;
vsi_nn_pre_process_lcl_data *local;
} vsi_nn_pre_process_param;

View File

@@ -65,6 +65,10 @@ typedef struct _vsi_nn_pre_process_bgra_param
vsi_bool reverse_channel;
float r_scale;
float g_scale;
float b_scale;
/* pre process rgb layer local data structure */
vsi_nn_pre_process_bgra_lcl_data local;
} vsi_nn_pre_process_bgra_param;

View File

@@ -70,6 +70,10 @@ typedef struct _vsi_nn_pre_process_nv12_param
vsi_nn_pre_process_nv12_lcl_data* local;
vsi_nn_nv_type nv_type;
float r_scale;
float g_scale;
float b_scale;
} vsi_nn_pre_process_nv12_param;
#ifdef __cplusplus

View File

@@ -76,6 +76,9 @@ typedef struct _vsi_nn_pre_process_rgb_param
vsi_bool reverse_channel;
float r_scale;
float g_scale;
float b_scale;
/* pre process rgb layer local data structure */
vsi_nn_pre_process_rgb_lcl_data local;
} vsi_nn_pre_process_rgb_param;

View File

@@ -53,6 +53,15 @@ typedef struct _vsi_nn_pre_process_rgb888_planar_param
float g_mean;
float b_mean;
float scale;
vsi_bool reverse_channel;
vsi_bool enable_rgb88_planar_nhwc;
float r_scale;
float g_scale;
float b_scale;
} vsi_nn_pre_process_rgb888_planar_param;
_compiler_assert(offsetof(vsi_nn_pre_process_rgb888_planar_param, local) == 0, \
vsi_nn_pre_process_rgb888_planar_h );

View File

@@ -66,6 +66,11 @@ typedef struct _vsi_nn_pre_process_yuv420_param
float rgb_scale;
vsi_bool reverse_channel;
float r_scale;
float g_scale;
float b_scale;
/* local data must be the first. */
vsi_nn_pre_process_yuv420_lcl_data local;
} vsi_nn_pre_process_yuv420_param;

View File

@@ -71,6 +71,10 @@ typedef struct _vsi_nn_pre_process_yuv422_param
float rgb_scale;
vsi_bool reverse_channel;
float r_scale;
float g_scale;
float b_scale;
} vsi_nn_pre_process_yuv422_param;

#ifdef __cplusplus
View File

@@ -66,6 +66,10 @@ typedef struct _vsi_nn_pre_process_yuv444_param
float rgb_scale;
vsi_bool reverse_channel;
float r_scale;
float g_scale;
float b_scale;
/* local data must be the first. */
vsi_nn_pre_process_yuv444_lcl_data* local;
} vsi_nn_pre_process_yuv444_param;

View File

@@ -22,8 +22,8 @@
*
*****************************************************************************/
-#ifndef _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
-#define _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
+#ifndef _VSI_NN_OP_RESIZE_3D_H
+#define _VSI_NN_OP_RESIZE_3D_H
#include "vsi_nn_types.h"
@@ -31,17 +31,19 @@
extern "C" {
#endif
-typedef struct _vsi_nn_bilinear_grid_sample_param
-{
-struct _bilinear_grid_sample_local_data_t* local;
-vsi_bool align_corners;
-vsi_nn_pad_mode_e padding_mode;
-int32_t const_val;
-} vsi_nn_bilinear_grid_sample_param;
-_compiler_assert(offsetof(vsi_nn_bilinear_grid_sample_param, local) == 0, \
-vsi_nn_bilinear_grid_sample_h );
+typedef struct _vsi_nn_resize_3d_local_data {
+vsi_bool use_internal_node;
+} vsi_nn_resize_3d_local_data;
+typedef struct _vsi_nn_resize_3d_param
+{
+vsi_nn_resize_3d_local_data* lcl_data;
+vsi_enum type;
+float factor;
+int32_t size[3];
+vsi_bool align_corners;
+vsi_bool half_pixel_centers;
+} vsi_nn_resize_3d_param;
#ifdef __cplusplus
}

View File

@@ -33,6 +33,7 @@ extern "C" {
typedef struct _vsi_nn_topk_param
{
uint32_t k;
int32_t axis;
} vsi_nn_topk_param;
#ifdef __cplusplus

View File

@@ -52,7 +52,9 @@ enum {
D_BF16 = VSI_NN_TYPE_BFLOAT16,
D_BOOL8 = VSI_NN_TYPE_BOOL8,
D_I4 = VSI_NN_TYPE_INT4,
-D_U4 = VSI_NN_TYPE_UINT4
+D_U4 = VSI_NN_TYPE_UINT4,
D_F8_E4M3 = VSI_NN_TYPE_FLOAT8_E4M3,
D_F8_E5M2 = VSI_NN_TYPE_FLOAT8_E5M2
};
/* short alias for qtype */
@@ -63,6 +65,8 @@ enum {
Q_ASYM = VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC << Q_SHIFT,
Q_SYM_PC = VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC << Q_SHIFT,
Q_SYM = VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC << Q_SHIFT,
Q_FP8 = VSI_NN_QNT_TYPE_SYMMETRIC_FLOAT8 << Q_SHIFT,
Q_FP8_PC = VSI_NN_QNT_TYPE_PERCHANNEL_SYMMETRIC_FLOAT8 << Q_SHIFT,
};
typedef struct {

View File

@@ -27,6 +27,7 @@
#include "vsi_nn_types.h"
#include "vsi_nn_math.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_log.h"
#ifdef __cplusplus
extern "C" {
@@ -78,6 +79,8 @@ static VSI_INLINE_API vsi_bool type_is_signed
case VSI_NN_TYPE_FLOAT32:
case VSI_NN_TYPE_FLOAT64:
case VSI_NN_TYPE_BFLOAT16:
case VSI_NN_TYPE_FLOAT8_E4M3:
case VSI_NN_TYPE_FLOAT8_E5M2:
ret = TRUE;
break;
default:
@@ -93,9 +96,14 @@ static VSI_INLINE_API uint32_t type_get_bytes
{
switch( type )
{
case VSI_NN_TYPE_INT4:
case VSI_NN_TYPE_UINT4:
return 0;
case VSI_NN_TYPE_INT8:
case VSI_NN_TYPE_UINT8:
case VSI_NN_TYPE_BOOL8:
case VSI_NN_TYPE_FLOAT8_E4M3:
case VSI_NN_TYPE_FLOAT8_E5M2:
return 1;
case VSI_NN_TYPE_INT16:
case VSI_NN_TYPE_UINT16:
@@ -111,7 +119,8 @@ static VSI_INLINE_API uint32_t type_get_bytes
case VSI_NN_TYPE_FLOAT64:
return 8;
default:
-return 0;
+VSILOGE("unsupported type: %d", type);
+return 1;
}
} /* type_get_bytes() */
@@ -128,6 +137,8 @@ static VSI_INLINE_API uint32_t type_get_bits
case VSI_NN_TYPE_INT8:
case VSI_NN_TYPE_UINT8:
case VSI_NN_TYPE_BOOL8:
case VSI_NN_TYPE_FLOAT8_E4M3:
case VSI_NN_TYPE_FLOAT8_E5M2:
return 8;
case VSI_NN_TYPE_INT16:
case VSI_NN_TYPE_UINT16:
@@ -143,7 +154,8 @@ static VSI_INLINE_API uint32_t type_get_bits
case VSI_NN_TYPE_FLOAT64:
return 64;
default:
-return 0;
+VSILOGE("unsupported type: %d", type);
+return 1;
}
} /* type_get_bits() */
@@ -236,6 +248,7 @@ static VSI_INLINE_API float affine_to_fp32
)
{
float data;
VSI_UNREFERENCED(type);
data = ( (float)val - zero_point ) * scale;
return data;
} /* affine_to_fp32() */
@@ -279,6 +292,7 @@ static VSI_INLINE_API float dfp_to_fp32
)
{
float result;
VSI_UNREFERENCED(type);
if( fl > 0 )
{
result = (float)val * ( 1.0f / ( (float) ( (int64_t)1 << fl ) ) );
@@ -440,6 +454,139 @@ static VSI_INLINE_API uint16_t fp32_to_bfp16_rtne
return out;
} /* fp32_to_bfp16_rtne */
#define FLOAT_BIAS_EXPONENT 127
#define FLOAT_EXPONENT_SIZE 8
#define FLOAT_MANTISSA_SIZE 23
#define FLOAT8_E4M3_BIAS_EXPONENT 7
#define FLOAT8_E4M3_EXPONENT_SIZE 4
#define FLOAT8_E4M3_MANTISSA_SIZE 3
#define FLOAT8_E5M2_BIAS_EXPONENT 15
#define FLOAT8_E5M2_EXPONENT_SIZE 5
#define FLOAT8_E5M2_MANTISSA_SIZE 2
static VSI_INLINE_API uint8_t fp32_to_fp8_e4m3(float in, const float scale) {
float fp8_f32 = in / scale;
int32_t fp8_i32 = *((int32_t*)&fp8_f32);
//int32_t mask = (int32_t)(pow(2, 32) - 1 - (pow(2, 23 - 3) - 1));
int32_t eps = 1 << (23 - 3 - 1);
fp8_i32 += eps;
//fp8_i32 &= mask;
{
int sign = (fp8_i32 >> (FLOAT_EXPONENT_SIZE + FLOAT_MANTISSA_SIZE)) & 0x1;
int exp = (fp8_i32 >> FLOAT_MANTISSA_SIZE) & 0xff;
int expShiftValue = FLOAT8_E4M3_BIAS_EXPONENT - FLOAT_BIAS_EXPONENT;
int mantissa = (fp8_i32 >> (FLOAT_MANTISSA_SIZE - FLOAT8_E4M3_MANTISSA_SIZE)) & 0x7;
exp = (exp + expShiftValue) & 0xF;
return (uint8_t)(sign << 7 | exp << 3 | mantissa);
}
} /* fp32_to_fp8_e4m3() */
static VSI_INLINE_API uint8_t fp32_to_fp8_e5m2(float in, const float scale) {
float fp8_f32 = in / scale;
int32_t fp8_i32 = *((int32_t*)&fp8_f32);
//int32_t mask = (int32_t)(pow(2, 32) - 1 - (pow(2, 23 - 2) - 1));
int32_t eps = 1 << (23 - 2 - 1);
fp8_i32 += eps;
//fp8_i32 &= mask;
{
int sign = (fp8_i32 >> (FLOAT_EXPONENT_SIZE + FLOAT_MANTISSA_SIZE)) & 0x1;
int exp = (fp8_i32 >> FLOAT_MANTISSA_SIZE) & 0xff;
int expShiftValue = FLOAT8_E5M2_BIAS_EXPONENT - FLOAT_BIAS_EXPONENT;
int mantissa = (fp8_i32 >> (FLOAT_MANTISSA_SIZE - FLOAT8_E5M2_MANTISSA_SIZE)) & 0x3;
exp = (exp + expShiftValue) & 0x1F;
return (uint8_t)(sign << 7 | exp << 2 | mantissa);
}
} /* fp32_to_fp8_e5m2() */
static VSI_INLINE_API float fp8_e4m3_to_fp32(uint8_t in, const float scale) {
float val_fp32;
uint32_t signOut = 0;
uint32_t exponentOut = 0;
uint32_t mantissaOut = 0;
uint32_t out_u = 0;
uint32_t signIn;
uint32_t exponentIn;
uint32_t mantissaIn;
int expShiftValue = FLOAT_BIAS_EXPONENT - FLOAT8_E4M3_BIAS_EXPONENT;
signIn = (in >> (FLOAT8_E4M3_EXPONENT_SIZE + FLOAT8_E4M3_MANTISSA_SIZE)) & 0x1;
exponentIn = (in >> FLOAT8_E4M3_MANTISSA_SIZE) & 0xF;
mantissaIn = in & 0x7;
signOut = signIn;
if (exponentIn == 0 && mantissaIn == 0)
{
goto final;
}
if (exponentIn == 0xf && mantissaIn == 0x7)
{
exponentOut = 0xff;
mantissaOut = 0x400000;
goto final;
}
exponentOut = (exponentIn + expShiftValue) & 0xff;
mantissaOut = (mantissaIn << (FLOAT_MANTISSA_SIZE - FLOAT8_E4M3_MANTISSA_SIZE)) & 0x7fffff;
final:
out_u = signOut << 31 | exponentOut << 23 | mantissaOut;
val_fp32 = *((float*)&out_u);
return val_fp32 * scale;
} /* fp8_e4m3_to_fp32() */
static VSI_INLINE_API float fp8_e5m2_to_fp32(int8_t in, const float scale) {
float val_fp32;
uint32_t signOut = 0;
uint32_t exponentOut = 0;
uint32_t mantissaOut = 0;
uint32_t out_u = 0;
uint32_t signIn;
uint32_t exponentIn;
uint32_t mantissaIn;
int expShiftValue = FLOAT_BIAS_EXPONENT - FLOAT8_E5M2_BIAS_EXPONENT;
signIn = (in >> 7) & 0x1;
exponentIn = (in >> 2) & 0x1F;
mantissaIn = in & 0x3;
signOut = signIn;
if (exponentIn == 0 && mantissaIn == 0)
{
goto final;
}
if (exponentIn == 0x1f && mantissaIn == 0x3)
{
exponentOut = 0xff;
mantissaOut = 0x400000;
goto final;
}
exponentOut = (exponentIn + expShiftValue) & 0xff;
mantissaOut = (mantissaIn << (FLOAT_MANTISSA_SIZE - FLOAT8_E5M2_MANTISSA_SIZE)) & 0x7fffff;
final:
out_u = signOut << 31 | exponentOut << 23 | mantissaOut;
val_fp32 = *((float*)&out_u);
return val_fp32 * scale;
} /* fp8_e5m2_to_fp32() */
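A worked round trip through the e4m3 helpers above (values checked by hand): with scale = 1.0f, 1.5f is 1.1b x 2^0, which encodes as sign 0, biased exponent 7, mantissa 100b, i.e. byte 0x3C, and decodes back exactly:

static void fp8_e4m3_round_trip_example(void)
{
    float scale = 1.0f;
    uint8_t q = fp32_to_fp8_e4m3(1.5f, scale); /* q == 0x3C */
    float back = fp8_e4m3_to_fp32(q, scale);   /* back == 1.5f */
    (void)back;
}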
static VSI_INLINE_API vsi_status dtype_to_float32
(
uint8_t *src,
@@ -458,6 +605,12 @@ static VSI_INLINE_API vsi_status dtype_to_float32
case VSI_NN_TYPE_BFLOAT16:
*dst = bfp16_to_fp32( *(int16_t *)src );
break;
case VSI_NN_TYPE_FLOAT8_E4M3:
*dst = fp8_e4m3_to_fp32(*(int8_t*)src, src_dtype->scale);
break;
case VSI_NN_TYPE_FLOAT8_E5M2:
*dst = fp8_e5m2_to_fp32(*(int8_t *)src, src_dtype->scale);
break;
case VSI_NN_TYPE_INT4:
case VSI_NN_TYPE_UINT4:
case VSI_NN_TYPE_INT8:
@@ -511,6 +664,12 @@ static VSI_INLINE_API vsi_status float32_to_dtype
case VSI_NN_TYPE_BFLOAT16:
*(int16_t *)dst = fp32_to_bfp16_rtne( src );
break;
case VSI_NN_TYPE_FLOAT8_E4M3:
*(int8_t *)dst = fp32_to_fp8_e4m3(src, dst_dtype->scale);
break;
case VSI_NN_TYPE_FLOAT8_E5M2:
*(int8_t *)dst = fp32_to_fp8_e5m2(src, dst_dtype->scale);
break;
case VSI_NN_TYPE_INT4:
case VSI_NN_TYPE_UINT4:
case VSI_NN_TYPE_INT8:

View File

@@ -30,7 +30,7 @@
extern "C"{
#endif
-#define vsi_nn_LinkListInitRoot(n) do{n = NULL;} while (0);
+#define vsi_nn_LinkListInitRoot(n) {n = NULL;}
typedef struct _vsi_nn_link_list
{

View File

@@ -53,12 +53,13 @@ extern "C" {
#define DEFINE_ARRAY_TYPE( NAME, TYPE ) \
typedef struct { \
size_t size; \
-TYPE data[0]; \
+TYPE *data; \
} vsi_##NAME##_array_t; \
static VSI_INLINE_API vsi_##NAME##_array_t * vsi_##NAME##_array_create( size_t size ) { \
-vsi_##NAME##_array_t * array = (vsi_##NAME##_array_t *)malloc( \
-sizeof(vsi_##NAME##_array_t) + sizeof(TYPE) * size ); \
+vsi_##NAME##_array_t * array = NULL; \
+array = (vsi_##NAME##_array_t *)malloc( sizeof(vsi_##NAME##_array_t) + sizeof(TYPE) * size ); \
if (array == NULL) return NULL; \
array->data = (TYPE *)(((TYPE**)(&(array->data))) + 1); \
array->size = size; \
return array; \
} \
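The data[0] flexible-array member becomes a real pointer that the create function re-aims at the payload just past the struct header, so a single malloc/free still covers both. A usage sketch, assuming a hypothetical instantiation DEFINE_ARRAY_TYPE( int, int32_t ):

static void array_usage_sketch(void)
{
    vsi_int_array_t * arr = vsi_int_array_create( 4 );
    if( arr )
    {
        arr->data[0] = 42; /* data points into the same allocation */
        free( arr );       /* one free releases header and payload */
    }
}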

View File

@@ -50,14 +50,23 @@ extern "C" {
free( _PTR ); _PTR = NULL; }
#define vsi_safe_release_tensor(_t) if(_t){vsi_nn_ReleaseTensor(&(_t)); _t = NULL;}
#if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32))
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffebadcaffe) #if defined(_WIN64)
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffebadcaffe)
#else
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffe)
#endif
#else
#define END_OF_VARIADIC_ARGUMENTS ((size_t)0xbadcaffebadcaffe)
#endif
#define FOREACH_ARGS(_args, _next, _arg_type) \ #define FOREACH_ARGS(_args, _next, _arg_type) \
while(((_arg_type)((size_t)END_OF_VARIADIC_ARGUMENTS)) != (_next = va_arg(_args, _arg_type))) while(((_arg_type)((size_t)END_OF_VARIADIC_ARGUMENTS)) != (_next = va_arg(_args, _arg_type)))
#define BITS_PER_BYTE 8 #define BITS_PER_BYTE 8
#define VSI_UNREFERENCED( param ) ( ( void ) ( param ) )
#define VSI_NN_STRINGIZE(X) VSI_NN_DO_STRINGIZE(X) #define VSI_NN_STRINGIZE(X) VSI_NN_DO_STRINGIZE(X)
#define VSI_NN_DO_STRINGIZE(X) #X #define VSI_NN_DO_STRINGIZE(X) #X
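The Win32 special case presumably exists because size_t is 32 bits there, so the 64-bit sentinel constant would be truncated; defining a 32-bit marker keeps the FOREACH_ARGS comparison well-defined. A sketch of how the sentinel terminates a variadic list (hypothetical helper; requires <stdarg.h>):
    static int _count_tensors( vsi_nn_tensor_t * first, ... )
    {
        va_list args;
        vsi_nn_tensor_t * next = NULL;
        int n = (first != NULL);
        va_start( args, first );
        FOREACH_ARGS( args, next, vsi_nn_tensor_t * )
        {
            n++;   /* runs once per argument until the marker is read */
        }
        va_end( args );
        return n;
    }
    /* call site: _count_tensors( t0, t1, (vsi_nn_tensor_t *)END_OF_VARIADIC_ARGUMENTS ); */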

View File

@@ -78,6 +78,7 @@ typedef struct _vsi_nn_runtime_option_t
     int32_t enable_asymi8_to_u8;
     int32_t enable_dataconvert_optimize;
     int32_t enable_stream_processor;
+    int32_t enable_rgb88_planar_nhwc;
 } vsi_nn_runtime_option_t;
 /**

View File

@@ -31,33 +31,42 @@
 #define VSI_ASSERT( cond ) assert(cond)
 #define VSI_CHECK_PTR( pointer, msg, retval ) \
-    do { \
+    { \
         if( pointer == NULL ) { \
             VSILOGD("%s",msg); \
             VSI_ASSERT(FALSE); \
         } \
-    } while(0)
+    }
-#define CHECK_STATUS_FAIL_GOTO( stat, lbl ) do {\
+#define CHECK_STATUS_FAIL_GOTO( stat, lbl ) {\
     if( VSI_SUCCESS != stat ) {\
         VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
         goto lbl;\
     }\
-    } while(0)
+    }
-#define CHECK_STATUS( stat ) do {\
+#define CHECK_STATUS( stat ) {\
     if( VSI_SUCCESS != stat ) {\
         VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
     }\
-    } while(0)
+    }
 #define CHECK_PTR_FAIL_GOTO( pointer, msg, lbl ) \
-    do { \
+    { \
         if( pointer == NULL ) { \
             VSILOGD("CHECK POINTER %s", msg); \
             goto lbl; \
         } \
-    } while(0)
+    }
+#define CHECK_PTR_FAIL_GOTO_RLS_INTERNAL_NODE( pointer, node, msg, lbl ) \
+    { \
+        if( pointer == NULL ) { \
+            vsi_nn_internal_release_node(&node);\
+            VSILOGD("CHECK POINTER %s", msg); \
+            goto lbl; \
+        } \
+    }
 #endif
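A side note on the switch from the do { ... } while(0) idiom to bare braces (general C behavior, not something the diff states): a brace-block macro followed by the caller's semicolon leaves an empty statement behind, so an unbraced if/else around such a macro no longer compiles:
    if (cond)
        CHECK_STATUS( stat );  /* expands to { ... } ; - the ';' is an extra statement */
    else                       /* error: 'else' without a matching 'if' */
        fallback();
Call sites that wrap these macros in if/else therefore need explicit braces.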

View File

@@ -1,26 +1,3 @@
-/****************************************************************************
-*
-*    Copyright (c) 2019 Vivante Corporation
-*
-*    Permission is hereby granted, free of charge, to any person obtaining a
-*    copy of this software and associated documentation files (the "Software"),
-*    to deal in the Software without restriction, including without limitation
-*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
-*    and/or sell copies of the Software, and to permit persons to whom the
-*    Software is furnished to do so, subject to the following conditions:
-*
-*    The above copyright notice and this permission notice shall be included in
-*    all copies or substantial portions of the Software.
-*
-*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-*    DEALINGS IN THE SOFTWARE.
-*
-*****************************************************************************/
 /*****Auto generated header file, Please DO NOT modify manually!*****/
 #ifndef _VSI_NN_FEATURE_CONFIG_H
 #define _VSI_NN_FEATURE_CONFIG_H
@@ -42,5 +19,6 @@
 #if defined(VX_TENSORVIEW_ON_ANY_DIM) && VX_TENSORVIEW_ON_ANY_DIM
 #define VSI_CONCAT_ENHANCE_SUPPORT
 #endif
+#define VSI_CREATE_TENSOR_FROM_VIEW_SUPPORT
 #endif

View File

@@ -361,6 +361,27 @@ OVXLIB_API vsi_nn_tensor_id_t vsi_nn_AddTensorFromHandle
     uint8_t * data
     );
+/**
+ * Add a new tensor from view
+ * Create a new tensor from a view and add it to graph.
+ *
+ * @param[in] graph Graph handle.
+ * @param[in] id Required, the id of the parent tensor on which to create the view.
+ * @param[in] start The start coordinates for each dim, 0-based non-negative integers.
+ *            NULL means copy from the idx 0 of each dim.
+ * @param[in] end The end coordinates for each dim, 0-based non-negative integers.
+ *            NULL means copy to the end of each dim. For a given idx, end[idx]
+ *            should be greater than start[idx].
+ * @return The new tensor id on success, or VSI_NN_TENSOR_ID_NA otherwise.
+ */
+OVXLIB_API vsi_nn_tensor_id_t vsi_nn_AddTensorFromView
+    (
+    vsi_nn_graph_t* graph,
+    vsi_nn_tensor_id_t id,
+    vsi_size_t* start,
+    vsi_size_t* end
+    );
 /**
  * Attach tensor to graph
  * Attach an exist tensor to graph.
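A minimal usage sketch (assumed context: `graph` already holds a 4x4x4 parent tensor registered as `parent_id`; the only stated contract on the extents is end[idx] > start[idx]):
    vsi_size_t start[3] = { 0, 0, 1 };   /* begin of the view in each dim */
    vsi_size_t end[3]   = { 4, 4, 3 };   /* view covers channels 1..2 of the parent */
    vsi_nn_tensor_id_t view_id = vsi_nn_AddTensorFromView( graph, parent_id, start, end );
    if( VSI_NN_TENSOR_ID_NA == view_id )
    {
        VSILOGE( "Create tensor from view fail" );
    }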

View File

@@ -206,8 +206,8 @@
 #include "ops/vsi_nn_op_maxunpool.h"
 #include "ops/vsi_nn_op_reversesequence.h"
 #include "ops/vsi_nn_op_grid_sample.h"
-#include "ops/vsi_nn_op_bilinear_grid_sample.h"
 #include "ops/vsi_nn_op_lpnorm.h"
+#include "ops/vsi_nn_op_resize_3d.h"
 /* custom node head define define */
 #include "custom/vsi_nn_custom_node_type.h"
 #include "ops/vsi_nn_op_inverse_sigmoid.h"
@@ -402,8 +402,8 @@ typedef union _vsi_nn_nn_param
     vsi_nn_reversesequence_param reversesequence;
     vsi_nn_inverse_sigmoid_param inverse_sigmoid;
     vsi_nn_grid_sample_param gridsample;
-    vsi_nn_bilinear_grid_sample_param bilinear_grid_sample;
     vsi_nn_lpnorm_param lpnorm;
+    vsi_nn_resize_3d_param resize_3d;
     void* client_param;
     /* custom node data struct define */

View File

@@ -48,6 +48,7 @@ typedef enum
     VSI_NN_PREPROCESS_IMAGE_RESIZE_BILINEAR,
     VSI_NN_PREPROCESS_IMAGE_RESIZE_NEAREST,
     VSI_NN_PREPROCESS_DTYPE_CONVERT,
+    VSI_NN_PREPROCESS_MEANS_AND_SCALES,
 } vsi_nn_preprocess_type_e;
 /**
@@ -150,8 +151,25 @@ typedef struct
     float scale;
 }vsi_nn_process_mean_and_scale_t;
+/**
+ * Process means and scales parameter structure
+ */
+typedef struct
+{
+    /** Mean value for each channel */
+    float* channel_mean;
+    /** Channel length */
+    int32_t channel_len;
+    /** Scale values */
+    float* scale;
+    /** Scale length */
+    int32_t scale_len;
+}vsi_nn_process_means_and_scales_t;
 typedef vsi_nn_process_mean_and_scale_t vsi_nn_preprocess_mean_and_scale_t;
+typedef vsi_nn_process_means_and_scales_t vsi_nn_preprocess_means_and_scales_t;
 typedef vsi_nn_process_mean_and_scale_t vsi_nn_postprocess_mean_and_scale_t;
+typedef vsi_nn_process_means_and_scales_t vsi_nn_postprocess_means_and_scales_t;
 /**
  * Process permute parameter structure

View File

@@ -154,7 +154,7 @@ vsi_nn_internal_tensor_t* vsi_nn_rnn_transpose_time_major
     vsi_bool use_virtual_tensor
     );
-void vsi_nn_rnn_split_input_tensor
+vsi_status vsi_nn_rnn_split_input_tensor
     (
     vsi_nn_node_t * self,
     vsi_nn_tensor_t * input,
@@ -163,7 +163,7 @@ void vsi_nn_rnn_split_input_tensor
     vsi_bool use_virtual_tensor
     );
-void vsi_nn_rnn_data_check_aligned
+vsi_status vsi_nn_rnn_data_check_aligned
     (
     vsi_nn_node_t * self,
     vsi_nn_tensor_t ** input,

View File

@@ -82,6 +82,10 @@ typedef enum
     VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC = 0x4,
     /** affine perchannel asymmetric */
     VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_ASYMMETRIC = 0x5,
+    /** float8 */
+    VSI_NN_QNT_TYPE_SYMMETRIC_FLOAT8 = 0x6,
+    /** perchannel float8 */
+    VSI_NN_QNT_TYPE_PERCHANNEL_SYMMETRIC_FLOAT8 = 0x7,
     /** undefined type */
     VSI_NN_QNT_TYPE_NA = 0xff,
 } vsi_nn_qnt_type_e;

View File

@@ -734,13 +734,15 @@ vsi_status vsi_nn_copy_tensor_veiw_patch
 /**
  * OVXLIB internal tensor util api
  * A wrapper api for OpenVX vxCopyTensorPatch
- * Allows the application to copy whole tensor patch from/into an tensor object.
+ * Allows the application to copy a partial or whole tensor patch from/into a tensor object.
  *
  * @param[in] tensor OpenVX Tensor handle.
  * @param[in] attr OVXLIB Tensor attr.
 * @param[in] user_ptr The address of the memory location where to store the requested data.
 * @param[in] usage This declares the effect of the copy with regard to the tensor object
 *            support VX_READ_ONLY or VX_WRITE_ONLY
+ * @param[in] start The start coordinates for each dim. NULL means copy from the idx 0 of each dim.
+ * @param[in] end The end coordinates for each dim. NULL means copy to the end of each dim.
  * @return VSI_SUCCESS on success, or error code otherwise.
  */
 vsi_status vsi_nn_copy_tensor_patch
@@ -748,7 +750,9 @@ vsi_status vsi_nn_copy_tensor_patch
     vx_tensor tensor,
     vsi_nn_tensor_attr_t *attr,
     void * user_ptr,
-    vsi_enum usage
+    vsi_enum usage,
+    vsi_size_t* start,
+    vsi_size_t* end
     );
 /**
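With the extended signature a partial read becomes possible; a sketch under assumed shapes (a 4x4 FP32 tensor, reading the first two rows):
    vsi_size_t start[2] = { 0, 0 };
    vsi_size_t end[2]   = { 4, 2 };
    float patch[4 * 2];
    status = vsi_nn_copy_tensor_patch( tensor, &attr, patch, VX_READ_ONLY, start, end );
    CHECK_STATUS( status );
    /* passing NULL for both start and end keeps the old whole-tensor behavior */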

View File

@@ -31,26 +31,26 @@
 extern "C"{
 #endif
-#define TEST_CHECK_TENSOR_ID( id, lbl ) do {\
+#define TEST_CHECK_TENSOR_ID( id, lbl ) {\
     if( VSI_NN_TENSOR_ID_NA == id ) {\
         VSILOGE("CHECK TENSOR ID %d", __LINE__);\
         goto lbl;\
     }\
-    } while(0)
+    }
-#define TEST_CHECK_PTR( ptr, lbl ) do {\
+#define TEST_CHECK_PTR( ptr, lbl ) {\
     if( NULL == ptr ) {\
         VSILOGE("CHECK PTR %d", __LINE__);\
         goto lbl;\
     }\
-    } while(0)
+    }
-#define TEST_CHECK_STATUS( stat, lbl ) do {\
+#define TEST_CHECK_STATUS( stat, lbl ) {\
     if( VSI_SUCCESS != stat ) {\
         VSILOGE("CHECK STATUS(%d:%s)", (stat), vsi_nn_DescribeStatus(stat));\
         goto lbl;\
     }\
-    } while(0)
+    }
 #if defined(__cplusplus)
 }

View File

@@ -191,6 +191,16 @@ typedef enum
     VSI_NN_TYPE_BFLOAT16 = VX_TYPE_BFLOAT16,
 #else
     VSI_NN_TYPE_BFLOAT16 = 0x81A,
 #endif
+#ifdef VSI_NN_TYPE_FLOAT8_E4M3_SUPPORT
+    VSI_NN_TYPE_FLOAT8_E4M3 = VX_TYPE_FLOAT8_E4M3,
+#else
+    VSI_NN_TYPE_FLOAT8_E4M3 = 0X81E,
+#endif
+#ifdef VSI_NN_TYPE_FLOAT8_E5M2_SUPPORT
+    VSI_NN_TYPE_FLOAT8_E5M2 = VX_TYPE_FLOAT8_E5M2,
+#else
+    VSI_NN_TYPE_FLOAT8_E5M2 = 0X81F,
+#endif
     VSI_NN_TYPE_VDATA = VX_TYPE_USER_STRUCT_START + 0x1,
@@ -268,6 +278,11 @@ typedef enum _vsi_nn_roi_align_type_e
     VSI_NN_ROI_ALIGN
 } vsi_nn_roi_align_type_e;
+typedef enum _vsi_nn_custom_warp_affine_type_e {
+    VSI_NN_WARP_AFFINE_TYPE_NONE = 0,
+    VSI_NN_WARP_AFFINE_TYPE_RGB
+} vsi_nn_custom_warp_affine_type_e;
 /** Deprecated */
 typedef uint32_t vsi_nn_size_t;

View File

@@ -33,7 +33,7 @@ extern "C"{
 #define VSI_NN_VERSION_MAJOR 1
 #define VSI_NN_VERSION_MINOR 1
-#define VSI_NN_VERSION_PATCH 74
+#define VSI_NN_VERSION_PATCH 84
 #define VSI_NN_VERSION \
     (VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
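For this release the packed value works out to 1 * 10000 + 1 * 100 + 84 = 10184.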

View File

@@ -0,0 +1,578 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_error.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "vsi_nn_internal_node.h"
#include "utils/vsi_nn_constraint_check.h"
typedef struct _custom_tiny_yolov4_postprocess_local_data_t {
vx_int32 begin_dims[6][VSI_NN_MAX_DIM_NUM];
vx_int32 end_dims[6][VSI_NN_MAX_DIM_NUM];
vx_int32 stride_dims[VSI_NN_MAX_DIM_NUM];
} custom_tiny_yolov4_postprocess_local_data_t;
/*
Declare number of input and output.
*/
#define _INPUT_NUM (4)
#define _OUTPUT_NUM (2)
static vsi_nn_internal_tensor_t *_create_internal_tensor
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input
)
{
vsi_nn_tensor_attr_t attr;
vsi_nn_internal_tensor_t * tensor = NULL;
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
memcpy( &attr.dtype, &input->attr.dtype, sizeof( attr.dtype ) );
attr.dim_num = VSI_NN_DIM_AUTO;
attr.vtl = TRUE;
attr.is_const = FALSE;
tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
return tensor;
} /* _create_internal_tensor() */
static vsi_nn_internal_tensor_t *_create_sigmoid_internal_tensor
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input
)
{
vsi_nn_tensor_attr_t attr;
vsi_nn_internal_tensor_t * tensor = NULL;
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
memcpy( &attr.dtype, &input->attr.dtype, sizeof( attr.dtype ) );
if (attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC ||
attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_SYMMETRIC)
{
attr.dtype.scale = 0.00390625;
attr.dtype.zero_point = 0;
}
attr.dim_num = VSI_NN_DIM_AUTO;
attr.vtl = TRUE;
attr.is_const = FALSE;
tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
return tensor;
} /* _create_sigmoid_internal_tensor() */
static vsi_nn_internal_tensor_t *_create_output_internal_tensor
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * output
)
{
vsi_nn_tensor_attr_t attr;
vsi_nn_internal_tensor_t * tensor = NULL;
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
memcpy( &attr.dtype, &output->attr.dtype, sizeof( attr.dtype ) );
attr.dim_num = VSI_NN_DIM_AUTO;
attr.vtl = TRUE;
attr.is_const = FALSE;
tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
return tensor;
} /* _create_output_internal_tensor() */
static vsi_nn_internal_tensor_t *_create_strided_slice_op
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input,
int32_t begin_mask,
int32_t end_mask,
int32_t index
)
{
vsi_nn_custom_tiny_yolov4_postprocess_param * p = NULL;
vsi_nn_internal_tensor_t * tensor = NULL;
vsi_nn_internal_node_t* curr = NULL;
p = (vsi_nn_custom_tiny_yolov4_postprocess_param *)&(self->nn_param.custom_tiny_yolov4_postprocess);
tensor = _create_internal_tensor(self, input);
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_STRIDED_SLICE, 0, 0 );
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
curr->node->nn_param.strided_slice.begin_dims = p->local->begin_dims[index];
curr->node->nn_param.strided_slice.begin_dims_num = input->attr.dim_num;
curr->node->nn_param.strided_slice.end_dims = p->local->end_dims[index];
curr->node->nn_param.strided_slice.end_dims_num = input->attr.dim_num;
curr->node->nn_param.strided_slice.stride_dims = p->local->stride_dims;
curr->node->nn_param.strided_slice.stride_dims_num = input->attr.dim_num;
curr->node->nn_param.strided_slice.begin_mask = begin_mask;
curr->node->nn_param.strided_slice.end_mask = end_mask;
curr->node->nn_param.strided_slice.shrink_axis_mask = 0;
curr->node->nn_param.strided_slice.new_axis_mask = 0;
curr->inputs[0] = input;
curr->outputs[0] = tensor->t;
vsi_nn_internal_setup_node( self, curr );
final:
return tensor;
} /* _create_strided_slice_op() */
static vsi_nn_internal_tensor_t *_create_sigmoid_op
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input
)
{
vsi_nn_internal_tensor_t * tensor = NULL;
vsi_nn_internal_node_t* curr = NULL;
tensor = _create_sigmoid_internal_tensor(self, input);
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_SIGMOID, 0, 0 );
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
curr->inputs[0] = input;
curr->outputs[0] = tensor->t;
vsi_nn_internal_setup_node( self, curr );
final:
return tensor;
} /* _create_sigmoid_op() */
static vsi_nn_internal_tensor_t *_create_confidence_op
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input,
vsi_nn_tensor_t * output
)
{
vsi_nn_internal_tensor_t * tensor = NULL;
vsi_nn_internal_node_t* curr = NULL;
tensor = _create_output_internal_tensor(self, output);
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE, 0, 0 );
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
curr->inputs[0] = input;
curr->outputs[0] = tensor->t;
vsi_nn_internal_setup_node( self, curr );
final:
return tensor;
} /* _create_confidence_op() */
static vsi_nn_internal_tensor_t *_create_box_op
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input0,
vsi_nn_tensor_t * input1,
vsi_nn_tensor_t * output,
float bias0,
float bias1
)
{
vsi_nn_internal_tensor_t * tensor = NULL;
vsi_nn_internal_node_t* curr = NULL;
tensor = _create_output_internal_tensor(self, output);
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX, 0, 0 );
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
curr->inputs[0] = input0;
curr->inputs[1] = input1;
curr->outputs[0] = tensor->t;
curr->node->nn_param.custom_tiny_yolov4_postprocess_box.bias_0 = bias0;
curr->node->nn_param.custom_tiny_yolov4_postprocess_box.bias_1 = bias1;
vsi_nn_internal_setup_node( self, curr );
final:
return tensor;
} /* _create_box_op() */
static vsi_nn_internal_tensor_t *_create_reshape_op
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input,
vsi_nn_tensor_t * output,
vsi_size_t width
)
{
vsi_nn_internal_tensor_t * tensor = NULL;
vsi_nn_internal_node_t* curr = NULL;
vsi_size_t shape_1[] = { 1, (vsi_size_t)-1, 1 };
shape_1[0] = width;
tensor = _create_output_internal_tensor(self, output);
CHECK_PTR_FAIL_GOTO( tensor, "Create internal tensor fail.", final );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESHAPE2, 0, 0 );
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
curr->inputs[0] = input;
curr->outputs[0] = tensor->t;
curr->node->nn_param.reshape2.size = shape_1;
curr->node->nn_param.reshape2.dim_num = 3;
vsi_nn_internal_setup_node( self, curr );
final:
return tensor;
} /* _create_reshape_op() */
static vsi_bool _create_concat_op
(
vsi_nn_node_t * self,
vsi_nn_tensor_t * input0,
vsi_nn_tensor_t * input1,
vsi_nn_tensor_t * input2,
vsi_nn_tensor_t * input3,
vsi_nn_tensor_t * input4,
vsi_nn_tensor_t * input5,
vsi_nn_tensor_t * output
)
{
vsi_nn_internal_node_t* curr = NULL;
vsi_bool ret = FALSE;
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_CONCAT, 6, 1 );
CHECK_PTR_FAIL_GOTO(curr, "Create internal node failed", final);
curr->inputs[0] = input0;
curr->inputs[1] = input1;
curr->inputs[2] = input2;
curr->inputs[3] = input3;
curr->inputs[4] = input4;
curr->inputs[5] = input5;
curr->outputs[0] = output;
curr->node->nn_param.concat.axis = 1;
ret = vsi_nn_internal_setup_node( self, curr );
final:
return ret;
} /* _create_concat_op() */
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return vsi_nn_internal_compute_node( self );
} /* op_compute() */
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
BEGIN_IO_TYPE_DECL(CUSTOM_TINY_YOLOV4_POSTPROCESS, 4, 2)
IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM, D_U8|Q_ASYM)
END_IO_TYPE_DECL(CUSTOM_TINY_YOLOV4_POSTPROCESS)
if (!VALIDATE_OP_IO_TYPES(CUSTOM_TINY_YOLOV4_POSTPROCESS, self, inputs,
self->input.num, outputs, self->output.num))
{
char* desc = generate_op_io_types_desc(inputs,
self->input.num, outputs, self->output.num);
VSILOGE("Inputs/Outputs data type not support: %s", desc);
destroy_op_io_types_desc(desc);
return FALSE;
}
return TRUE;
} /* op_check() */
static vsi_status op_optimize
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs,
vsi_nn_opt_direction_e direction
)
{
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return vsi_nn_internal_optimize_node( self, direction );
}
static vsi_bool op_setup
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_bool ret = FALSE;
vsi_nn_internal_tensor_t * tensor0[12] = {NULL};
vsi_nn_internal_tensor_t * tensor1[12] = {NULL};
int32_t index_0 = 1;
int32_t index_1 = 0;
int32_t index_2 = 3;
int32_t index_3 = 2;
vsi_nn_internal_init_node_wksp( self );
/**confidence**/
/**input 0 chunk 0**/
/*
sub0:26x26x255 --> 26x26x81, begin: [0, 0, 4, 0] end: [0, 0, 85, 0] stride: [1, 1, 1, 1]
sub1[26, 26, 80] = sigmoid(sub0)[26, 26, 0:0] * sigmoid(sub0)[26, 26, 1:81]
sub2[80, 26, 26] = transpose(sub1)
sub3[80, 676] = reshape(sub2)
*/
tensor0[0] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 0);
CHECK_PTR_FAIL_GOTO( tensor0[0], "Create internal tensor fail.", final );
tensor0[1] = _create_sigmoid_op(self, tensor0[0]->t);
CHECK_PTR_FAIL_GOTO( tensor0[1], "Create internal tensor fail.", final );
tensor0[2] = _create_confidence_op(self, tensor0[1]->t, outputs[0]);
CHECK_PTR_FAIL_GOTO( tensor0[2], "Create internal tensor fail.", final );
tensor0[3] = _create_reshape_op(self, tensor0[2]->t, outputs[0], 80);
CHECK_PTR_FAIL_GOTO( tensor0[3], "Create internal tensor fail.", final );
/**chunk 1**/
/*
26x26x255 --> 26x26x81, begin: [0, 0, 89, 0] end: [0, 0, 170, 0] stride: [1, 1, 1, 1]
*/
tensor0[4] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 1);
CHECK_PTR_FAIL_GOTO( tensor0[4], "Create internal tensor fail.", final );
tensor0[5] = _create_sigmoid_op(self, tensor0[4]->t);
CHECK_PTR_FAIL_GOTO( tensor0[5], "Create internal tensor fail.", final );
tensor0[6] = _create_confidence_op(self, tensor0[5]->t, outputs[0]);
CHECK_PTR_FAIL_GOTO( tensor0[6], "Create internal tensor fail.", final );
tensor0[7] = _create_reshape_op(self, tensor0[6]->t, outputs[0], 80);
CHECK_PTR_FAIL_GOTO( tensor0[7], "Create internal tensor fail.", final );
/**chunk 2**/
/*
26x26x255 --> 26x26x81, begin: [0, 0, 174, 0] end: [0, 0, 255, 0] stride: [1, 1, 1, 1]
*/
tensor0[8] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 2);
CHECK_PTR_FAIL_GOTO( tensor0[8], "Create internal tensor fail.", final );
tensor0[9] = _create_sigmoid_op(self, tensor0[8]->t);
CHECK_PTR_FAIL_GOTO( tensor0[9], "Create internal tensor fail.", final );
tensor0[10] = _create_confidence_op(self, tensor0[9]->t, outputs[0]);
CHECK_PTR_FAIL_GOTO( tensor0[10], "Create internal tensor fail.", final );
tensor0[11] = _create_reshape_op(self, tensor0[10]->t, outputs[0], 80);
CHECK_PTR_FAIL_GOTO( tensor0[11], "Create internal tensor fail.", final );
/**input 1 chunk 0**/
/*
sub0:13x13x255 --> 13x13x81, begin: [0, 0, 4, 0] end: [0, 0, 85, 0] stride: [1, 1, 1, 1]
sub1[13, 13, 80] = sigmoid(sub0)[13, 13, 0:0] * sigmoid(sub0)[13, 13, 1:81]
sub2[80, 13, 13] = transpose(sub1)
sub3[80, 169] = reshape(sub2)
*/
tensor1[0] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 0);
CHECK_PTR_FAIL_GOTO( tensor1[0], "Create internal tensor fail.", final );
tensor1[1] = _create_sigmoid_op(self, tensor1[0]->t);
CHECK_PTR_FAIL_GOTO( tensor1[1], "Create internal tensor fail.", final );
tensor1[2] = _create_confidence_op(self, tensor1[1]->t, outputs[0]);
CHECK_PTR_FAIL_GOTO( tensor1[2], "Create internal tensor fail.", final );
tensor1[3] = _create_reshape_op(self, tensor1[2]->t, outputs[0], 80);
CHECK_PTR_FAIL_GOTO( tensor1[3], "Create internal tensor fail.", final );
/**chunk 1**/
/*
13x13x255 --> 13x13x81, begin: [0, 0, 89, 0] end: [0, 0, 170, 0] stride: [1, 1, 1, 1]
*/
tensor1[4] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 1);
CHECK_PTR_FAIL_GOTO( tensor1[4], "Create internal tensor fail.", final );
tensor1[5] = _create_sigmoid_op(self, tensor1[4]->t);
CHECK_PTR_FAIL_GOTO( tensor1[5], "Create internal tensor fail.", final );
tensor1[6] = _create_confidence_op(self, tensor1[5]->t, outputs[0]);
CHECK_PTR_FAIL_GOTO( tensor1[6], "Create internal tensor fail.", final );
tensor1[7] = _create_reshape_op(self, tensor1[6]->t, outputs[0], 80);
CHECK_PTR_FAIL_GOTO( tensor1[7], "Create internal tensor fail.", final );
/**chunk 2**/
/*
13x13x255 --> 13x13x81, begin: [0, 0, 174, 0] end: [0, 0, 255, 0] stride: [1, 1, 1, 1]
*/
tensor1[8] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 2);
CHECK_PTR_FAIL_GOTO( tensor1[8], "Create internal tensor fail.", final );
tensor1[9] = _create_sigmoid_op(self, tensor1[8]->t);
CHECK_PTR_FAIL_GOTO( tensor1[9], "Create internal tensor fail.", final );
tensor1[10] = _create_confidence_op(self, tensor1[9]->t, outputs[0]);
CHECK_PTR_FAIL_GOTO( tensor1[10], "Create internal tensor fail.", final );
tensor1[11] = _create_reshape_op(self, tensor1[10]->t, outputs[0], 80);
CHECK_PTR_FAIL_GOTO( tensor1[11], "Create internal tensor fail.", final );
ret = _create_concat_op(self, tensor0[3]->t, tensor0[7]->t, tensor0[11]->t,
tensor1[3]->t, tensor1[7]->t, tensor1[11]->t, outputs[0]);
if (ret == FALSE)
{
VSILOGE("Create concat operation fail");
goto final;
}
ret = FALSE;
/**box**/
/*
26x26x255 --> 26x26x4, begin: [0, 0, 0, 0] end: [0, 0, 4, 0] stride: [1, 1, 1, 1]
*/
tensor0[0] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 3);
CHECK_PTR_FAIL_GOTO( tensor0[0], "Create internal tensor fail.", final );
tensor0[1] = _create_box_op(self, tensor0[0]->t, inputs[index_2], outputs[1], 23, 27);
CHECK_PTR_FAIL_GOTO( tensor0[1], "Create internal tensor fail.", final );
tensor0[2] = _create_reshape_op(self, tensor0[1]->t, outputs[1], 4);
CHECK_PTR_FAIL_GOTO( tensor0[2], "Create internal tensor fail.", final );
/*
26x26x255 --> 26x26x4, begin: [0, 0, 85, 0] end: [0, 0, 89, 0] stride: [1, 1, 1, 1]
*/
tensor0[3] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 4);
CHECK_PTR_FAIL_GOTO( tensor0[3], "Create internal tensor fail.", final );
tensor0[4] = _create_box_op(self, tensor0[3]->t, inputs[index_2], outputs[1], 37, 58);
CHECK_PTR_FAIL_GOTO( tensor0[4], "Create internal tensor fail.", final );
tensor0[5] = _create_reshape_op(self, tensor0[4]->t, outputs[1], 4);
CHECK_PTR_FAIL_GOTO( tensor0[5], "Create internal tensor fail.", final );
/*
26x26x255 --> 26x26x4, begin: [0, 0, 170, 0] end: [0, 0, 174, 0] stride: [1, 1, 1, 1]
*/
tensor0[6] = _create_strided_slice_op(self, inputs[index_0], 11, 11, 5);
CHECK_PTR_FAIL_GOTO( tensor0[6], "Create internal tensor fail.", final );
tensor0[7] = _create_box_op(self, tensor0[6]->t, inputs[index_2], outputs[1], 81, 82);
CHECK_PTR_FAIL_GOTO( tensor0[7], "Create internal tensor fail.", final );
tensor0[8] = _create_reshape_op(self, tensor0[7]->t, outputs[1], 4);
CHECK_PTR_FAIL_GOTO( tensor0[8], "Create internal tensor fail.", final );
/*
13x13x255 --> 13x13x4, begin: [0, 0, 0, 0] end: [0, 0, 4, 0] stride: [1, 1, 1, 1]
*/
tensor1[0] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 3);
CHECK_PTR_FAIL_GOTO( tensor1[0], "Create internal tensor fail.", final );
tensor1[1] = _create_box_op(self, tensor1[0]->t, inputs[index_3], outputs[1], 81, 82);
CHECK_PTR_FAIL_GOTO( tensor1[1], "Create internal tensor fail.", final );
tensor1[2] = _create_reshape_op(self, tensor1[1]->t, outputs[1], 4);
CHECK_PTR_FAIL_GOTO( tensor1[2], "Create internal tensor fail.", final );
/*
13x13x255 --> 13x13x4, begin: [0, 0, 85, 0] end: [0, 0, 89, 0] stride: [1, 1, 1, 1]
*/
tensor1[3] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 4);
CHECK_PTR_FAIL_GOTO( tensor1[3], "Create internal tensor fail.", final );
tensor1[4] = _create_box_op(self, tensor1[3]->t, inputs[index_3], outputs[1], 135, 169);
CHECK_PTR_FAIL_GOTO( tensor1[4], "Create internal tensor fail.", final );
tensor1[5] = _create_reshape_op(self, tensor1[4]->t, outputs[1], 4);
CHECK_PTR_FAIL_GOTO( tensor1[5], "Create internal tensor fail.", final );
/*
13x13x255 --> 13x13x4, begin: [0, 0, 170, 0] end: [0, 0, 174, 0] stride: [1, 1, 1, 1]
*/
tensor1[6] = _create_strided_slice_op(self, inputs[index_1], 11, 11, 5);
CHECK_PTR_FAIL_GOTO( tensor1[6], "Create internal tensor fail.", final );
tensor1[7] = _create_box_op(self, tensor1[6]->t, inputs[index_3], outputs[1], 344, 319);
CHECK_PTR_FAIL_GOTO( tensor1[7], "Create internal tensor fail.", final );
tensor1[8] = _create_reshape_op(self, tensor1[7]->t, outputs[1], 4);
CHECK_PTR_FAIL_GOTO( tensor1[8], "Create internal tensor fail.", final );
ret = _create_concat_op(self, tensor0[2]->t, tensor0[5]->t, tensor0[8]->t,
tensor1[2]->t, tensor1[5]->t, tensor1[8]->t, outputs[1]);
if (ret == FALSE)
{
VSILOGE("Create concat operation fail");
goto final;
}
final:
return ret;
} /* op_setup() */
static vsi_status op_init
(
vsi_nn_node_t* self
)
{
int32_t i = 0;
vsi_nn_custom_tiny_yolov4_postprocess_param *p = &self->nn_param.custom_tiny_yolov4_postprocess;
p->local = \
(custom_tiny_yolov4_postprocess_local_data_t*)malloc(sizeof(custom_tiny_yolov4_postprocess_local_data_t));
CHECK_PTR_FAIL_GOTO(p->local, "create buffer fail", final);
memset(p->local, 0, sizeof(custom_tiny_yolov4_postprocess_local_data_t));
for ( i = 0; i < VSI_NN_MAX_DIM_NUM; i++ )
{
p->local->stride_dims[i] = 1;
}
p->local->begin_dims[0][2] = 4;
p->local->end_dims[0][2] = 85;
p->local->begin_dims[1][2] = 89;
p->local->end_dims[1][2] = 170;
p->local->begin_dims[2][2] = 174;
p->local->end_dims[2][2] = 255;
p->local->begin_dims[3][2] = 0;
p->local->end_dims[3][2] = 4;
p->local->begin_dims[4][2] = 85;
p->local->end_dims[4][2] = 89;
p->local->begin_dims[5][2] = 170;
p->local->end_dims[5][2] = 174;
final:
return VSI_SUCCESS;
} /* op_init() */
static vsi_status op_deinit
(
vsi_nn_node_t* self
)
{
vsi_status status = VSI_SUCCESS;
status = vsi_nn_op_common_deinit(self);
vsi_nn_safe_free(self->nn_param.custom_tiny_yolov4_postprocess.local);
vsi_nn_internal_deinit_node_wksp( self );
return status;
} /* op_deinit() */
__BEGIN_DECLS
/* Registrar */
DEF_OP_REG
(
/* op_name */ CUSTOM_TINY_YOLOV4_POSTPROCESS,
/* init */ op_init,
/* compute */ op_compute,
/* deinit */ op_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ op_optimize,
/* input_num */ _INPUT_NUM,
/* output_num */ _OUTPUT_NUM
);
__END_DECLS

View File

@@ -35,9 +35,9 @@
 #include "utils/vsi_nn_util.h"
 #include "kernel/vsi_nn_kernel.h"
-typedef struct _bilinear_grid_sample_local_data_t {
+typedef struct _custom_tiny_yolov4_postprocess_box_local_data_t {
     int32_t placeholder;
-} bilinear_grid_sample_local_data_t;
+} custom_tiny_yolov4_postprocess_box_local_data_t;
 /*
 Declare number of input and output.
@@ -53,27 +53,25 @@ static vsi_status op_compute
     )
 {
     vsi_status status = VSI_FAILURE;
-    vsi_nn_kernel_param_t* param = NULL;
-    int32_t align_corners = self->nn_param.bilinear_grid_sample.align_corners;
-    vsi_nn_kernel_node_t n;
+    vsi_nn_kernel_param_t * param = NULL;
+    float bias_0 = self->nn_param.custom_tiny_yolov4_postprocess_box.bias_0;
+    float bias_1 = self->nn_param.custom_tiny_yolov4_postprocess_box.bias_1;
     param = vsi_nn_kernel_param_create();
-    vsi_nn_kernel_param_add_int32(param, "align_corners", align_corners);
-    n = vsi_nn_kernel_selector(
-        self->graph, "bilinear_grid_sample", inputs, 2, outputs, 1, param);
-    if (n == NULL) {
-        vsi_nn_kernel_param_release(&param);
-        status = VSI_FAILURE;
-        return status;
-    }
-    self->n = (vx_node)n;
-    vsi_nn_kernel_param_release(&param);
-    if (self->n) {
+    vsi_nn_kernel_param_add_float32( param, "bias_0", bias_0 );
+    vsi_nn_kernel_param_add_float32( param, "bias_1", bias_1 );
+    self->n = vsi_nn_kernel_selector( self->graph, "tiny_yolov4_postprocess_box",
+        inputs, _INPUT_NUM, outputs, _OUTPUT_NUM, param );
+    if ( self->n )
+    {
         status = VSI_SUCCESS;
     }
+    vsi_nn_kernel_param_release( &param );
     return status;
 } /* op_compute() */
@@ -85,6 +83,9 @@ static vsi_bool op_check
     )
 {
     /*TODO: Check tensor shapes. */
+    VSI_UNREFERENCED(self);
+    VSI_UNREFERENCED(inputs);
+    VSI_UNREFERENCED(outputs);
     return TRUE;
 } /* op_check() */
@@ -95,61 +96,36 @@ static vsi_bool op_setup
     vsi_nn_tensor_t ** outputs
     )
 {
-    if (NULL == self) {
-        return FALSE;
-    }
-    if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num) {
-        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
-        outputs[0]->attr.size[0] = inputs[1]->attr.size[1];
-        outputs[0]->attr.size[1] = inputs[1]->attr.size[2];
-        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
-        if (4 == inputs[0]->attr.dim_num) {
-            outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
-        }
-    }
-    return TRUE;
+    uint32_t rank = inputs[0]->attr.dim_num;
+    vsi_bool ret = TRUE;
+    VSI_UNREFERENCED(self);
+    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
+    {
+        outputs[0]->attr.dim_num = rank;
+        outputs[0]->attr.size[0] = inputs[0]->attr.size[2];
+        outputs[0]->attr.size[1] = inputs[0]->attr.size[0];
+        outputs[0]->attr.size[2] = inputs[0]->attr.size[1];
+        if (rank > 3)
+        {
+            memcpy( &outputs[0]->attr.size[3], &inputs[0]->attr.size[3], (rank - 3) * sizeof(vsi_size_t) );
+        }
+    }
+    return ret;
 } /* op_setup() */
-static vsi_status op_init
-    (
-    vsi_nn_node_t* self
-    )
-{
-    /* TODO
-    //self->nn_param.bilinear_grid_sample.local = \
-    //    (bilinear_grid_sample_local_data_t*)malloc(sizeof(bilinear_grid_sample_local_data_t));
-    */
-    return VSI_SUCCESS;
-} /* op_init() */
-static vsi_status op_deinit
-    (
-    vsi_nn_node_t* self
-    )
-{
-    vsi_status status = VSI_SUCCESS;
-    status = vsi_nn_op_common_deinit(self);
-    /* TODO
-    //vsi_nn_safe_free(self->nn_param.bilinear_grid_sample.local);
-    */
-    return status;
-} /* op_deinit() */
 __BEGIN_DECLS
 /* Registrar */
 DEF_OP_REG
     (
-    /* op_name    */ BILINEAR_GRID_SAMPLE,
-    /* init       */ op_init,
+    /* op_name    */ CUSTOM_TINY_YOLOV4_POSTPROCESS_BOX,
+    /* init       */ NULL,
     /* compute    */ op_compute,
-    /* deinit     */ op_deinit,
+    /* deinit     */ vsi_nn_op_common_deinit,
     /* check      */ op_check,
     /* setup      */ op_setup,
     /* optimize   */ NULL,

View File

@@ -0,0 +1,127 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
typedef struct _tiny_yolov4_postprocess_confidence_local_data_t {
int32_t placeholder;
} tiny_yolov4_postprocess_confidence_local_data_t;
/*
Declare number of input and output.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_status status = VSI_FAILURE;
self->n = vsi_nn_kernel_selector( self->graph, "tiny_yolov4_postprocess_confidence",
inputs, 1, outputs, 1, NULL );
if ( self->n )
{
status = VSI_SUCCESS;
}
return status;
} /* op_compute() */
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
/*TODO: Check tensor shapes. */
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE;
} /* op_check() */
static vsi_bool op_setup
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
uint32_t rank = inputs[0]->attr.dim_num;
vsi_bool ret = TRUE;
VSI_UNREFERENCED(self);
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
{
outputs[0]->attr.dim_num = rank;
outputs[0]->attr.size[0] = inputs[0]->attr.size[2] - 1;
outputs[0]->attr.size[1] = inputs[0]->attr.size[0];
outputs[0]->attr.size[2] = inputs[0]->attr.size[1];
if (rank > 3)
{
memcpy( &outputs[0]->attr.size[3], &inputs[0]->attr.size[3], (rank - 3) * sizeof(vsi_size_t) );
}
}
return ret;
} /* op_setup() */
__BEGIN_DECLS
/* Registrar */
DEF_OP_REG
(
/* op_name */ CUSTOM_TINY_YOLOV4_POSTPROCESS_CONFIDENCE,
/* init */ NULL,
/* compute */ op_compute,
/* deinit */ vsi_nn_op_common_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ NULL,
/* input_num */ _INPUT_NUM,
/* output_num */ _OUTPUT_NUM
);
__END_DECLS

View File

@@ -54,20 +54,26 @@ DEF_KERNEL_EXECUTOR(_softmax_compute)
     size_t param_size
     )
 {
-    vsi_status status = VX_SUCCESS;
+    vsi_status status = VSI_FAILURE;
     float *buffer[_CPU_IO_NUM] = {NULL};
     vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = {NULL};
     vsi_nn_kernel_tensor_attr_t *attr[_CPU_IO_NUM] = {NULL};
     uint32_t i = 0, out_elements = 0;
     int32_t axis;
+    VSI_UNREFERENCED(node);
+    VSI_UNREFERENCED(param_size);
     tensors[0] = (vsi_nn_kernel_tensor_t)param[0]; // input0
     tensors[1] = (vsi_nn_kernel_tensor_t)param[1]; // input1
     tensors[2] = (vsi_nn_kernel_tensor_t)param[2]; // output
     attr[0] = vsi_nn_kernel_tensor_attr_create(tensors[0]);
+    CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
     attr[1] = vsi_nn_kernel_tensor_attr_create(tensors[1]);
+    CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
     attr[2] = vsi_nn_kernel_tensor_attr_create(tensors[2]);
+    CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
     status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
     CHECK_STATUS_FAIL_GOTO(status, final );
@@ -133,6 +139,8 @@ static vsi_status _query_kernel
     vsi_nn_kernel_t* kernel
     )
 {
+    VSI_UNREFERENCED(inputs);
+    VSI_UNREFERENCED(outputs);
     memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
     return VSI_SUCCESS;
 }
@@ -153,6 +161,9 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis = 0;
+    VSI_UNREFERENCED(input_num);
+    VSI_UNREFERENCED(output_num);
     axis = vsi_nn_kernel_param_get_int32(params, "axis");
     status = _query_kernel(inputs, outputs, kernel);
     if(status != VSI_SUCCESS)

View File

@@ -54,7 +54,7 @@ DEF_KERNEL_EXECUTOR(_softmax_exec)
     size_t param_size
     )
 {
-    vsi_status status = VX_SUCCESS;
+    vsi_status status = VSI_FAILURE;
     float* buffer[_CPU_IO_NUM] = { NULL };
     vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
     vsi_nn_kernel_tensor_attr_t* attr[_CPU_IO_NUM] = { NULL };
@@ -64,11 +64,16 @@ DEF_KERNEL_EXECUTOR(_softmax_exec)
     float fMax = 0.0;
     float fProbSum = 0.0f;
+    VSI_UNREFERENCED(node);
+    VSI_UNREFERENCED(param_size);
     tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
     tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
     attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
+    CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
     attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
+    CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
     status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &sf_axis);
     CHECK_STATUS_FAIL_GOTO(status, final );
@@ -141,6 +146,8 @@ static vsi_status _query_kernel
     vsi_nn_kernel_t* kernel
     )
 {
+    VSI_UNREFERENCED(inputs);
+    VSI_UNREFERENCED(outputs);
     memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
     return VSI_SUCCESS;
 }
@@ -161,6 +168,9 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis = 0;
+    VSI_UNREFERENCED(input_num);
+    VSI_UNREFERENCED(output_num);
     axis = vsi_nn_kernel_param_get_int32(params, "axis");
     status = _query_kernel( inputs, outputs, kernel );

View File

@@ -62,6 +62,7 @@ static vx_param_description_t _custom_warp_affine_kernel_param_def[] =
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     // Add kernel parameters here
 };
 #define _CUSTOM_WARP_AFFINE_PARAM_NUM  _cnt_of_array( _custom_warp_affine_kernel_param_def )
@@ -97,7 +98,7 @@ static vsi_bool _read_pixel
     if (out_of_bounds)
     {
-        *pixel = 205.0f;
+        *pixel = 0.0f;
         return TRUE;
     }
@@ -125,6 +126,7 @@ DEF_KERNEL_EXECUTOR(_compute)
     vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
     vsi_nn_kernel_tensor_attr_t* attr[_CPU_IO_NUM] = { NULL };
     int32_t type = 0;
+    int32_t rgb_type = 0;
     float matrix[6] = {0};
     vsi_size_t i = 0;
     vsi_size_t b = 0;
@@ -135,11 +137,16 @@ DEF_KERNEL_EXECUTOR(_compute)
     vsi_size_t height = 0;
     vsi_size_t outer_size = 1;
+    VSI_UNREFERENCED(node);
+    VSI_UNREFERENCED(param_size);
     tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
     tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
     attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
+    CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
     attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
+    CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
     out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
@@ -153,6 +160,7 @@ DEF_KERNEL_EXECUTOR(_compute)
     status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_INPUT_TYPE],
             &type);
+    status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[9], &rgb_type);
     CHECK_STATUS_FAIL_GOTO(status, final );
     for (i = 0; i < 6; i++)
     {
@@ -172,34 +180,95 @@ DEF_KERNEL_EXECUTOR(_compute)
     {
         float *src_base = buffer[0] + b * attr[0]->shape->data[0] * attr[0]->shape->data[1];
         float *dst_base = buffer[1] + b * width * height;
-        for (y = 0; y < height; y++)
-        {
-            for (x = 0; x < width; x++)
-            {
-                float xf = 0;
-                float yf = 0;
-                float dst = 0;
-                _transform_affine(x, y, matrix, &xf, &yf);
-                if (type == VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR)
-                {
-                    _read_pixel(src_base, attr[0], xf, yf, &dst);
-                    dst_base[y * width + x] = dst;
-                }
-                else
-                {
-                    float tl = 0, tr = 0, bl = 0, br = 0;
-                    float ar = xf - floorf(xf);
-                    float ab = yf - floorf(yf);
-                    float al = 1.0f - ar;
-                    float at = 1.0f - ab;
-                    _read_pixel(src_base, attr[0], floorf(xf), floorf(yf), &tl);
-                    _read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf), &tr);
-                    _read_pixel(src_base, attr[0], floorf(xf), floorf(yf) + 1, &bl);
-                    _read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf) + 1, &br);
-                    dst_base[y * width + x] = tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
-                }
-            }
-        }
+        if ( rgb_type == VSI_NN_WARP_AFFINE_TYPE_RGB )
+        {
+            width = width / 3;
+            for (y = 0; y < height; y++)
+            {
+                for (x = 0; x < width; x++)
+                {
+                    float xf = 0;
+                    float yf = 0;
+                    float dst = 0;
+                    _transform_affine(x, y, matrix, &xf, &yf);
+                    if (type == VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR)
+                    {
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf), floorf(yf), &dst);
+                        dst_base[y * 3 * width + 3 * x] = dst;
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf) + 1, floorf(yf), &dst);
+                        dst_base[y * 3 * width + 3 * x + 1] = dst;
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf) + 2, floorf(yf), &dst);
+                        dst_base[y * 3 * width + 3 * x + 2] = dst;
+                    }
+                    else
+                    {
+                        float tl = 0, tr = 0, bl = 0, br = 0;
+                        float ar = xf - floorf(xf);
+                        float ab = yf - floorf(yf);
+                        float al = 1.0f - ar;
+                        float at = 1.0f - ab;
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf), floorf(yf), &tl);
+                        _read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1), floorf(yf), &tr);
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf), floorf(yf) + 1, &bl);
+                        _read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1), floorf(yf) + 1, &br);
+                        dst_base[y * 3 * width + 3 * x] =
+                            tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf) + 1, floorf(yf), &tl);
+                        _read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 1, floorf(yf), &tr);
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf) + 1, floorf(yf) + 1, &bl);
+                        _read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 1, floorf(yf) + 1, &br);
+                        dst_base[y * 3 * width + 3 * x + 1] =
+                            tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf) + 2, floorf(yf), &tl);
+                        _read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 2, floorf(yf), &tr);
+                        _read_pixel(src_base, attr[0], 3 * floorf(xf) + 2, floorf(yf) + 1, &bl);
+                        _read_pixel(src_base, attr[0], 3 * (floorf(xf) + 1) + 2, floorf(yf) + 1, &br);
+                        dst_base[y * 3 * width + 3 * x + 2] =
+                            tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
+                    }
+                }
+            }
+        }
+        else
+        {
+            for (y = 0; y < height; y++)
+            {
+                for (x = 0; x < width; x++)
+                {
+                    float xf = 0;
+                    float yf = 0;
+                    float dst = 0;
+                    _transform_affine(x, y, matrix, &xf, &yf);
+                    if (type == VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR)
+                    {
+                        _read_pixel(src_base, attr[0], xf, yf, &dst);
+                        dst_base[y * width + x] = dst;
+                    }
+                    else
+                    {
+                        float tl = 0, tr = 0, bl = 0, br = 0;
+                        float ar = xf - floorf(xf);
+                        float ab = yf - floorf(yf);
+                        float al = 1.0f - ar;
+                        float at = 1.0f - ab;
+                        _read_pixel(src_base, attr[0], floorf(xf), floorf(yf), &tl);
+                        _read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf), &tr);
+                        _read_pixel(src_base, attr[0], floorf(xf), floorf(yf) + 1, &bl);
+                        _read_pixel(src_base, attr[0], floorf(xf) + 1, floorf(yf) + 1, &br);
+                        dst_base[y * width + x] = tl * al * at + tr * ar * at + bl * al * ab + br * ar * ab;
+                    }
+                }
+            }
+        }
     }
@@ -233,6 +302,8 @@ static vsi_status _query_kernel
     )
 {
     vsi_status status = VSI_FAILURE;
+    VSI_UNREFERENCED(inputs);
+    VSI_UNREFERENCED(outputs);
     snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
     kernel->info.function    = _compute;
     kernel->info.parameters  = _custom_warp_affine_kernel_param_def;
@@ -260,6 +331,7 @@ static vsi_nn_kernel_node_t _setup
     size_t i = 0;
     size_t buffer_size = 0;
     int32_t type = vsi_nn_kernel_param_get_int32( params, "type");
+    int32_t rgb_type = vsi_nn_kernel_param_get_int32( params, "rgb_type");
     float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size );
     status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
@@ -278,6 +350,8 @@ static vsi_nn_kernel_node_t _setup
             node_params[SCALAR_MATRIX_OFFSET + i] = vsi_nn_kernel_scalar_create(
                     graph, F32, &buffer[i] );
         }
+        node_params[9] = vsi_nn_kernel_scalar_create(
+                graph, I32, &rgb_type );
         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, node_params, _CUSTOM_WARP_AFFINE_PARAM_NUM );
@@ -286,6 +360,7 @@ static vsi_nn_kernel_node_t _setup
         {
             vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] );
         }
+        vsi_nn_kernel_scalar_release( &node_params[9] );
     }
 }
 return node;

View File

@@ -95,7 +95,7 @@ static vsi_bool _read_pixel
     )
 {
     vsi_size_t width = attr->shape->data[0];
-    vsi_size_t height = attr->shape->data[1];
+    vsi_size_t height = attr->shape->size > 1 ? attr->shape->data[1] : 1;
     vsi_bool out_of_bounds = (x < 0 || y < 0 || x >= width || y >= height);
     vsi_size_t bx = 0, by = 0;
@@ -139,11 +139,16 @@ DEF_KERNEL_EXECUTOR(_compute)
     vsi_size_t height = 0;
     vsi_size_t outer_size = 1;
+    VSI_UNREFERENCED(node);
+    VSI_UNREFERENCED(param_size);
     tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
     tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
     attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
+    CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
     attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
+    CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
     out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
@@ -237,6 +242,8 @@ static vsi_status _query_kernel
     )
 {
     vsi_status status = VSI_FAILURE;
+    VSI_UNREFERENCED(inputs);
+    VSI_UNREFERENCED(outputs);
     snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
     kernel->info.function    = _compute;
     kernel->info.parameters  = _custom_warp_perspective_kernel_param_def;

View File

@@ -73,6 +73,8 @@ DEF_KERNEL_INITIALIZER(_softmax_initializer)
         {0, 0, 0},  // local_size: local group size in thread
         {0, 0, 0}}; // global_size: image size in thread
+    VSI_UNREFERENCED(param_size);
     attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     if (!attr)
     {
@@ -144,6 +146,8 @@ static vsi_status _query_kernel
     vsi_nn_kernel_t* kernel
     )
 {
+    VSI_UNREFERENCED(inputs);
+    VSI_UNREFERENCED(outputs);
     memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
     vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
@@ -170,6 +174,9 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis = 0;
+    VSI_UNREFERENCED(input_num);
+    VSI_UNREFERENCED(output_num);
     axis = vsi_nn_kernel_param_get_int32(params, "axis");
     status = _query_kernel( inputs, outputs, kernel );

View File

@ -0,0 +1,357 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
typedef enum
{
INTERNAL_KERNEL_TINY_YOLOV4_POSTPROCESS_BOX,
} _internal_kernel_e;
#define _SOURCE "tiny_yolov4_postprocess_box"
#define _KERNEL_NAME CVIVANTE_NAMESPACE("evis.tiny_yolov4_postprocess_box_U8_U8toU8")
// Add kernel hashtable here
#define TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
(( IN0_DTYPE ) | ( IN1_DTYPE << 8 ) | ( OUT_DTYPE << 16 ))
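/* Each dtype enum lands in its own byte of the 32-bit key, so a kernel-map
 * lookup reduces to one integer compare. Worked example, assuming the
 * hypothetical enum value U8 == 2 (see vsi_nn_kernel_dtype_e for the real
 * values): TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( U8, U8, U8 )
 *   == 0x02 | (0x02 << 8) | (0x02 << 16) == 0x00020202. */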
#define PACK_KERNEL_MAP( IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
{ TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ), \
_KERNEL_NAME, _SOURCE }
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _tiny_yolov4_postprocess_box_kernel_map[] =
{
// Register kernel here
PACK_KERNEL_MAP( U8, U8, U8 ),
};
/*
* Kernel params
*/
static vx_param_description_t _tiny_yolov4_postprocess_box_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
// Add kernel parameters here
};
#define _TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM _cnt_of_array( _tiny_yolov4_postprocess_box_kernel_param_def )
#define SCALAR_BIAS_0_VALUE (3)
#define SCALAR_BIAS_1_VALUE (4)
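/* Parameter layout mirrors _tiny_yolov4_postprocess_box_kernel_param_def:
 * indices 0..2 are the two input tensors and the output tensor, and
 * indices 3..4 carry the bias_0 / bias_1 anchor scalars created in _setup(). */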
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_tiny_yolov4_postprocess_box_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
float CONST2 = 16.0f;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
attr[2] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
// Add initializer
gpu_param.dim = 2;
gpu_param.global_scale[0] = 4;
gpu_param.global_scale[1] = 1;
gpu_param.global_size[0] = gpu_align_p2(
(attr[0]->shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0], 8);
gpu_param.global_size[1] = 1;
if (attr[0]->shape->data[0] == 13 * 13)
{
CONST2 = 32.0f;
}
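/* CONST2 is presumably the feature-map stride: with a 416x416 input the
 * 26x26 head has stride 416/26 = 16 and the 13x13 head 416/13 = 32. */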
if (attr[0]->dtype == U8 && attr[1]->dtype == U8 && attr[2]->dtype == U8)
{
float input0_scale = attr[0]->scale;
float input0_tail = 0 - (float)attr[0]->zero_point * input0_scale;
float input1_scale = attr[1]->scale;
float input1_tail = 0 - (float)attr[1]->zero_point * input1_scale;
float output_scale = 1.0f / attr[2]->scale;
float output_zp = (float)attr[2]->zero_point;
gpu_dp_inst_t uniExtract8Data_2x8 = {{
0x33333333, // TCfg
0x11110000, // ASelt
0x03020100, 0x03020100, // ABin
0x00000000, // BSelt
0x00000000, 0x00000000, // BBin
0x00002400, // AccumType, ConstantType, and PostShift
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniDatatoFloat32_0_4x4 = {{
0x01010101, // TCfg
0x00000000, // ASelt
0x00010000, 0x00030002, // ABin
0x02020202, // BSelt
0x00000000, 0x00000000, // BBin
0x00000400, // AccumType, ConstantType, and PostShift
0x00000001, 0x00000000, 0x00000001, 0x00000000,
0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniDatatoFloat32_1_4x4 = {{
0x01010101, // TCfg
0x00000000, // ASelt
0x00050004, 0x00070006, // ABin
0x02020202, // BSelt
0x00000000, 0x00000000, // BBin
0x00000400, // AccumType, ConstantType, and PostShift
0x00000001, 0x00000000, 0x00000001, 0x00000000,
0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniDataTranspose_0_2x8 = {{
0x11111111, // TCfg
0x00000000, // ASelt
0x0c080400, 0x0d090501, // ABin
0x22222222, // BSelt
0x00000000, 0x00000000, // BBin
0x00000400, // AccumType, ConstantType, and PostShift
0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniDataTranspose_1_2x8 = {{
0x11111111, // TCfg
0x00000000, // ASelt
0x0e0a0602, 0x0f0b0703, // ABin
0x22222222, // BSelt
0x00000000, 0x00000000, // BBin
0x00000400, // AccumType, ConstantType, and PostShift
0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
}, GPU_DP_TYPE_16};
status = vsi_nn_kernel_gpu_add_param( node, "uniDatatoFloat32_0_4x4", &uniDatatoFloat32_0_4x4);
status |= vsi_nn_kernel_gpu_add_param( node, "uniDatatoFloat32_1_4x4", &uniDatatoFloat32_1_4x4);
status |= vsi_nn_kernel_gpu_add_param( node, "uniExtract8Data_2x8", &uniExtract8Data_2x8);
status |= vsi_nn_kernel_gpu_add_param( node, "uniDataTranspose_0_2x8", &uniDataTranspose_0_2x8);
status |= vsi_nn_kernel_gpu_add_param( node, "uniDataTranspose_1_2x8", &uniDataTranspose_1_2x8);
status |= vsi_nn_kernel_gpu_add_param( node, "input0_scale", &input0_scale);
status |= vsi_nn_kernel_gpu_add_param( node, "input0_tail", &input0_tail);
status |= vsi_nn_kernel_gpu_add_param( node, "input1_scale", &input1_scale);
status |= vsi_nn_kernel_gpu_add_param( node, "input1_tail", &input1_tail);
status |= vsi_nn_kernel_gpu_add_param( node, "output_scale", &output_scale);
status |= vsi_nn_kernel_gpu_add_param( node, "output_zp", &output_zp);
status |= vsi_nn_kernel_gpu_add_param( node, "CONST2", &CONST2);
CHECK_STATUS_FAIL_GOTO(status, final );
}
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (attr[0])
{
vsi_nn_kernel_tensor_attr_release( &attr[0] );
}
if (attr[1])
{
vsi_nn_kernel_tensor_attr_release( &attr[1] );
}
if (attr[2])
{
vsi_nn_kernel_tensor_attr_release( &attr[2] );
}
return status;
} /* _tiny_yolov4_postprocess_box_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in0_dtype;
vsi_nn_kernel_dtype_e in1_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _tiny_yolov4_postprocess_box_kernel_map;
size_t kernel_map_size = _cnt_of_array( _tiny_yolov4_postprocess_box_kernel_map );
vx_param_description_t * param_def = _tiny_yolov4_postprocess_box_kernel_param_def;
vx_kernel_initialize_f initializer = _tiny_yolov4_postprocess_box_initializer;
uint32_t key;
uint32_t i;
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
key = TINY_YOLOV4_POSTPROCESS_BOX_HASH_KEY( in0_dtype, in1_dtype, out_dtype );
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = _cnt_of_array( _tiny_yolov4_postprocess_box_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
"vsi_nn_kernel_header",
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
vsi_size_t shape[3][VSI_NN_MAX_DIM_NUM] = { 0 };
vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
float bias_0 = vsi_nn_kernel_param_get_float32( params, "bias_0" );
float bias_1 = vsi_nn_kernel_param_get_float32( params, "bias_1" );
memcpy(shape[0], inputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
shape[0][0] = shape[0][0] * shape[0][1];
shape[0][1] = shape[0][2];
shape[0][2] = 1;
memcpy(shape[1], inputs[1]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
shape[1][0] = shape[1][0] * shape[1][1];
shape[1][1] = shape[1][2];
shape[1][2] = 1;
memcpy(shape[2], outputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
shape[2][0] = shape[2][0];
shape[2][1] = shape[2][2] * shape[2][1];
shape[2][2] = 1;
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape[0], inputs[0]->attr.dim_num );
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
inputs[1], shape[1], inputs[1]->attr.dim_num );
reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
outputs[0], shape[2], outputs[0]->attr.dim_num );
if ( !vsi_nn_kernel_gpu_check_shape(
reshape_tensors[0]->attr.size, reshape_tensors[0]->attr.dim_num ) )
{
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
vsi_safe_release_tensor( reshape_tensors[2] );
return NULL;
}
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM,
reshape_tensors, input_num, &reshape_tensors[2], output_num );
/* Pass parameters to node. */
node_params[SCALAR_BIAS_0_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &bias_0 );
node_params[SCALAR_BIAS_1_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &bias_1 );
status = vsi_nn_kernel_node_pass_param( node, node_params, _TINY_YOLOV4_POSTPROCESS_BOX_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_BIAS_0_VALUE] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_BIAS_1_VALUE] );
}
}
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
vsi_safe_release_tensor( reshape_tensors[2] );
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_EVIS( tiny_yolov4_postprocess_box, _setup )
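A minimal sketch of how an op's compute hook might hand work to this backend,
patterned on the custom_warp_affine op_compute later in this commit. The
"bias_0"/"bias_1" keys match _setup() above; vsi_nn_kernel_param_add_float32
and the exact vsi_nn_kernel_selector signature are assumed by symmetry with
the calls visible in this diff, so treat this as illustrative wiring, not the
op's actual code:

vsi_nn_kernel_param_t * param = vsi_nn_kernel_param_create();
/* hypothetical anchor sizes for one yolo head */
vsi_nn_kernel_param_add_float32( param, "bias_0", 23.0f );
vsi_nn_kernel_param_add_float32( param, "bias_1", 27.0f );
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
    "tiny_yolov4_postprocess_box",
    inputs, 2, outputs, 1, param );
vsi_nn_kernel_param_release( &param );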

View File

@ -0,0 +1,320 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
typedef enum
{
INTERNAL_KERNEL_TINY_YOLOV4_POSTPROCESS_CONFIDENCE,
} _internal_kernel_e;
#define _SOURCE "tiny_yolov4_postprocess_confidence"
#define _KERNEL_NAME CVIVANTE_NAMESPACE("evis.tiny_yolov4_postprocess_conf_U8toU8")
// Add kernel hashtable here
#define _CONFIDENCE_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
(( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
{ _CONFIDENCE_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
_KERNEL_NAME, _SOURCE }
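/* Note the byte order differs from the box kernel's key (OUT_DTYPE sits in
 * the low byte here); a key only has to be unique within this file's map. */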
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _tiny_yolov4_postprocess_confidence_kernel_map[] =
{
// Register kernel here
PACK_KERNEL_MAP( U8, U8 ),
};
/*
* Kernel params
*/
static vx_param_description_t _tiny_yolov4_postprocess_confidence_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
// Add kernel parameters here
};
#define _TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM \
_cnt_of_array( _tiny_yolov4_postprocess_confidence_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_tiny_yolov4_postprocess_confidence_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
gpu_param.dim = 2;
gpu_param.global_scale[0] = 4;
gpu_param.global_scale[1] = 4;
gpu_param.global_size[0] = gpu_align_p2(
(attr[0]->shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0], 4);
gpu_param.global_size[1] = (
(attr[1]->shape->data[0] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1]);
if (attr[0]->dtype == U8 && attr[1]->dtype == U8)
{
float output_scale = attr[0]->scale * attr[0]->scale / attr[1]->scale;
int output_zp = attr[1]->zero_point;
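/* The kernel multiplies two values read from the same U8 input
 * (uniU8TimesU8 below), so the raw product carries scale * scale;
 * dividing by the output scale folds the whole requantization into a
 * single fixed-point multiplier. */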
uint16_t M0 = 0;
int32_t postShift = 0;
int32_t i = 0;
gpu_dp_inst_t uniU8TimesU8_0_4x4 = {{
0x01010101, // TCfg
0x00000000, // ASelt
0x00010000, 0x00030002, // ABin
0x01010101, // BSelt
0x00010000, 0x00030002, // BBin
0x00000400, // AccumType, ConstantType, and PostShift
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniU16TimesMultiplier_PostShift_2x8 = {{
0x11111111, // TCfg
0x00000000, // ASelt
0x03020100, 0x07060504, // ABin
0x22222222, // BSelt
0x00000000, 0x00000000, // BBin
0x00000600, // AccumType, ConstantType, and PostShift
0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniU8PlusU8_trans_0_2x8 = {{
0xffffffff, // TCfg
0x44444444, // ASelt
0x0c080400, 0x0d090501, // ABin
0x00000000, // BSelt
0x00000000, 0x00000000, // BBin
0x00007400, // AccumType, ConstantType, and PostShift
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniU8PlusU8_trans_1_2x8 = {{
0xffffffff, // TCfg
0x44444444, // ASelt
0x0e0a0602, 0x0f0b0703, // ABin
0x00000000, // BSelt
0x00000000, 0x00000000, // BBin
0x00007400, // AccumType, ConstantType, and PostShift
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_quantize_multiplier_16bit((double)output_scale, &M0, &postShift);
uniU16TimesMultiplier_PostShift_2x8.data[7] |= (postShift & 0x1F);
for ( i = 8; i < 16; i++ )
{
uniU16TimesMultiplier_PostShift_2x8.data[i] = M0;
}
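/* gpu_quantize_multiplier_16bit() factors the float rescale into a 16-bit
 * mantissa and a right shift, output_scale ~= M0 * 2^-postShift, so the DP
 * unit can requantize in integer math: the shift lands in the low 5 bits of
 * data[7] (the PostShift field) and M0 is broadcast into the constant slots
 * data[8..15]. */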
status = vsi_nn_kernel_gpu_add_param( node, "uniU8TimesU8_0_4x4", &uniU8TimesU8_0_4x4);
status |= vsi_nn_kernel_gpu_add_param( node, "uniU16TimesMultiplier_PostShift_2x8",
&uniU16TimesMultiplier_PostShift_2x8);
status |= vsi_nn_kernel_gpu_add_param( node, "uniU8PlusU8_trans_0_2x8", &uniU8PlusU8_trans_0_2x8);
status |= vsi_nn_kernel_gpu_add_param( node, "uniU8PlusU8_trans_1_2x8", &uniU8PlusU8_trans_1_2x8);
status |= vsi_nn_kernel_gpu_add_param( node, "output_zp", &output_zp);
CHECK_STATUS_FAIL_GOTO(status, final );
}
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (attr[0])
{
vsi_nn_kernel_tensor_attr_release( &attr[0] );
}
if (attr[1])
{
vsi_nn_kernel_tensor_attr_release( &attr[1] );
}
return status;
} /* _tiny_yolov4_postprocess_confidence_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _tiny_yolov4_postprocess_confidence_kernel_map;
size_t kernel_map_size = _cnt_of_array( _tiny_yolov4_postprocess_confidence_kernel_map );
vx_param_description_t * param_def = _tiny_yolov4_postprocess_confidence_kernel_param_def;
vx_kernel_initialize_f initializer = _tiny_yolov4_postprocess_confidence_initializer;
uint32_t key;
uint32_t i;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
key = _CONFIDENCE_HASH_KEY( in_dtype, out_dtype );
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = _cnt_of_array( _tiny_yolov4_postprocess_confidence_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
vsi_size_t shape[2][VSI_NN_MAX_DIM_NUM] = { 0 };
vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
VSI_UNREFERENCED(params);
memcpy(shape[0], inputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
shape[0][0] = shape[0][0] * shape[0][1];
shape[0][1] = shape[0][2];
shape[0][2] = 1;
memcpy(shape[1], outputs[0]->attr.size, VSI_NN_MAX_DIM_NUM * sizeof(vsi_size_t));
shape[1][0] = shape[1][0];
shape[1][1] = shape[1][2] * shape[1][1];
shape[1][2] = 1;
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape[0], inputs[0]->attr.dim_num );
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
outputs[0], shape[1], outputs[0]->attr.dim_num );
if ( !vsi_nn_kernel_gpu_check_shape(
reshape_tensors[0]->attr.size, reshape_tensors[0]->attr.dim_num ) )
{
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
return NULL;
}
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM,
reshape_tensors, input_num, &reshape_tensors[1], output_num );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params,
_TINY_YOLOV4_POSTPROCESS_CONFIDENCE_PARAM_NUM );
}
}
vsi_safe_release_tensor(reshape_tensors[0]);
vsi_safe_release_tensor(reshape_tensors[1]);
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_EVIS( tiny_yolov4_postprocess_confidence, _setup )

View File

@ -50,18 +50,27 @@ typedef enum _custom_warp_affine_type_e
}custom_warp_affine_type_e; }custom_warp_affine_type_e;
#define _CUSTOM_WARP_AFFINE_KERNEL_SOURCE "custom_warp_affine" #define _CUSTOM_WARP_AFFINE_KERNEL_SOURCE "custom_warp_affine"
#define _CUSTOM_WARP_AFFINE_RGB_KERNEL_SOURCE "custom_warp_affine_rgb"
// Add kernel hashtable here // Add kernel hashtable here
#define CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, IMG_2D ) \ #define CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, IMG_2D, RGB_TYPE ) \
(( IN_DTYPE ) | ( OUT_DTYPE << 8 ) | (TYPE << 16) | (IMG_2D << 20)) (( IN_DTYPE ) | ( OUT_DTYPE << 8 ) | (TYPE << 16) | (IMG_2D << 20) | (RGB_TYPE << 24))
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \ #define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 0 ), \ { CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 0, 0 ), \
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE), \ CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE), \
_CUSTOM_WARP_AFFINE_KERNEL_SOURCE } _CUSTOM_WARP_AFFINE_KERNEL_SOURCE }
#define PACK_2D_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \ #define PACK_2D_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 1 ), \ { CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 1, 0 ), \
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_2D"), \ CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_2D"), \
_CUSTOM_WARP_AFFINE_KERNEL_SOURCE } _CUSTOM_WARP_AFFINE_KERNEL_SOURCE }
#define PACK_RGB_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 0, 1 ), \
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_rgb"), \
_CUSTOM_WARP_AFFINE_RGB_KERNEL_SOURCE }
#define PACK_RGB_2D_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, TYPE ) \
{ CUSTOM_WARP_AFFINE_HASH_KEY( IN_DTYPE, OUT_DTYPE, TYPE, 1, 1 ), \
CVIVANTE_NAMESPACE("evis.custom_warp_affine_"#TYPE"_"#IN_DTYPE"to"#IN_DTYPE"_rgb_2D"), \
_CUSTOM_WARP_AFFINE_RGB_KERNEL_SOURCE }
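/* Resulting key layout: bits 0..7 IN_DTYPE, 8..15 OUT_DTYPE, 16..19 TYPE,
 * bit 20 IMG_2D, bit 24 the new RGB_TYPE flag. */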
typedef struct typedef struct
{ {
@ -78,6 +87,12 @@ static const _kernel_map_type _custom_warp_affine_kernel_map[] =
PACK_2D_KERNEL_MAP( U8, U8, nearest_neighbor ), PACK_2D_KERNEL_MAP( U8, U8, nearest_neighbor ),
PACK_2D_KERNEL_MAP( U8, U8, bilinear ), PACK_2D_KERNEL_MAP( U8, U8, bilinear ),
PACK_RGB_KERNEL_MAP( U8, U8, nearest_neighbor ),
PACK_RGB_KERNEL_MAP( U8, U8, bilinear ),
PACK_RGB_2D_KERNEL_MAP( U8, U8, nearest_neighbor ),
PACK_RGB_2D_KERNEL_MAP( U8, U8, bilinear ),
}; };
/* /*
@ -124,6 +139,8 @@ DEF_KERNEL_INITIALIZER(_custom_warp_affine_initializer)
float matrix4[4] = {0}; float matrix4[4] = {0};
int32_t i = 0; int32_t i = 0;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -178,7 +195,81 @@ final:
return status; return status;
} /* _custom_warp_affine_initializer() */ } /* _custom_warp_affine_initializer() */
DEF_KERNEL_INITIALIZER(_custom_warp_affine_rgb_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t* attr[2] = {NULL, NULL};
vsi_size_array_t * out_shape = NULL;
float m[6] = {0};
float matrix0[4] = {0};
float matrix1[4] = {0};
int32_t i = 0;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
for (i = 0; i < 6; i++)
{
status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MATRIX_OFFSET + i],
&m[i]);
CHECK_STATUS_FAIL_GOTO(status, final );
}
matrix0[0] = m[0]; matrix0[1] = m[1]; matrix0[2] = m[2]; matrix0[3] = m[3];
matrix1[0] = m[4]; matrix1[1] = m[5];
out_shape = attr[1]->shape;
gpu_param.global_scale[0] = 2;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.global_size[0] = (
(out_shape->data[0] + gpu_param.global_scale[0] - 1)
/ (3 * gpu_param.global_scale[0]));
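/* output width is counted in interleaved R,G,B components, so each
 * work-item covers global_scale[0] pixels times 3 channels; hence the
 * extra factor of 3 in the divisor. */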
gpu_param.global_size[1] = (
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1]);
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
status = vsi_nn_kernel_gpu_add_param( node,
"matrix0", &matrix0 );
status |= vsi_nn_kernel_gpu_add_param( node,
"matrix1", &matrix1 );
CHECK_STATUS_FAIL_GOTO(status, final );
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (attr[0])
{
vsi_nn_kernel_tensor_attr_release( &attr[0] );
attr[0] = NULL;
}
if (attr[1])
{
vsi_nn_kernel_tensor_attr_release( &attr[1] );
attr[1] = NULL;
}
return status;
} /* _custom_warp_affine_rgb_initializer() */
/* /*
* Query kernel * Query kernel
@ -188,7 +279,8 @@ static vsi_status _query_kernel
vsi_nn_kernel_t * kernel, vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs, vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs, vsi_nn_tensor_t * const * const outputs,
int32_t type int32_t type,
int32_t rgb_type
) )
{ {
vsi_status status = VSI_FAILURE; vsi_status status = VSI_FAILURE;
@ -205,8 +297,11 @@ static vsi_status _query_kernel
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
key = CUSTOM_WARP_AFFINE_HASH_KEY( in_dtype, out_dtype, type, is_2d_img ); key = CUSTOM_WARP_AFFINE_HASH_KEY( in_dtype, out_dtype, type, is_2d_img, rgb_type );
if (rgb_type == 1)
{
initializer = _custom_warp_affine_rgb_initializer;
}
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{ {
if ( kernel_map[i].key == key ) if ( kernel_map[i].key == key )
@ -251,6 +346,7 @@ static vsi_nn_kernel_node_t _setup
size_t i = 0; size_t i = 0;
size_t buffer_size = 0; size_t buffer_size = 0;
int32_t type = vsi_nn_kernel_param_get_int32( params, "type"); int32_t type = vsi_nn_kernel_param_get_int32( params, "type");
int32_t rgb_type = vsi_nn_kernel_param_get_int32( params, "rgb_type");
float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size ); float * buffer = (float*)vsi_nn_kernel_param_get_const_buffer( params, "matrix", &buffer_size );
if (vsi_nn_DtypeCompare(&inputs[0]->attr.dtype, &outputs[0]->attr.dtype) == FALSE) if (vsi_nn_DtypeCompare(&inputs[0]->attr.dtype, &outputs[0]->attr.dtype) == FALSE)
@ -258,7 +354,7 @@ static vsi_nn_kernel_node_t _setup
return NULL; return NULL;
} }
status = _query_kernel( kernel, inputs, outputs, type ); status = _query_kernel( kernel, inputs, outputs, type, rgb_type );
if ( VSI_SUCCESS == status) if ( VSI_SUCCESS == status)
{ {
node = vsi_nn_kernel_create_node( graph, kernel ); node = vsi_nn_kernel_create_node( graph, kernel );
@ -282,7 +378,7 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] ); vsi_nn_kernel_scalar_release( &node_params[SCALAR_MATRIX_OFFSET + i] );
} }
// Set default border mode. // Set default border mode.
border.constant_value.U32 = 0xcdcdcdcd; border.constant_value.U32 = 0x00000000;
status = vxSetNodeAttribute( (vx_node)node, VX_NODE_BORDER, &border, sizeof(border) ); status = vxSetNodeAttribute( (vx_node)node, VX_NODE_BORDER, &border, sizeof(border) );
CHECK_STATUS(status); CHECK_STATUS(status);
} }

View File

@ -127,6 +127,8 @@ DEF_KERNEL_INITIALIZER(_custom_warp_perspective_initializer)
float matrix4[4] = {0}; float matrix4[4] = {0};
int32_t i = 0; int32_t i = 0;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );

View File

@ -48,6 +48,9 @@ static vsi_status op_compute
{ {
vsi_status status = VSI_SUCCESS; vsi_status status = VSI_SUCCESS;
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
#if defined(VX_DENOISE_POSTPROCESS_SUPPORT) && VX_DENOISE_POSTPROCESS_SUPPORT #if defined(VX_DENOISE_POSTPROCESS_SUPPORT) && VX_DENOISE_POSTPROCESS_SUPPORT
self->n = vxDenoisePostProcesslayer( self->n = vxDenoisePostProcesslayer(
self->graph->g, self->graph->g,
@ -83,6 +86,9 @@ static vsi_bool op_check
vsi_nn_tensor_t ** outputs vsi_nn_tensor_t ** outputs
) )
{ {
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE; return TRUE;
} /* op_check() */ } /* op_check() */
@ -93,6 +99,9 @@ static vsi_bool op_setup
vsi_nn_tensor_t ** outputs vsi_nn_tensor_t ** outputs
) )
{ {
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE; return TRUE;
} /* op_setup() */ } /* op_setup() */
@ -101,6 +110,7 @@ static vsi_status op_init
vsi_nn_node_t* self vsi_nn_node_t* self
) )
{ {
VSI_UNREFERENCED(self);
return VSI_SUCCESS; return VSI_SUCCESS;
} /* op_init() */ } /* op_init() */

View File

@ -63,6 +63,9 @@ static vsi_bool op_check
) )
{ {
/*TODO: Check params. */ /*TODO: Check params. */
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE; return TRUE;
} /* op_check() */ } /* op_check() */
@ -73,6 +76,7 @@ static vsi_bool op_setup
vsi_nn_tensor_t ** outputs vsi_nn_tensor_t ** outputs
) )
{ {
VSI_UNREFERENCED(node);
if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num) if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num)
{ {
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;

View File

@ -62,6 +62,9 @@ static vsi_bool op_check
) )
{ {
/*TODO: Check params. */ /*TODO: Check params. */
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE; return TRUE;
} /* op_check() */ } /* op_check() */
@ -72,6 +75,7 @@ static vsi_bool op_setup
vsi_nn_tensor_t ** outputs vsi_nn_tensor_t ** outputs
) )
{ {
VSI_UNREFERENCED(node);
if( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num ) if( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
{ {
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;

View File

@ -59,6 +59,7 @@ static vsi_status op_compute
param = vsi_nn_kernel_param_create(); param = vsi_nn_kernel_param_create();
vsi_nn_kernel_param_add_const_buffer( param, "matrix", p->matrix, 6 ); vsi_nn_kernel_param_add_const_buffer( param, "matrix", p->matrix, 6 );
vsi_nn_kernel_param_add_int32( param, "type", p->type); vsi_nn_kernel_param_add_int32( param, "type", p->type);
vsi_nn_kernel_param_add_int32( param, "rgb_type", p->rgb_type);
self->n = (vx_node)vsi_nn_kernel_selector( self->graph, self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
"custom_warp_affine", "custom_warp_affine",
@ -78,6 +79,9 @@ static vsi_bool op_check
) )
{ {
/*TODO: Check tensor shapes. */ /*TODO: Check tensor shapes. */
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE; return TRUE;
} /* op_check() */ } /* op_check() */

View File

@ -78,6 +78,9 @@ static vsi_bool op_check
) )
{ {
/*TODO: Check tensor shapes. */ /*TODO: Check tensor shapes. */
VSI_UNREFERENCED(self);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(outputs);
return TRUE; return TRUE;
} /* op_check() */ } /* op_check() */

View File

@ -100,7 +100,7 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
size_t param_size size_t param_size
) )
{ {
vsi_status status = VX_FAILURE; vsi_status status = VSI_FAILURE;
// Alignment with a power of two value. // Alignment with a power of two value.
gpu_param_t gpu_param = { gpu_param_t gpu_param = {
2, 2,
@ -113,6 +113,8 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
vsi_nn_kernel_tensor_attr_t *input0_attr = NULL; vsi_nn_kernel_tensor_attr_t *input0_attr = NULL;
vsi_size_array_t *input_shape = NULL; vsi_size_array_t *input_shape = NULL;
VSI_UNREFERENCED(param_size);
input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0); input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final ); CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
input_shape = input0_attr->shape; input_shape = input0_attr->shape;

View File

@ -143,6 +143,8 @@ DEF_KERNEL_INITIALIZER(_argmax_initializer)
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL }; vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -183,7 +185,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e output_dtype; vsi_nn_kernel_dtype_e output_dtype;
vsi_status status = VSI_FAILURE; vsi_status status = VSI_FAILURE;
uint32_t key; uint32_t key;
int32_t i; size_t i;
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -240,6 +242,9 @@ static vsi_nn_kernel_node_t _setup
int32_t axis = 0; int32_t axis = 0;
vsi_size_t axis_size = 0; vsi_size_t axis_size = 0;
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
axis = vsi_nn_kernel_param_get_int32(params, "axis"); axis = vsi_nn_kernel_param_get_int32(params, "axis");
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size, if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,

View File

@ -143,6 +143,8 @@ DEF_KERNEL_INITIALIZER(_argmin_initializer)
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL }; vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -183,7 +185,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e output_dtype; vsi_nn_kernel_dtype_e output_dtype;
vsi_status status = VSI_FAILURE; vsi_status status = VSI_FAILURE;
uint32_t key; uint32_t key;
int32_t i; size_t i;
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -240,6 +242,9 @@ static vsi_nn_kernel_node_t _setup
int32_t axis = 0; int32_t axis = 0;
size_t axis_size = 0; size_t axis_size = 0;
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
axis = vsi_nn_kernel_param_get_int32(params, "axis"); axis = vsi_nn_kernel_param_get_int32(params, "axis");
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size, if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,

View File

@ -129,6 +129,8 @@ DEF_KERNEL_INITIALIZER(_avg_pool3d_initializer)
vsi_nn_kernel_tensor_attr_t *output_attr = NULL; vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL; vsi_size_array_t *output_shape = NULL;
VSI_UNREFERENCED(param_size);
vxReadScalarValue(depth_out, &depth_out_value); vxReadScalarValue(depth_out, &depth_out_value);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output ); output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final ); CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

View File

@ -135,6 +135,8 @@ DEF_KERNEL_INITIALIZER(_log_softmax_initializer)
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL }; vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
vsi_size_array_t * in_shape = NULL; vsi_size_array_t * in_shape = NULL;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@ -170,7 +172,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e output_dtype; vsi_nn_kernel_dtype_e output_dtype;
vsi_status status = VSI_FAILURE; vsi_status status = VSI_FAILURE;
uint32_t key; uint32_t key;
int i; size_t i;
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -238,6 +240,9 @@ static vsi_nn_kernel_node_t _setup
float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f; float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f;
float eps = vsi_nn_kernel_param_get_float32(params, "eps"); float eps = vsi_nn_kernel_param_get_float32(params, "eps");
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
if ( (inputs[1]->attr.is_const && inputs[2]->attr.is_const) if ( (inputs[1]->attr.is_const && inputs[2]->attr.is_const)
|| ( inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT16 || ( inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT16
&& inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT32 ) && inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT32 )

View File

@ -128,6 +128,8 @@ DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
vsi_nn_kernel_tensor_attr_t* output_attr = NULL; vsi_nn_kernel_tensor_attr_t* output_attr = NULL;
vsi_size_array_t* out_shape = NULL; vsi_size_array_t* out_shape = NULL;
VSI_UNREFERENCED(param_size);
output_attr = output_attr =
vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]); vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]);
CHECK_PTR_FAIL_GOTO(output_attr, "Create tensor attr buffer fail.", final); CHECK_PTR_FAIL_GOTO(output_attr, "Create tensor attr buffer fail.", final);
@ -140,9 +142,8 @@ DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
gpu_param.dim = 2; gpu_param.dim = 2;
gpu_param.global_size[0] = gpu_param.global_size[0] =
gpu_align_p2((out_shape->data[0] + gpu_param.global_scale[0] - 1) / (out_shape->data[0] + gpu_param.global_scale[0] - 1) /
gpu_param.global_scale[0], gpu_param.global_scale[0];
4);
gpu_param.global_size[1] = gpu_param.global_size[1] =
((out_shape->data[1] + gpu_param.global_scale[1] - 1) / ((out_shape->data[1] + gpu_param.global_scale[1] - 1) /
gpu_param.global_scale[1]); gpu_param.global_scale[1]);

View File

@ -134,6 +134,8 @@ DEF_KERNEL_INITIALIZER(_bucketize_initializer)
vsi_nn_kernel_tensor_attr_t * output_attr = NULL; vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(param_size);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] ); output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

View File

@ -128,6 +128,8 @@ DEF_KERNEL_INITIALIZER(_cast_initializer)
vsi_nn_kernel_tensor_attr_t * output_attr = NULL; vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(param_size);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
@ -251,6 +253,8 @@ static vsi_nn_kernel_node_t _setup
vsi_bool image_2d = FALSE; vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL; vsi_nn_kernel_node_t node = NULL;
VSI_UNREFERENCED(params);
if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size, if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
inputs[0]->attr.dim_num ) ) inputs[0]->attr.dim_num ) )
{ {

View File

@ -128,6 +128,8 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
vsi_nn_kernel_tensor_attr_t * output_attr = NULL; vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(param_size);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

View File

@ -229,6 +229,8 @@ DEF_KERNEL_INITIALIZER(_comparisons_initializer)
vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL }; vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -285,7 +287,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e output_dtype; vsi_nn_kernel_dtype_e output_dtype;
vsi_status status = VSI_FAILURE; vsi_status status = VSI_FAILURE;
uint32_t key; uint32_t key;
int i; size_t i;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type ); input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
@ -347,6 +349,9 @@ static vsi_nn_kernel_node_t _setup
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]); float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale; float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale;
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
ret = vsi_nn_kernel_optimize_eltwise_shape( ret = vsi_nn_kernel_optimize_eltwise_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num, inputs[0]->attr.size, inputs[0]->attr.dim_num,
inputs[1]->attr.size, inputs[1]->attr.dim_num, inputs[1]->attr.size, inputs[1]->attr.dim_num,
@ -363,11 +368,11 @@ static vsi_nn_kernel_node_t _setup
outputs[0], shapes[2], new_rank ); outputs[0], shapes[2], new_rank );
#define _swap_tensor(a, b, tmp) \ #define _swap_tensor(a, b, tmp) \
do { \ { \
tmp = a; \ tmp = a; \
a = b; \ a = b; \
b = tmp; \ b = tmp; \
} while(0) }
if (shapes[1][3] > shapes[0][3] && new_rank == 4) if (shapes[1][3] > shapes[0][3] && new_rank == 4)
{ {

View File

@ -135,6 +135,8 @@ DEF_KERNEL_INITIALIZER(_cumsum_initializer)
int32_t c = 1; int32_t c = 1;
uint32_t dim = 1; uint32_t dim = 1;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@ -203,7 +205,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e input0_dtype = U8; vsi_nn_kernel_dtype_e input0_dtype = U8;
vsi_nn_kernel_dtype_e output_dtype = U8; vsi_nn_kernel_dtype_e output_dtype = U8;
uint32_t key = 0; uint32_t key = 0;
int i = 0; size_t i = 0;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -288,11 +290,28 @@ static vsi_nn_kernel_node_t _setup
int32_t width = 0; int32_t width = 0;
int32_t height = 0; int32_t height = 0;
int32_t channel = 1; int32_t channel = 1;
int32_t i = 0; uint32_t i = 0;
vsi_nn_kernel_optimize_softmax_shape( VSI_UNREFERENCED(input_num);
inputs[0]->attr.size, inputs[0]->attr.dim_num, axis, VSI_UNREFERENCED(output_num);
shapes[0], &rs_dim, &axis_new);
if (axis < 0)
{
axis_new = 0;
shapes[0][0] = 1;
shapes[0][1] = 1;
for (i = 0; i < inputs[0]->attr.dim_num; i++)
{
shapes[0][0] *= inputs[0]->attr.size[i];
}
rs_dim = 2;
}
else
{
vsi_nn_kernel_optimize_softmax_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num, axis,
shapes[0], &rs_dim, &axis_new);
}
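/* A negative axis is treated as "accumulate over the flattened tensor":
 * all dimensions fold into shapes[0][0] and the kernel runs a plain 2D
 * cumsum along axis 0. */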
if (rs_dim > 3) if (rs_dim > 3)
{ {
return NULL; return NULL;

View File

@ -103,6 +103,8 @@ DEF_KERNEL_INITIALIZER(_depth2space_crd_initializer)
int32_t output_height = 0; int32_t output_height = 0;
int32_t output_chn = 0; int32_t output_chn = 0;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@ -145,7 +147,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e input0_dtype = U8; vsi_nn_kernel_dtype_e input0_dtype = U8;
vsi_nn_kernel_dtype_e output_dtype = U8; vsi_nn_kernel_dtype_e output_dtype = U8;
uint32_t key = 0; uint32_t key = 0;
int i = 0; size_t i = 0;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -195,6 +197,9 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_t node = NULL; vsi_nn_kernel_node_t node = NULL;
int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" ); int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size, if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) ) outputs[0]->attr.dim_num ) )
{ {

View File

@ -126,6 +126,9 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
vsi_nn_kernel_tensor_attr_t * input_attr = NULL; vsi_nn_kernel_tensor_attr_t * input_attr = NULL;
vsi_size_array_t * in_shape = NULL; vsi_size_array_t * in_shape = NULL;
VSI_UNREFERENCED(param_size);
VSI_UNREFERENCED(node);
input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );
in_shape = input_attr->shape; in_shape = input_attr->shape;

View File

@ -181,6 +181,14 @@ static vsi_nn_kernel_node_t _setup
{ {
vsi_nn_kernel_node_t node = NULL; vsi_nn_kernel_node_t node = NULL;
VSI_UNREFERENCED(graph);
VSI_UNREFERENCED(inputs);
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(outputs);
VSI_UNREFERENCED(output_num);
VSI_UNREFERENCED(params);
VSI_UNREFERENCED(kernel);
return node; return node;
} /* _setup() */ } /* _setup() */

View File

@ -211,6 +211,9 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL }; vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -253,7 +256,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e output_dtype; vsi_nn_kernel_dtype_e output_dtype;
vsi_status status = VSI_FAILURE; vsi_status status = VSI_FAILURE;
uint32_t key; uint32_t key;
int i; size_t i;
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -327,6 +330,9 @@ static vsi_nn_kernel_node_t _setup
float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" ); float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
float beta = vsi_nn_kernel_param_get_float32( params, "beta" ); float beta = vsi_nn_kernel_param_get_float32( params, "beta" );
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
if (unary_type == UNARY_SELU) if (unary_type == UNARY_SELU)
{ {
alpha = alpha * beta; alpha = alpha * beta;

View File

@ -135,6 +135,9 @@ DEF_KERNEL_INITIALIZER(_erf_initializer)
vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL }; vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
vsi_size_array_t * out_shape = NULL; vsi_size_array_t * out_shape = NULL;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -243,6 +246,10 @@ static vsi_nn_kernel_node_t _setup
float outputScale = vsi_nn_get_tensor_scale(outputs[0]); float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputZP = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f; float outputZP = (float)vsi_nn_get_tensor_zero_point(outputs[0]) + 0.5f;
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
VSI_UNREFERENCED(params);
ret = vsi_nn_kernel_optimize_element_shape( ret = vsi_nn_kernel_optimize_element_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num, inputs[0]->attr.size, inputs[0]->attr.dim_num,
shape, &new_rank ); shape, &new_rank );

View File

@ -122,11 +122,14 @@ DEF_KERNEL_INITIALIZER(_floordiv_initializer)
{0, 0, 0}, {0, 0, 0},
{0, 0, 0} {0, 0, 0}
}; };
vx_status status = VX_FAILURE; vsi_status status = VSI_FAILURE;
vx_tensor output = (vx_tensor)param[2]; vx_tensor output = (vx_tensor)param[2];
vsi_nn_kernel_tensor_attr_t *output_attr = NULL; vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL; vsi_size_array_t *output_shape = NULL;
VSI_UNREFERENCED(param_size);
VSI_UNREFERENCED(node);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output ); output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final ); CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
@ -258,6 +261,8 @@ static vsi_nn_kernel_node_t _setup
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]); float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]); float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]);
VSI_UNREFERENCED(params);
outputScale = 1.0f / outputScale; outputScale = 1.0f / outputScale;
input0Tail = -(input0Tail * input0Scale); input0Tail = -(input0Tail * input0Scale);
input1Tail = -(input1Tail * input1Scale); input1Tail = -(input1Tail * input1Scale);

View File

@ -205,6 +205,9 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
size_t input_dims1 = 0; size_t input_dims1 = 0;
size_t i = 0; size_t i = 0;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final ); CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -264,7 +267,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e input0_dtype = U8; vsi_nn_kernel_dtype_e input0_dtype = U8;
vsi_nn_kernel_dtype_e output_dtype = U8; vsi_nn_kernel_dtype_e output_dtype = U8;
uint32_t key = 0; uint32_t key = 0;
int i = 0; size_t i = 0;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -334,6 +337,9 @@ static vsi_nn_kernel_node_t _setup
int32_t is_array = block_size >= GPU_TENSOR_MAX_WIDTH ? 1 : 0;
int32_t i = 0;
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0, &is_array);
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1, &is_array);
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0, &is_array);

View File

@ -51,18 +51,30 @@ typedef enum
#define STR(a) #a
// Add kernel hashtable here
-#define GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, IMG_2D ) \
-    (( AXIS ) | ( IN0_DTYPE << 2 ) | ( IN1_DTYPE << 10 ) | ( OUT_DTYPE << 18 ) | ( IMG_2D << 26 ))
+#define GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, IMG_2D, BEYOND_MAXWIDTH ) \
+    (( AXIS ) | ( IN0_DTYPE << 2 ) | ( IN1_DTYPE << 10 ) | ( OUT_DTYPE << 18 ) | ( IMG_2D << 26 ) | \
+    (BEYOND_MAXWIDTH << 28))
#define PACK_KERNEL_3D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
-    { GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 ), \
+    { GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 , 0), \
    CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)), \
    _GATHER_ELEMENTS_KERNEL_SOURCE}
#define PACK_KERNEL_2D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
-    { GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 ), \
+    { GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 , 0), \
    CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)"_2D"), \
    _GATHER_ELEMENTS_KERNEL_SOURCE}
#define PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 , 1), \
CVIVANTE_NAMESPACE("cl.gather_elements_beyond_maxwidth_axis"STR(AXIS)"_"STR(IN0_DTYPE)\
"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)), \
_GATHER_ELEMENTS_KERNEL_SOURCE}
#define PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
{ GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 , 1), \
CVIVANTE_NAMESPACE("cl.gather_elements_beyond_maxwidth_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)\
"to"STR(OUT_DTYPE)"_2D"), _GATHER_ELEMENTS_KERNEL_SOURCE}
typedef struct
{
uint32_t key;
@ -89,6 +101,44 @@ static const _kernel_map_type _gather_elements_kernel_map[] =
PACK_KERNEL_2D_MAP( 1, F32, I32, F32 ),
PACK_KERNEL_2D_MAP( 1, I32, I32, I32 ),
PACK_KERNEL_2D_MAP( 1, U32, I32, U32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, F32, I32, F32),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, F16, I32, F16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, I32, I32, I32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, I16, I32, I16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, I8, I32, I8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 0, U8, I32, U8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, F32, I32, F32),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, F16, I32, F16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, I32, I32, I32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, I16, I32, I16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, I8, I32, I8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 1, U8, I32, U8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, F32, I32, F32),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, F16, I32, F16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, I32, I32, I32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, I16, I32, I16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, I8, I32, I8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_3D_MAP( 2, U8, I32, U8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, F32, I32, F32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, F16, I32, F16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, I32, I32, I32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, I16, I32, I16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, I8, I32, I8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 0, U8, I32, U8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, F32, I32, F32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, F16, I32, F16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, I32, I32, I32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, I16, I32, I16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, I8, I32, I8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 1, U8, I32, U8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, F32, I32, F32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, F16, I32, F16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, I32, I32, I32 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, I16, I32, I16 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, I8, I32, I8 ),
PACK_KERNEL_BEYOND_MAXWIDTH_2D_MAP( 2, U8, I32, U8 ),
};
@ -126,12 +176,38 @@ DEF_KERNEL_INITIALIZER(_gather_elements_initializer)
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t * input_attr0 = NULL;
vsi_nn_kernel_tensor_attr_t * input_attr1 = NULL;
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_size_array_t * out_shape = NULL;
uint32_t width0 = 0;
uint32_t height0 = 0;
uint32_t width1 = 0;
uint32_t height1 = 0;
uint32_t width_out = 0;
uint32_t height_out = 0;
uint32_t depth0 = 0;
uint32_t depth1 = 0;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
input_attr0 = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( input_attr0, "Create tensor attr buffer fail.", final );
input_attr1 = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( input_attr1, "Create tensor attr buffer fail.", final );
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
width0 = (uint32_t)input_attr0->shape->data[0];
height0 = (uint32_t)input_attr0->shape->data[1];
depth0 = input_attr0->shape->size > 2 ? (uint32_t)input_attr0->shape->data[2] : 1;
width1 = (uint32_t)input_attr1->shape->data[0];
height1 = (uint32_t)input_attr1->shape->data[1];
depth1 = input_attr1->shape->size > 2 ? (uint32_t)input_attr1->shape->data[2] : 1;
width_out = (uint32_t)output_attr->shape->data[0];
height_out = (uint32_t)output_attr->shape->data[1];
out_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
@ -146,7 +222,25 @@ DEF_KERNEL_INITIALIZER(_gather_elements_initializer)
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1]);
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
if (width0 >= GPU_TENSOR_MAX_WIDTH ||
width1 >= GPU_TENSOR_MAX_WIDTH ||
height0 >= GPU_TENSOR_MAX_WIDTH ||
height1 >= GPU_TENSOR_MAX_WIDTH ||
depth0 >= GPU_TENSOR_MAX_WIDTH ||
depth1 >= GPU_TENSOR_MAX_WIDTH)
{
gpu_param.global_scale[0] = 1;
gpu_param.global_size[0] = out_shape->data[0];
}
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
status |= vsi_nn_kernel_gpu_add_param( node, "width0", &width0 );
status |= vsi_nn_kernel_gpu_add_param( node, "height0", &height0 );
status |= vsi_nn_kernel_gpu_add_param( node, "width1", &width1 );
status |= vsi_nn_kernel_gpu_add_param( node, "height1", &height1 );
status |= vsi_nn_kernel_gpu_add_param( node, "width_out", &width_out );
status |= vsi_nn_kernel_gpu_add_param( node, "height_out", &height_out );
final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
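This is the initializer side of the beyond-maxwidth feature: when any extent of either input reaches GPU_TENSOR_MAX_WIDTH, vectorized addressing along X is abandoned (global_scale[0] stays 1 and the global size is taken directly from the output width) and the raw extents are handed to the kernel as uniforms so the CL source can do its own bounds math. The dispatch rule, factored into a checker for clarity; this is a sketch, and the threshold value is assumed rather than taken from the GPU headers:

    #include <stdint.h>

    #ifndef GPU_TENSOR_MAX_WIDTH
    #define GPU_TENSOR_MAX_WIDTH 65536u  /* assumed value, for illustration */
    #endif

    /* Returns nonzero when any width/height/depth of either tensor is too
     * large for the image-width-limited kernels, matching the check in
     * _gather_elements_initializer above. */
    static int _beyond_maxwidth(const uint32_t dims0[3], const uint32_t dims1[3])
    {
        int i;
        for (i = 0; i < 3; i++)
        {
            if (dims0[i] >= GPU_TENSOR_MAX_WIDTH ||
                dims1[i] >= GPU_TENSOR_MAX_WIDTH)
            {
                return 1;
            }
        }
        return 0;
    }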
@ -178,32 +272,52 @@ static vsi_status _query_kernel
int32_t img_2d = (outputs[0]->attr.dim_num < 3 || outputs[0]->attr.size[2] == 1) ? 1 : 0;
uint32_t key = 0;
uint32_t i;
int32_t beyond_maxwidth = 0;
vsi_size_t depth0 = inputs[0]->attr.dim_num > 2 ? inputs[0]->attr.size[2] : 1;
vsi_size_t depth1 = inputs[1]->attr.dim_num > 2 ? inputs[1]->attr.size[2] : 1;
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (inputs[0]->attr.size[0] >= GPU_TENSOR_MAX_WIDTH ||
inputs[0]->attr.size[1] >= GPU_TENSOR_MAX_WIDTH ||
inputs[1]->attr.size[0] >= GPU_TENSOR_MAX_WIDTH ||
inputs[1]->attr.size[1] >= GPU_TENSOR_MAX_WIDTH ||
depth0 >= GPU_TENSOR_MAX_WIDTH ||
depth1 >= GPU_TENSOR_MAX_WIDTH)
{
beyond_maxwidth = 1;
}
#define _PACK_SELECT_KEY( in0_type, out_type ) \
( ( in0_type ) | ( out_type << 8 ))
-switch (_PACK_SELECT_KEY(in0_dtype, out_dtype))
-{
-case _PACK_SELECT_KEY(F32, F32):
-case _PACK_SELECT_KEY(F16, F16):
-    key = GATHER_ELEMENTS_HASH_KEY( axis, F32, in1_dtype, F32, img_2d );
-    break;
-case _PACK_SELECT_KEY(U32, U32):
-case _PACK_SELECT_KEY(U16, U16):
-case _PACK_SELECT_KEY(U8, U8):
-    key = GATHER_ELEMENTS_HASH_KEY( axis, U32, in1_dtype, U32, img_2d );
-    break;
-case _PACK_SELECT_KEY(I32, I32):
-case _PACK_SELECT_KEY(I16, I16):
-case _PACK_SELECT_KEY(I8, I8):
-    key = GATHER_ELEMENTS_HASH_KEY( axis, I32, in1_dtype, I32, img_2d );
-    break;
-default:
-    break;
-}
+if (beyond_maxwidth == 0)
+{
+    switch (_PACK_SELECT_KEY(in0_dtype, out_dtype))
+    {
+    case _PACK_SELECT_KEY(F32, F32):
+    case _PACK_SELECT_KEY(F16, F16):
+        key = GATHER_ELEMENTS_HASH_KEY( axis, F32, in1_dtype, F32, img_2d, 0 );
+        break;
+    case _PACK_SELECT_KEY(U32, U32):
+    case _PACK_SELECT_KEY(U16, U16):
+    case _PACK_SELECT_KEY(U8, U8):
+        key = GATHER_ELEMENTS_HASH_KEY( axis, U32, in1_dtype, U32, img_2d, 0 );
+        break;
+    case _PACK_SELECT_KEY(I32, I32):
+    case _PACK_SELECT_KEY(I16, I16):
+    case _PACK_SELECT_KEY(I8, I8):
+        key = GATHER_ELEMENTS_HASH_KEY( axis, I32, in1_dtype, I32, img_2d, 0 );
+        break;
+    default:
+        break;
+    }
+}
+else
+{
+    key = GATHER_ELEMENTS_HASH_KEY( axis, in0_dtype, in1_dtype, out_dtype, img_2d, 1 );
+}
#undef _PACK_SELECT_KEY
@ -221,7 +335,8 @@ static vsi_status _query_kernel
kernel->info.numParams = _cnt_of_array( _gather_elements_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
-vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
+vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
+    "eltwise_ops_helper",
    kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,

View File

@ -119,7 +119,7 @@ static vsi_status cal_gather_nd_tensor_reshape_size
uint32_t block_size,
uint32_t coordDim,
int32_t* newDim,
-int32_t batch_dims
+uint32_t batch_dims
)
{
vsi_status status = VSI_FAILURE;
@ -146,17 +146,23 @@ static vsi_status cal_gather_nd_tensor_reshape_size
if (batch_dims)
{
+    int32_t rank = 1;
for (i = 0; i < offset; i++)
{
sizes[0] *= input_size[i];
}
-for (i = 0; i < coordDim; i++)
+for (i = 0; i < coordDim - 1; i++)
{
-    sizes[i + 1] = input_size[i + offset];
+    sizes[rank++] = input_size[i + offset];
}
-newDim[0] = coordDim == 1 ? 2 : 3;
+for (i = 0; i < batch_dims; i++)
+{
+    sizes[rank] *= input_size[dims_num - i - 1];
+}
+newDim[0] = rank + 1;
}
else
{
@ -186,13 +192,27 @@ static vsi_status cal_gather_nd_tensor_reshape_size
}
else // indices&output reshape
{
-if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
+if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH && batch_dims == 0)
{
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
status = VSI_SUCCESS;
newDim[0] = 2;
}
else if (batch_dims > 0)
{
vsi_size_t batch_cnt = 1;
for (i = 0; i < batch_dims; ++i)
{
batch_cnt *= input_size[dims_num - i - 1];
}
sizes[0] = block_size;
sizes[1] = (elementCnt / block_size) / batch_cnt;
sizes[2] = batch_cnt;
status = VSI_SUCCESS;
newDim[0] = 3;
}
}
#undef VSI_NN_MAX_IMAGE_WIDTH
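The batch_dims reshape is easiest to follow with numbers. Using the library's innermost-first shape order, take an indices tensor of shape {2, 100, 8} with block_size = 2 and batch_dims = 1: the 8 outermost entries are batches, so the indices/output side becomes {block_size, rows per batch, batch count}. A self-contained walk-through, with shape values invented for illustration:

    #include <stdio.h>

    int main(void)
    {
        /* innermost-first: {2, 100, 8}, batch_dims = 1, block_size = 2 */
        unsigned input_size[3] = { 2, 100, 8 };
        unsigned dims_num = 3, batch_dims = 1, block_size = 2;
        unsigned elementCnt = 2 * 100 * 8, batch_cnt = 1, i;

        for (i = 0; i < batch_dims; ++i)
        {
            batch_cnt *= input_size[dims_num - i - 1];  /* folds the batches */
        }
        /* prints { 2, 100, 8 }; the initializer later maps batch_cnt
         * (sizes[2]) onto gpu_param.global_size[2]. */
        printf("{ %u, %u, %u }\n", block_size,
               (elementCnt / block_size) / batch_cnt, batch_cnt);
        return 0;
    }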
@ -220,7 +240,11 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
int32_t block_size = 0;
-vsi_ssize_t indices_num = 1;
+vsi_size_t indices_num = 1;
vsi_size_t batch_num = 1;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@ -229,6 +253,7 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)
CHECK_STATUS_FAIL_GOTO(status, final );
indices_num = attr[0]->shape->data[1];
batch_num = (attr[0]->shape->size > 2 ? attr[0]->shape->data[2] : 1);
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
@ -237,7 +262,7 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)
gpu_param.global_size[0] = gpu_align_p2((block_size + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0], 4);
gpu_param.global_size[1] = indices_num;
-gpu_param.global_size[2] = 1;
+gpu_param.global_size[2] = batch_num;
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
CHECK_STATUS_FAIL_GOTO(status, final);
@ -265,7 +290,8 @@ static vsi_status _query_kernel
vsi_nn_kernel_dtype_e output_dtype = U8;
vsi_nn_kernel_coord_type_e coord_type = _error;
uint32_t key = 0;
-int i = 0;
+int32_t batch_flg = batch_dims > 0 ? 1 : 0;
+size_t i = 0;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
@ -301,7 +327,7 @@ static vsi_status _query_kernel
coord_type = _3D;
}
-key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_dims );
+key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_flg );
for ( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
{
@ -348,6 +374,9 @@ static vsi_nn_kernel_node_t _setup
int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim, batch_dims);
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim, batch_dims);
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim, batch_dims);

View File

@ -108,6 +108,9 @@ DEF_KERNEL_INITIALIZER(_globallppool_initializer)
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
output_shape = output_attr->shape;

View File

@ -220,6 +220,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_sum_sqr_initializer)
vsi_ssize_t width = 0;
vsi_ssize_t chn = 0;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -275,6 +278,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_mean_vari_initializer)
vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
vsi_ssize_t chn = 0;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@ -325,6 +331,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_initializer)
vsi_ssize_t chn = 0;
int32_t is2D = 0;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
@ -489,6 +498,9 @@ static vsi_nn_kernel_node_t _setup
float rSpaceOrg = 1.0f / (width * height);
float group_ratio = 1.0f / (inputs[0]->attr.size[0] * inputs[0]->attr.size[1] * group_size);
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{

View File

@ -91,6 +91,9 @@ DEF_KERNEL_INITIALIZER(_grucell_activation_initializer)
)
{
vsi_status status = VSI_FAILURE;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param);
VSI_UNREFERENCED(param_size);
// vsi_nn_kernel_tensor_attr * attr[2] = { NULL };
// attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
// attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -172,6 +175,8 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
VSI_UNREFERENCED(params);
/*
// Check if gpu can support the size
if( !vsi_nn_kernel_gpu_check_shape(

View File

@ -91,6 +91,10 @@ DEF_KERNEL_INITIALIZER(_grucell_activation_sma_initializer)
)
{
vsi_status status = VSI_FAILURE;
VSI_UNREFERENCED(node);
VSI_UNREFERENCED(param);
VSI_UNREFERENCED(param_size);
// vsi_nn_kernel_tensor_attr * attr[2] = { NULL };
// attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
// attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -172,6 +176,8 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_GRUCELL_ACTIVATION_SMA_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
VSI_UNREFERENCED(params);
/*
// Check if gpu can support the size
if( !vsi_nn_kernel_gpu_check_shape(

View File

@ -118,6 +118,8 @@ DEF_KERNEL_INITIALIZER(_grucell_activation_z_h_initializer)
vsi_nn_kernel_tensor_t input = NULL;
vsi_nn_kernel_tensor_attr_t* input_attr = NULL;
VSI_UNREFERENCED(param_size);
input = (vsi_nn_kernel_tensor_t)param[GRUCELL_ACT_Z_H_HSTATE];
input_attr = vsi_nn_kernel_tensor_attr_create( input );

View File

@ -110,6 +110,8 @@ DEF_KERNEL_INITIALIZER(_grucell_h_times_activation_r_initializer)
vsi_nn_kernel_tensor_t output = NULL;
vsi_nn_kernel_tensor_attr_t* output_attr;
VSI_UNREFERENCED(param_size);
output = (vsi_nn_kernel_tensor_t)param[3];
output_attr = vsi_nn_kernel_tensor_attr_create( output );

View File

@ -120,6 +120,8 @@ DEF_KERNEL_INITIALIZER(_grucell_reset_after_activation_initializer)
vsi_nn_kernel_tensor_t input = NULL;
vsi_nn_kernel_tensor_attr_t* input_attr = NULL;
VSI_UNREFERENCED(param_size);
input = (vsi_nn_kernel_tensor_t)param[GRUCELL_ACT_H_STATE];
input_attr = vsi_nn_kernel_tensor_attr_create( input );

View File

@ -188,6 +188,8 @@ DEF_KERNEL_INITIALIZER(_instancenorm_sums_initializer)
vsi_ssize_t height = 0;
vsi_ssize_t chn = 0;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
@ -255,6 +257,8 @@ DEF_KERNEL_INITIALIZER(_instancenorm_initializer)
vsi_ssize_t height = 0;
vsi_ssize_t chn = 0;
VSI_UNREFERENCED(param_size);
attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
@ -405,6 +409,9 @@ static vsi_nn_kernel_node_t _setup
float output_scale = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
float inv_multiplier = (float)1.0 / (float)(width * height);
VSI_UNREFERENCED(input_num);
VSI_UNREFERENCED(output_num);
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
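One detail worth noting in the instance-norm setup above: both factors are pre-inverted on the host (1.0f / scale and 1.0f / (width * height)) so the per-element GPU code multiplies instead of divides. A tiny numeric check of that identity, with values invented for illustration:

    #include <stdio.h>

    int main(void)
    {
        float width = 16.0f, height = 16.0f;
        float sum = 1024.0f;
        /* Precompute the reciprocal once on the host... */
        float inv_multiplier = 1.0f / (width * height);
        /* ...so the hot path is a multiply: 1024 / 256 == 1024 * (1/256) */
        printf("%f == %f\n", sum / (width * height), sum * inv_multiplier);
        return 0;
    }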

Some files were not shown because too many files have changed in this diff.