Update internal & prebuilt-sdk for 23Q1 release (#573)

Update internal to 0e9393dbb4f653b9dfceaeaaa920d4deb8b27077
Update prebuilt-sdk to 6.4.14 release
Update CMake files to support the above updates

Type: New Feature

Signed-off-by: Feiyue Chen <Feiyue.Chen@verisilicon.com>
Chen Feiyue committed 2023-04-18 22:19:16 +08:00 (via GitHub)
parent a32f255d7f
commit 6e38e64a1a
340 changed files with 23171 additions and 29635 deletions

View File

@ -1 +1 @@
6.4.12_CL562241A_D561555_A558512_R558399_T558462_Oeb44e5c
6.4.14_CL650117A_D650117_A648302_R647402_T648811_O646970

View File

@ -501,6 +501,8 @@ enum vx_kernel_e {
VX_KERNEL_STREAM_PROCESSOR = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x32,
VX_KERNEL_NN_BATCH_GEMM_RELU_POOLING_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x33,
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
};

View File

@ -173,7 +173,7 @@ VX_DECONV_3D_API_SUPPORT is used to declare that vsi openvx driver can support d
1: support
*/
#ifndef VX_DECONV_3D_API_SUPPORT
#define VX_DECONV_3D_API_SUPPORT 0
#define VX_DECONV_3D_API_SUPPORT 1
#endif
/*
@ -237,4 +237,26 @@ VX_STREAM_PROCESSOR_SUPPORT is used to declare that vsi openvx driver can suppor
#define VX_SCALE_EXTRA_PARAMETER_SUPPORT 1
#endif
/*
VX_INVALIDATE_HANDLE_SUPPORT is used to declare that the vxSwapTensorHandle API has been refined to follow the Khronos OpenVX 1.3 spec: the tensor does not maintain the handle internally if new_ptr is NULL.
[value]
0: not support
1: support
*/
#ifndef VX_INVALIDATE_HANDLE_SUPPORT
#define VX_INVALIDATE_HANDLE_SUPPORT 1
#endif
/*
VX_ACTIVATION_EXT2_SUPPORT is used to declare that ACTIVATION can support sign, hard_sigmoid, neg, clip, exp, sin, cos,
log, mish, gelu, hgelu, elu, selu, celu, rcp, softsign, atan, atanh, acosh, inverse sigmoid, round and erf.
[value]
0: not support
1: support
*/
#ifndef VX_ACTIVATION_EXT2_SUPPORT
#define VX_ACTIVATION_EXT2_SUPPORT 1
#endif
#endif /* __VX_KHR_COMPATIBLE_H__ */
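The compatibility macros above are intended for compile-time feature gating in client code. A minimal sketch (editor's illustration, not part of the commit); the helper function is hypothetical:

#include <VX/vx_khr_compatible.h>

/* Hypothetical helper: report whether the driver exposes the extended
 * activation set (gelu, mish, erf, ...) natively. */
static int driver_supports_ext2_activations(void)
{
#if defined(VX_ACTIVATION_EXT2_SUPPORT) && VX_ACTIVATION_EXT2_SUPPORT
    return 1;  /* extended activations are available as fused NN activations */
#else
    return 0;  /* fall back to emulating them with separate layers */
#endif
}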

View File

@ -219,6 +219,28 @@ enum vx_nn_activation_function_e
VX_NN_ACTIVATION_HSWISH = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x6,
VX_NN_ACTIVATION_CUSTOM = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x7,
VX_NN_ACTIVATION_NONE = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x8,
VX_NN_ACTIVATION_SIGN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x9,
VX_NN_ACTIVATION_HSIGMOID_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xa,
VX_NN_ACTIVATION_NEG_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xb,
VX_NN_ACTIVATION_CLIP_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xc,
VX_NN_ACTIVATION_EXP_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xd,
VX_NN_ACTIVATION_SIN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xe,
VX_NN_ACTIVATION_COS_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0xf,
VX_NN_ACTIVATION_LOG_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x10,
VX_NN_ACTIVATION_MISH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x11,
VX_NN_ACTIVATION_GELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x12,
VX_NN_ACTIVATION_HGELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x13,
VX_NN_ACTIVATION_ELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x14,
VX_NN_ACTIVATION_SELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x15,
VX_NN_ACTIVATION_CELU_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x16,
VX_NN_ACTIVATION_RECIPROCAL_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x17,
VX_NN_ACTIVATION_SOFTSIGN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x18,
VX_NN_ACTIVATION_ATAN_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x19,
VX_NN_ACTIVATION_ATANH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1a,
VX_NN_ACTIVATION_ACOSH_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1b,
VX_NN_ACTIVATION_INVERSE_SIGMOID_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1c,
VX_NN_ACTIVATION_ROUND_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1d,
VX_NN_ACTIVATION_ERF_VSI = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_NN_ACTIVATION_FUNCTION_TYPE) + 0x1e,
};
/*! \brief The Convolutional network type
@ -623,6 +645,19 @@ VX_API_ENTRY vx_tensor VX_API_CALL vxCreateTensorFromHandle2(
* \retval VX_ERROR_INVALID_REFERENCE tensor is not a valid <tt>\ref vx_tensor</tt> or <tt>\ref vx_image</tt> reference created from a handle.
*/
VX_API_ENTRY vx_status VX_API_CALL vxFlushHandle(vx_reference ref);
/*! \brief Same as vxFlushHandle(); also added by VeriSilicon as an extension API.
*/
VX_API_ENTRY vx_status VX_API_CALL vxFlushHandleVSI(vx_reference ref);
#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
/*! \brief Invalidates the memory referenced by the reference's handle when it is ready.
* Added by VeriSilicon as an extension API.
* \param [in] ref The reference (image or tensor) which was created from a handle.
* \return A <tt>\ref vx_status_e</tt> enumeration.
* \retval VX_ERROR_INVALID_REFERENCE ref is not a valid <tt>\ref vx_tensor</tt> or <tt>\ref vx_image</tt> reference created from a handle.
*/
VX_API_ENTRY vx_status VX_API_CALL vxInvalidateHandleVSI(vx_reference ref);
#endif
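A hedged sketch of the intended call order for these handle extension APIs (editor's illustration), assuming the tensor was created from a user-provided handle, e.g. via vxCreateTensorFromHandle2(); user_ptr, new_data and data_size are application-known:

#include <string.h>

static void sync_handle_example(vx_tensor tensor, void *user_ptr,
                                const void *new_data, size_t data_size)
{
    /* Write path: update the user buffer, then flush it toward the device. */
    memcpy(user_ptr, new_data, data_size);
    vxFlushHandleVSI((vx_reference)tensor);

#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
    /* Read path: invalidate first so the CPU sees the device's latest data. */
    vxInvalidateHandleVSI((vx_reference)tensor);
#endif
}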
#if VX_VA40_EXT_SUPPORT
/*! \brief Return a new tensor referencing the same memory location but with different shape.
@ -776,6 +811,14 @@ typedef struct _vx_nn_convolution_params_ext2_t
vx_int32 depth_multiplier; /*!< \brief depthwise multiplier value; if 0, the operation is a regular convolution, otherwise (>=1) it is a depthwise convolution. */
} vx_nn_convolution_params_ext2_t;
typedef struct _vx_nn_convolution_params_ext3_t
{
vx_nn_convolution_params_ext2_t ext2; /*!< \brief Convolution extension structure head */
vx_bool isPPU; /*!< \brief merge convolution and relu for PPU. */
} vx_nn_convolution_params_ext3_t;
/*==============================================================================
NN Nodes
=============================================================================*/
@ -2142,7 +2185,8 @@ typedef struct _vx_hardware_caps_params_ext_t
typedef struct _vx_hardware_caps_params_ext2_t
{
vx_hardware_caps_params_ext_t base;
vx_uint32 streamProcessorExecCount; /*!< \brief streamprocess execution count. */
vx_uint32 streamProcessorExecCount; /*!< \brief stream processor execution count. */
vx_uint32 streamProcessorVectorSize; /*!< \brief stream processor vector size. */
} vx_hardware_caps_params_ext2_t;
/*! \brief Queries hardware caps information.

View File

@ -236,6 +236,12 @@ typedef struct _vx_nn_convolution_relu_pooling_params_ext6_t
} vx_nn_convolution_relu_pooling_params_ext6_t, * vx_nn_convolution_relu_pooling_params_ext6;
typedef struct _vx_nn_convolution_relu_pooling_params_ext7_t
{
vx_nn_convolution_relu_pooling_params_ext6_t ext6; /*!< \brief convolution relu pooling params <tt>\ref vx_nn_convolution_relu_pooling_params_ext_t</tt> */
vx_bool isSub;
} vx_nn_convolution_relu_pooling_params_ext7_t, * vx_nn_convolution_relu_pooling_params_ext7;
/*! \brief [Graph] Creates a Convolutional Network Convolution and Activation (Relu) and Pooling Layer Node; this function matches the Khronos NN Extension 1.2 version.
* \details This function implements a Convolutional Network Convolution and Activation (Relu) and Pooling layer.
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of accumulator bits is implementation defined,
@ -1081,6 +1087,48 @@ VX_API_ENTRY vx_node VX_API_CALL vxTensorTableLookupLayer(
vx_lut OutLut,
vx_tensor output);
typedef struct _vx_nn_gemm_relu_pooling_params_t
{
vx_bool enable_relu; /*!< \brief Enable Relu layer function or not. */
vx_bool enable_leaky_relu; /*!< \brief Enable LeakyRelu layer function or not. */
vx_float32 alpha; /*!< \brief Alpha value for Activation */
vx_float32 beta; /*!< \brief Beta value for Activation */
vx_uint32 node_count; /*!< \brief node count to merge */
vx_float32 merged_scale[MERGED_NODE_COUNT_MAX]; /*!< \brief scale of merged node output */
vx_int32 merged_zero_point[MERGED_NODE_COUNT_MAX]; /*!< \brief zero point of merged node output */
vx_enum merged_data_type[MERGED_NODE_COUNT_MAX]; /*!< \brief data type of merged node output */
vx_enum act_func; /*!< \brief nn activation function */
vx_lut lut_in; /*!< \brief LUT in */
vx_lut lut_out; /*!< \brief LUT out */
vx_bool enbale_const_multiplier; /*!< \brief Tensor multiply where one of the inputs is a single-pixel const tensor */
vx_float32 const_multiplier; /*!< \brief const multiplier */
} vx_nn_gemm_relu_pooling_params_t, * vx_nn_gemm_relu_pooling_params;
/*! \brief Creates a batch GEMM node; the calculation formula is output = matrix_a * matrix_b + matrix_c.
* \param [in] graph The reference to the graph.
* \param [in] matrix_a The first input tensor.
* \param [in] matrix_b The second input tensor. Must be in the same data type and batch count as the first input tensor.
* \param [in] matrix_c The third input tensor. Must be in the same data type and batch count as the first input tensor. [optional]
* \param [in] trans_a If true, matrix_a is transposed before the calculation.
* \param [in] trans_b If true, matrix_b is transposed before the calculation.
* \param [in] trans_c If true, matrix_c is transposed before the calculation. [optional]
* \param [in] merge_param The parameters for GEMM + op merging.
* \param [out] output The output tensor. Output dimensions must agree with the formula in the description.
* \return <tt>\ref vx_node</tt>.
* \retval vx_node A node reference. Any possible errors preventing a successful creation
* should be checked using <tt>\ref vxGetStatus</tt>
* \ingroup group_vision_function_gemm
*/
VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmReluPoolingLayer(vx_graph graph,
vx_tensor matrix_a,
vx_tensor matrix_b,
vx_tensor matrix_c,
vx_scalar trans_a,
vx_scalar trans_b,
vx_scalar trans_c,
const vx_nn_gemm_relu_pooling_params merge_param,
vx_tensor output);
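A sketch of wiring up the new node (editor's illustration); graph, context and tensors are assumed to already exist, and whether act_func accepts the standard VX_NN_ACTIVATION_RELU value is an assumption:

#include <string.h>

static vx_node make_gemm_relu_node(vx_context context, vx_graph graph,
                                   vx_tensor matrix_a, vx_tensor matrix_b,
                                   vx_tensor output)
{
    vx_bool   bfalse  = vx_false_e;
    vx_scalar trans_a = vxCreateScalar(context, VX_TYPE_BOOL, &bfalse);
    vx_scalar trans_b = vxCreateScalar(context, VX_TYPE_BOOL, &bfalse);

    vx_nn_gemm_relu_pooling_params_t merge_param;
    memset(&merge_param, 0, sizeof(merge_param));
    merge_param.enable_relu = vx_true_e;              /* fuse ReLU into the GEMM */
    merge_param.node_count  = 0;                      /* nothing else merged */
    merge_param.act_func    = VX_NN_ACTIVATION_RELU;  /* assumed accepted here */

    /* matrix_c and trans_c are optional and passed as NULL. */
    return vxBatchGemmReluPoolingLayer(graph, matrix_a, matrix_b, NULL,
                                       trans_a, trans_b, NULL,
                                       &merge_param, output);
}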
#ifdef __cplusplus
}
#endif

View File

@ -165,6 +165,7 @@ typedef enum _vx_sp_attribute_e
VX_SP_ATTRIBUTE_SUM_ENGINE_CONTROL,
VX_SP_ATTRIBUTE_SUM_ENGINE_NUM_CH_MINUS_ONE,
VX_SP_ATTRIBUTE_SUM_ENGINE_2D_ACCUM_STORAGE,
VX_SP_ATTRIBUTE_SUM_ENGINE_OP_SELECT,
VX_SP_ATTRIBUTE_NUM_OF_ELEMENTS_PER_LOOP_PER_INPUT,
@ -181,6 +182,18 @@ typedef enum _vx_sp_attribute_e
VX_SP_ATTRIBUTE_CONST3, /* NN clamp max */
VX_SP_ATTRIBUTE_CONST4, /* NN clamp min */
VX_SP_ATTRIBUTE_CONST_COUNT,
VX_SP_ATTRIBUTE_SPLIT_AXIS,
VX_SP_ATTRIBUTE_SPLIT_MAX_SIZE,
VX_SP_ATTRIBUTE_SPLIT_TILEX_EQUAL_INIMAGEX,
VX_SP_ATTRIBUTE_NOT_MERGE_CONVSP,
VX_SP_ATTRIBUTE_UPDATE_CONST0_TO_PCQ_COEF_TENSOR,
VX_SP_ATTRIBUTE_RESHAPE_ARRAY, /* bit layout | output:24-29 | input3:18-23 | input2:12-17 | input1:6-11 | input0:0-5 | */
VX_SP_ATTRIBUTE_ALIGN_SP_CORE_AXIS,
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE,
VX_SP_ATTRIBUTE_TOTAL_COUNT,
}
vx_sp_attribute_e;
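The bit layout documented on VX_SP_ATTRIBUTE_RESHAPE_ARRAY packs one 6-bit reshape code per port. A sketch of assembling such a value with the vx_sp_attribute_reshape_e codes defined further below (editor's illustration; the helper name is hypothetical):

/* | output:24-29 | input3:18-23 | input2:12-17 | input1:6-11 | input0:0-5 | */
static vx_uint32 sp_pack_reshape_array(vx_uint32 in0, vx_uint32 in1,
                                       vx_uint32 in2, vx_uint32 in3,
                                       vx_uint32 out)
{
    return  (in0 & 0x3F)
         | ((in1 & 0x3F) << 6)
         | ((in2 & 0x3F) << 12)
         | ((in3 & 0x3F) << 18)
         | ((out & 0x3F) << 24);
}

/* e.g. transpose input0 CHW->WHC and leave the other ports unchanged:
 * vx_uint32 v = sp_pack_reshape_array(VX_SP_ATTRIBUTE_RESHAPE_CHW2WHC,
 *                                     VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW,
 *                                     VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW,
 *                                     VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW,
 *                                     VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW); */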
@ -274,9 +287,55 @@ typedef enum _vx_sp_attribute_sum_engine_2d_accum_storage_e
}
vx_sp_attribute_sum_engine_2d_accum_storage_e;
typedef enum _vx_sp_attribute_sum_engine_op_select_e
{
VX_SP_ATTRIBUTE_SUM_ENGINE_SUM_OP,
VX_SP_ATTRIBUTE_SUM_ENGINE_MAX_OP
} vx_sp_attribute_sum_engine_op_select_e;
typedef enum _vx_sp_attribute_reshape_e
{
VX_SP_ATTRIBUTE_RESHAPE_CHW2CHW = 0x00,
VX_SP_ATTRIBUTE_RESHAPE_CHW2WHC = 0x06,
VX_SP_ATTRIBUTE_RESHAPE_CHW2WCH = 0x09,
VX_SP_ATTRIBUTE_RESHAPE_CHW2HWC = 0x12,
VX_SP_ATTRIBUTE_RESHAPE_CHW2HCW = 0x18,
VX_SP_ATTRIBUTE_RESHAPE_CHW2CWH = 0x21,
}
vx_sp_attribute_reshape_e;
typedef enum _vx_sp_attribute_split_axis_e
{
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_X,
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_Y,
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_Z,
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_XY,
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_YZ,
VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_XYZ,
}
vx_sp_attribute_split_axis_e;
typedef enum _vx_sp_attribute_tile_align_sp_core_e
{
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_NONE = 0,
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_X,
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_Y,
VX_SP_ATTRIBUTE_TILE_ALIGN_SP_CORE_WITH_AXIS_XY,
}
vx_sp_attribute_tile_align_sp_core_e;
typedef enum _vx_sp_attribute_keep_tile_size_e
{
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_NONE = 0,
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_X,
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_Y,
VX_SP_ATTRIBUTE_KEEP_TILE_SIZE_WITH_AXIS_XY,
}
vx_sp_attribute_keep_tile_size_e;
/**********************************************************************************************/
/*! \brief Creates an opaque reference to a spinst data.
/*! \brief Creates an external reference to a spinst data.
* \param [in] context The reference to the implementation context.
* \return A spinst data reference.
* Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
@ -286,7 +345,17 @@ VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINST(
vx_context context
);
/*! \brief Releases a reference to a spinst object.
/*! \brief Creates an internal reference to a spinst data.
* \param [in] context The reference to the implementation context.
* \return A spinst data reference.
* Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
* \ingroup group_object_spinst
*/
VX_API_ENTRY vx_spinst VX_API_CALL vxCreateSPINSTInternal(
vx_context context
);
/*! \brief Releases a reference to an external spinst object.
* The object may not be garbage collected until its total reference count is zero.
* \param [in] spinst_obj The pointer to the spinst data to release.
* \post After returning from this function the reference is zeroed.
@ -299,6 +368,19 @@ VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINST(
vx_spinst *spinst_obj
);
/*! \brief Releases a reference to an internal spinst object.
* The object may not be garbage collected until its total reference count is zero.
* \param [in] spinst_obj The pointer to the spinst data to release.
* \post After returning from this function the reference is zeroed.
* \return A <tt>\ref vx_status_e</tt> enumeration.
* \retval VX_SUCCESS No errors; all other values indicate failure
* \retval * An error occurred. See <tt>\ref vx_status_e</tt>.
* \ingroup group_object_spinst
*/
VX_API_ENTRY vx_status VX_API_CALL vxReleaseSPINSTInternal(
vx_spinst *spinst_obj
);
/*! \brief Adds an instruction to a spinst object.
* \param [in] spinst_obj The reference to the spinst object.
* \param [in] inst_unit_array The units of one instruction. Use a <tt>\ref vx_spinst_unit_param</tt>.
@ -332,6 +414,12 @@ VX_API_ENTRY vx_status VX_API_CALL vxSetAttributeToSPINST(
vx_uint32 value
);
VX_API_ENTRY vx_status VX_API_CALL vxGetAttributeToSPINST(
vx_spinst spinst_obj,
vx_enum attribute,
vx_uint32* value
);
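With the new getter, attribute writes can be verified; a minimal sketch (editor's illustration), assuming a valid spinst object and that the setter mirrors the getter's parameter order:

static vx_status set_and_check_split_axis(vx_spinst spinst_obj)
{
    vx_uint32 readback = 0;
    vx_status status = vxSetAttributeToSPINST(spinst_obj,
                                              VX_SP_ATTRIBUTE_SPLIT_AXIS,
                                              VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_X);
    if (status == VX_SUCCESS)
    {
        status = vxGetAttributeToSPINST(spinst_obj,
                                        VX_SP_ATTRIBUTE_SPLIT_AXIS,
                                        &readback);
        /* readback should now equal VX_SP_ATTRIBUTE_SPLIT_ON_AXIS_X */
    }
    return status;
}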
#ifdef __cplusplus
}
#endif

View File

@ -539,6 +539,15 @@ typedef vx_enum vx_action;
*/
typedef vx_action (VX_CALLBACK *vx_nodecomplete_f)(vx_node node);
/*! \brief A callback to the client for querying information of a node.
* \see vx_action
* \see vxAssignNodeCallback
* \param [in] node The node to which the callback was attached.
* \return A status code from <tt>\ref vx_status_e</tt>.
* \ingroup group_node_callback
*/
typedef vx_status (VX_CALLBACK *vx_nodequery_f)(vx_node node);
/*! \brief Vendor IDs are 2 nibbles in size and are located in the upper byte of
* the 4 bytes of an enumeration.
* \ingroup group_basic_features
@ -1028,6 +1037,11 @@ enum vx_node_attribute_e {
VX_NODE_ATTRIBUTE_FOR_HW_QUALITY = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0xA,
VX_NODE_SWTILING_TILE_XY = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x10,
VX_NODE_SPINST_INDEX = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x11,
VX_NODE_SPCONV_PCQ_REPLACE_SPINST = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x12,
VX_NODE_SP_NAME = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x13,
VX_NODE_SPINST = VX_ATTRIBUTE_BASE(VX_ID_KHRONOS, VX_TYPE_NODE) + 0x14,
};
/*! \brief The parameter attributes list

View File

@ -31,6 +31,7 @@ if(${TIM_VX_USE_EXTERNAL_OVXLIB})
set(OVXLIB_INCLUDE_DIR ${OVXLIB_INC})
else()
set(OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/include")
list(APPEND OVXLIB_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/src/tim/vx/internal/src")
endif()
message(STATUS "OVXLIB include directory: ${OVXLIB_INCLUDE_DIR}")

View File

@ -69,7 +69,6 @@ filegroup(
"src/custom/ops/*.c",
"src/custom/ops/kernel/evis/*.c",
"src/custom/ops/kernel/cl/*.c",
"src/custom/ops/kernel/cpu/*.c",
])
)
@ -84,6 +83,7 @@ cc_library(
linkstatic = True,
includes = [
"include",
"src",
],
hdrs = [
"include/vsi_nn_pub.h",
@ -104,6 +104,7 @@ cc_library(
"include/vsi_nn_compatibility.h",
"include/vsi_nn_assert.h",
"include/vsi_nn_feature.h",
"include/vsi_nn_post.h",
"include/vsi_nn_rnn.h",
"include/vsi_nn_rnn_helper.h",
"include/vsi_nn_rnn_prv.h",
@ -121,13 +122,15 @@ cc_library(
"include/utils/vsi_nn_limits.h",
"include/utils/vsi_nn_dtype_util.h",
"include/utils/vsi_nn_dtype_util_prv.h",
"include/utils/vsi_nn_vdata.h",
"include/utils/vsi_nn_tensor_op.h",
"include/utils/vsi_nn_dlfcn.h",
"include/utils/vsi_nn_shape_util.h",
"include/utils/vsi_nn_constraint_check.h",
"include/quantization/vsi_nn_asymmetric_affine.h",
"include/quantization/vsi_nn_dynamic_fixed_point.h",
"include/quantization/vsi_nn_perchannel_symmetric_affine.h",
"include/post/vsi_nn_post_fasterrcnn.h",
"include/post/vsi_nn_post_cmupose.h",
"include/interface/ops.def",
"include/kernel/vsi_nn_kernel.h",
"include/kernel/vsi_nn_gpu.h",
@ -168,6 +171,9 @@ cc_library(
"src/vsi_nn_daemon.c",
"src/vsi_nn_graph_optimization.c",
"src/vsi_nn_pre_post_process.c",
"src/vsi_nn_tensor_util_prv.h",
"src/vsi_nn_types_prv.h",
"src/vsi_nn_kernel_prv.h",
"src/utils/vsi_nn_link_list.c",
"src/utils/vsi_nn_util.c",
"src/utils/vsi_nn_math.c",
@ -177,14 +183,16 @@ cc_library(
"src/utils/vsi_nn_hashmap.c",
"src/utils/vsi_nn_limits.c",
"src/utils/vsi_nn_dtype_util.c",
"src/utils/vsi_nn_vdata.c",
"src/utils/vsi_nn_tensor_op.c",
"src/utils/vsi_nn_dlfcn.c",
"src/utils/vsi_nn_shape_util.c",
"src/utils/vsi_nn_dtype.c",
"src/utils/vsi_nn_constraint_check.c",
"src/quantization/vsi_nn_asymmetric_affine.c",
"src/quantization/vsi_nn_dynamic_fixed_point.c",
"src/quantization/vsi_nn_perchannel_symmetric_affine.c",
"src/post/vsi_nn_post_fasterrcnn.c",
"src/post/vsi_nn_post_cmupose.c",
"src/kernel/vsi_nn_kernel.c",
"src/kernel/vsi_nn_kernel_util.c",
"src/kernel/vsi_nn_kernel_backend.c",
@ -202,4 +210,3 @@ cc_library(
+ [":custom_srcs"],
deps = ["//prebuilt-sdk:VIV_SDK_LIB"]
)

View File

@ -5,3 +5,4 @@ DEF_NODE_TYPE(custom_softmax)
DEF_NODE_TYPE(custom_ainr_denoise_postprocess)
DEF_NODE_TYPE(custom_warp_affine)
DEF_NODE_TYPE(custom_warp_perspective)
DEF_NODE_TYPE(custom_sample)

View File

@ -5,3 +5,4 @@ DEF_OP(CUSTOM_SOFTMAX)
DEF_OP(CUSTOM_AINR_DENOISE_POSTPROCESS)
DEF_OP(CUSTOM_WARP_AFFINE)
DEF_OP(CUSTOM_WARP_PERSPECTIVE)
DEF_OP(CUSTOM_SAMPLE)

View File

@ -0,0 +1,35 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_CUSTOM_SAMPLE_H
#define _VSI_NN_OP_CUSTOM_SAMPLE_H
#include "vsi_nn_platform.h"
#include "vsi_nn_types.h"
typedef struct _vsi_nn_custom_sample_param
{
int32_t axis;
} vsi_nn_custom_sample_param;
#endif

View File

@ -30,5 +30,6 @@
#include "custom/ops/vsi_nn_op_custom_ainr_denoise_postprocess.h"
#include "custom/ops/vsi_nn_op_custom_warp_affine.h"
#include "custom/ops/vsi_nn_op_custom_warp_perspective.h"
#include "custom/ops/vsi_nn_op_custom_sample.h"
#endif

src/tim/vx/internal/include/interface/ops.def Normal file → Executable file
View File

@ -183,3 +183,13 @@ DEF_OP(LPPOOL)
DEF_OP(SCATTER_ELEMENTS)
DEF_OP(PRE_PROCESS_YUV422)
DEF_OP(BUCKETIZE)
DEF_OP(GLOBALLPPOOL)
DEF_OP(AVG_POOL3D)
DEF_OP(ATAN)
DEF_OP(ATANH)
DEF_OP(ACOSH)
DEF_OP(MAXUNPOOL)
DEF_OP(REVERSESEQUENCE)
DEF_OP(INVERSE_SIGMOID)
DEF_OP(GRID_SAMPLE)
DEF_OP(LPNORM)

src/tim/vx/internal/include/internal/internal_ops.def Normal file → Executable file
View File

@ -20,3 +20,4 @@ DEF_OP(SPACE2DEPTH_INTERNAL)
DEF_OP(GRUCELL_H_TIMES_ACTIVATION_R)
DEF_OP(GRUCELL_ACTIVATION_Z_H)
DEF_OP(REDUCE_MEAN_INTERNAL)
DEF_OP(BILINEAR_GRID_SAMPLE)

View File

@ -59,7 +59,7 @@ typedef struct
gpu_dp_type_e type;
} gpu_dp_inst_t;
typedef struct
typedef struct VSI_PUBLIC_TYPE
{
uint32_t dim;
size_t global_offset[GPU_MAX_DIMENSION_SIZE];

View File

@ -51,7 +51,7 @@ typedef enum
VSI_NN_KERNEL_TYPE_SP,
VSI_NN_KERNEL_TYPE_NUM,
VSI_NN_KERNEL_TYPE_NONE = VSI_NN_KERNEL_TYPE_NUM
} vsi_nn_kernel_type_e;
} VSI_PUBLIC_TYPE vsi_nn_kernel_type_e;
/** Kernel priority */
enum
@ -79,7 +79,7 @@ typedef enum
BOOL8,
I4,
U4,
} vsi_nn_kernel_dtype_e;
} VSI_PUBLIC_TYPE vsi_nn_kernel_dtype_e;
typedef enum
{
@ -98,7 +98,7 @@ typedef enum
VSI_NN_GPU_SOURCE_FMT_CODE = 0,
VSI_NN_GPU_SOURCE_FMT_EXECUTABLE = 1,
VSI_NN_GPU_SOURCE_FMT_NUM
} vsi_nn_gpu_source_fmt_e;
} VSI_PUBLIC_TYPE vsi_nn_gpu_source_fmt_e;
typedef char * vsi_nn_kernel_source_t;
typedef uint32_t vsi_nn_kernel_unique_id_t;
@ -125,7 +125,7 @@ typedef struct
vsi_nn_kernel_source_info_t sources[VSI_NN_GPU_SOURCE_FMT_NUM];
vsi_nn_gpu_source_fmt_e active_source_fmt;
} gpu;
} vsi_nn_kernel_t;
} VSI_PUBLIC_TYPE vsi_nn_kernel_t;
typedef struct
{
@ -172,15 +172,15 @@ typedef struct
int32_t allow_kernel_num;
} vsi_nn_kernel_selector_t;
typedef void * vsi_nn_kernel_node_param_t;
typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_node_param_t;
typedef void * vsi_nn_kernel_tensor_t;
typedef void * vsi_nn_kernel_node_t;
typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_node_t;
typedef void * vsi_nn_kernel_graph_t;
typedef void * vsi_nn_kernel_scalar_t;
typedef void * VSI_PUBLIC_TYPE vsi_nn_kernel_scalar_t;
typedef vsi_nn_hashmap_t vsi_nn_kernel_param_t;

View File

@ -51,6 +51,10 @@ typedef int32_t vsi_nn_kernel_lut_act_e; enum
VSI_NN_KERNEL_LUT_LINEAR_EXP = 17,
VSI_NN_KERNEL_LUT_LINEAR_RSQRT = 18,
VSI_NN_KERNEL_LUT_LINEAR_SIGMOID = 19,
VSI_NN_KERNEL_LUT_ATAN = 20,
VSI_NN_KERNEL_LUT_ATANH = 21,
VSI_NN_KERNEL_LUT_ACOSH = 22,
VSI_NN_KERNEL_LUT_INVERSE_SIGMOID = 23,
};
@ -67,6 +71,8 @@ typedef struct _vsi_nn_kernel_lut_
typedef struct _vsi_nn_kernel_lut_params
{
vsi_enum act_type;
vsi_bool pwl_sign_remove_support;
float clamp_min;
float params[16];
} vsi_nn_kernel_lut_params;

View File

@ -47,7 +47,7 @@ typedef struct vsi_nn_kernel_info
vx_kernel_description_t ** kernel;
uint8_t kernel_index;
uint8_t init_index;
} vsi_nn_kernel_info_t;
} VSI_PUBLIC_TYPE vsi_nn_kernel_info_t;
uint8_t * vsi_nn_LoadBinarySource
(

View File

@ -112,6 +112,7 @@ typedef struct _vsi_nn_argmax_param
/* argmax layer local data structure */
vsi_nn_argmax_lcl_data local;
int32_t axis;
vsi_bool keep_dims;
} vsi_nn_argmax_param;
#ifdef __cplusplus

View File

@ -111,6 +111,7 @@ typedef struct _vsi_nn_argmin_param
/* argmin layer local data structure */
vsi_nn_argmin_lcl_data local;
int32_t axis;
vsi_bool keep_dims;
} vsi_nn_argmin_param;
#ifdef __cplusplus

View File

@ -0,0 +1,53 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_AVG_POOL3D_H
#define _VSI_NN_OP_AVG_POOL3D_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_avg_pool3d_param
{
/* round_type is used to calculate the output shape */
vsi_nn_round_type_e round_type;
uint32_t ksize[3];
uint32_t stride[3];
/* Pad left, right, top, bottom, front, end */
uint32_t pad[6];
/* Pad type default value shall be AUTO */
vsi_nn_pad_e pad_type;
/* Whether include pad pixels when calculating value for the edges */
int32_t count_include_pad;
} vsi_nn_avg_pool3d_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -21,36 +21,31 @@
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_VDATA_H
#define _VSI_NN_VDATA_H
#include <stdio.h>
#include <stdint.h>
#ifndef _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
#define _VSI_NN_OP_BILINEAR_GRID_SAMPLE_H
#include "vsi_nn_graph.h"
#include "vsi_nn_node.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
OVXLIB_API uint8_t * vsi_nn_VdataCreate
(
vsi_nn_graph_t * graph,
vsi_nn_node_t * node,
uint32_t * p_stream_size
);
OVXLIB_API vsi_nn_tensor_t * vsi_nn_CreateVDataTensor
(
vsi_nn_graph_t * graph,
uint8_t * stream,
vsi_nn_tensor_attr_t * attr
);
typedef struct _vsi_nn_bilinear_grid_sample_param
{
struct _bilinear_grid_sample_local_data_t* local;
vsi_bool align_corners;
vsi_nn_pad_mode_e padding_mode;
int32_t const_val;
} vsi_nn_bilinear_grid_sample_param;
_compiler_assert(offsetof(vsi_nn_bilinear_grid_sample_param, local) == 0, \
vsi_nn_bilinear_grid_sample_h );
#ifdef __cplusplus
}
#endif
#endif

View File

@ -21,34 +21,23 @@
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef _VSI_NN_OP_GATHER_ND_H
#define _VSI_NN_OP_GATHER_ND_H
#include "vsi_nn_graph.h"
#include "vsi_nn_node.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_log.h"
#include "utils/vsi_nn_util.h"
#include "vsi_nn_types.h"
uint8_t * vsi_nn_VdataCreate
(
vsi_nn_graph_t * graph,
vsi_nn_node_t * node,
uint32_t * p_stream_size
)
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_gather_nd_param
{
return NULL;
} /* vsi_nn_VdataCreate() */
int32_t batch_dims;
} vsi_nn_gather_nd_param;
vsi_nn_tensor_t * vsi_nn_CreateVDataTensor
(
vsi_nn_graph_t * graph,
uint8_t * stream,
vsi_nn_tensor_attr_t * attr
)
{
return NULL;
} /* vsi_nn_CreateVDataTensor() */
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,44 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_GLOBALLPPOOL_H
#define _VSI_NN_OP_GLOBALLPPOOL_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_globallppool_param
{
int32_t p;
} vsi_nn_globallppool_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,58 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_GRID_SAMPLE_H
#define _VSI_NN_OP_GRID_SAMPLE_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
//typedef uint32_t vsi_nn_grid_sample_mode_t;
//enum { bilinear = 0, nearest };
//
//typedef uint32_t vsi_nn_grid_sample_padding_mode_t;
//enum { zeros = 0, CONST };
typedef struct _grid_sample_local_data_t {
int32_t placeholder;
} grid_sample_local_data_t;
typedef struct _vsi_nn_grid_sample_param
{
grid_sample_local_data_t* local;
vsi_enum mode;
vsi_bool align_corners;
vsi_nn_pad_mode_e padding_mode;
int32_t const_val;
} vsi_nn_grid_sample_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -67,7 +67,7 @@ typedef struct _vsi_nn_imageprocess_param
int32_t mean_value_size;
float* mean_value;
} mean;
} vsi_nn_imageprocess_param;
} VSI_PUBLIC_TYPE vsi_nn_imageprocess_param;
/**
* Insert imageprocess op for image pre process

View File

@ -0,0 +1,45 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_INVERSE_SIGMOID_H
#define _VSI_NN_OP_INVERSE_SIGMOID_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_inverse_sigmoid_param
{
// Add parameters here
float eps;
} vsi_nn_inverse_sigmoid_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,45 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_LPNORM_H
#define _VSI_NN_OP_LPNORM_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_lpnorm_param
{
int axis;
int p;
} vsi_nn_lpnorm_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,48 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_MAXUNPOOL_H
#define _VSI_NN_OP_MAXUNPOOL_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_maxunpool_param
{
// Add parameters here
uint32_t ksize[2];
uint32_t pad[4];
uint32_t stride[2];
const uint32_t *output_size;
} vsi_nn_maxunpool_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -68,6 +68,8 @@ typedef struct _vsi_nn_pre_process_nv12_param
vsi_bool reverse_channel;
vsi_nn_pre_process_nv12_lcl_data* local;
vsi_nn_nv_type nv_type;
} vsi_nn_pre_process_nv12_param;
#ifdef __cplusplus

View File

@ -38,6 +38,7 @@ typedef struct _vsi_nn_reduce_mean_internal_param
vx_int32 *axis;
vx_uint32 axis_num;
float scale;
vsi_enum type;
} vsi_nn_reduce_mean_internal_param;
_compiler_assert(offsetof(vsi_nn_reduce_mean_internal_param, local) == 0, \
vsi_nn_reduce_mean_internal_h );

View File

@ -0,0 +1,45 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_REVERSESEQUENCE_H
#define _VSI_NN_OP_REVERSESEQUENCE_H
#include "vsi_nn_types.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_reversesequence_param
{
int32_t batch_axis;
int32_t time_axis;
} vsi_nn_reversesequence_param;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -38,6 +38,7 @@ typedef struct _vsi_nn_roi_align_param
float width_ratio;
int32_t height_sample_num;
int32_t width_sample_num;
vsi_nn_roi_align_type_e platform_type;
} vsi_nn_roi_align_param;
#ifdef __cplusplus

View File

@ -71,6 +71,7 @@ typedef struct _vsi_nn_strided_slice_lcl_data2
vsi_bool is_dataconvert_op;
vsi_bool is_optimized;
vsi_bool is_same_shape;
strided_slice_param params;
} vsi_nn_strided_slice_lcl_data2;

View File

@ -0,0 +1,163 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_POST_CMUPOSE_H_
#define _VSI_NN_POST_CMUPOSE_H_
#include "utils/vsi_nn_link_list.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_subset_data_t
{
float idx[20];
}vsi_nn_subset_data_t;
typedef struct _vsi_nn_subset_t
{
vsi_nn_link_list_t link_list;
vsi_nn_subset_data_t data;
} VSI_PUBLIC_TYPE vsi_nn_subset_t;
typedef struct _vsi_nn_peaks_data_t
{
uint32_t location[2];
float score;
uint32_t id;
} VSI_PUBLIC_TYPE vsi_nn_peaks_data_t;
typedef struct _vsi_nn_peaks_t
{
vsi_nn_link_list_t link_list;
vsi_nn_peaks_data_t peak;
} VSI_PUBLIC_TYPE vsi_nn_peaks_t;
typedef struct _vsi_nn_conncection_data_t
{
uint32_t x;
uint32_t y;
float score;
uint32_t i;
uint32_t j;
}vsi_nn_connection_data_t;
typedef struct _vsi_nn_connection_t
{
vsi_nn_link_list_t link_list;
vsi_nn_connection_data_t data;
}vsi_nn_connection_t;
typedef struct _vsi_nn_con_candidate_data_t
{
uint32_t i;
uint32_t j;
float score;
float candAB;
}vsi_nn_con_candidate_data_t;
typedef struct _vsi_nn_con_candidate_t
{
vsi_nn_link_list_t link_list;
vsi_nn_con_candidate_data_t data;
}vsi_nn_con_candidate_t;
typedef struct _vsi_nn_cmupose_multiplier_t
{
float *size;
uint32_t num;
}vsi_nn_cmupose_multiplier_t;
typedef struct _vsi_nn_cmupose_image_t
{
uint32_t width;
uint32_t height;
uint32_t channel;
} VSI_PUBLIC_TYPE vsi_nn_cmupose_image_t;
typedef struct _vsi_nn_cmupose_scale_search_t
{
float *size;
uint32_t num;
}vsi_nn_cmupose_scale_search_t;
typedef struct _vsi_nn_cmupose_model_t
{
uint32_t boxsize;
uint32_t stride;
uint32_t padValue;
} VSI_PUBLIC_TYPE vsi_nn_cmupose_model_t;
typedef struct _vsi_nn_cmupose_param_t
{
float thre1;
float thre2;
float thre3;
uint32_t mid_num;
vsi_nn_cmupose_scale_search_t scale_search;
} VSI_PUBLIC_TYPE vsi_nn_cmupose_param_t;
typedef struct _vsi_nn_cmupose_inputs_t
{
vsi_nn_tensor_t *net_out;
} VSI_PUBLIC_TYPE vsi_nn_cmupose_inputs_t;
typedef struct _vsi_nn_cmupose_config_t
{
vsi_nn_cmupose_inputs_t inputs;
vsi_nn_cmupose_param_t param;
vsi_nn_cmupose_model_t model;
vsi_nn_cmupose_image_t image;
} VSI_PUBLIC_TYPE vsi_nn_cmupose_config_t;
OVXLIB_API vsi_status vsi_nn_CMUPose_Post_Process
(
float *net_out,
vsi_nn_cmupose_config_t *config,
vsi_nn_peaks_t ***all_peaks_out,
uint32_t *all_peaks_num_out,
vsi_nn_subset_t **subset_list_out,
vsi_nn_peaks_data_t **peak_candidate_out,
uint32_t *peak_candidate_num_out
);
OVXLIB_API vsi_status vsi_nn_CMUPose_PostProcess
(
vsi_nn_graph_t *graph,
vsi_nn_cmupose_inputs_t *inputs,
vsi_nn_cmupose_image_t *image,
vsi_nn_cmupose_param_t *param,
vsi_nn_cmupose_model_t *model,
vsi_nn_peaks_t ***all_peaks,
uint32_t *all_peaks_num,
vsi_nn_peaks_data_t **candidate,
uint32_t *candidate_num,
vsi_nn_subset_t **subset
);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,79 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_POST_FASTERRCNN_H_
#define _VSI_NN_POST_FASTERRCNN_H_
#include "vsi_nn_types.h"
#include "vsi_nn_node_type.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_link_list.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _vsi_nn_fasterrcnn_box_t
{
vsi_nn_link_list_t link_list;
/* upper-left coordinate(x1,y1) */
float x1;
float y1;
/* lower-right coordinate(x2,y2) */
float x2;
float y2;
float score;
uint32_t class_id;
} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_box_t;
typedef struct _vsi_nn_fasterrcnn_param_t
{
float conf_thresh;
float nms_thresh;
const char **classes;
uint32_t classes_num;
uint32_t rois_num;
vsi_nn_proposal_im_info iminfo;
} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_param_t;
typedef struct _vsi_nn_fasterrcnn_inputs_t
{
vsi_nn_tensor_t *rois;
vsi_nn_tensor_t *cls;
vsi_nn_tensor_t *bbox;
} VSI_PUBLIC_TYPE vsi_nn_fasterrcnn_inputs_t;
OVXLIB_API vsi_status vsi_nn_FasterRCNN_PostProcess
(
vsi_nn_graph_t *graph,
vsi_nn_fasterrcnn_inputs_t *inputs,
vsi_nn_fasterrcnn_param_t *param,
vsi_nn_fasterrcnn_box_t **dets_box
);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -29,8 +29,9 @@ extern "C"{
#endif
#include <stdint.h>
#include "vsi_nn_feature_config.h"
typedef int64_t vsi_nn_binary_tree_key_t;
typedef int64_t VSI_PUBLIC_TYPE vsi_nn_binary_tree_key_t;
#define vsi_nn_BinaryTreeInitRoot(n) do{n = NULL;} while (0);
@ -40,7 +41,7 @@ typedef struct _vsi_nn_binary_tree
struct _vsi_nn_binary_tree * right;
vsi_nn_binary_tree_key_t key;
void * data_ptr;
} vsi_nn_binary_tree_t;
} VSI_PUBLIC_TYPE vsi_nn_binary_tree_t;
OVXLIB_API void vsi_nn_BinaryTreeRemoveNode
(

View File

@ -0,0 +1,65 @@
#ifndef __VSI_NN_DLFCN_H
#define __VSI_NN_DLFCN_H
#if (defined(_MSC_VER) || defined(_WIN32) || defined(__MINGW32))
#define RTLD_LAZY 0
#define RTLD_NOW 0
#define RTLD_GLOBAL (1 << 1)
#define RTLD_LOCAL (1 << 2)
#define RTLD_DEFAULT ((void *)0)
#define RTLD_NEXT ((void *)-1)
#else
#include <dlfcn.h>
#endif
/**
* Opens a shared library.
*
* @param[in] file Library path.
* @param[in] mode Open mode.
*
* @return Library handle on success, or NULL otherwise.
*/
void* vsi_nn_dlopen
(
const char *file,
int mode
);
/**
* Closes an opened library.
*
* @param[in] handle Library handle.
*
* @return TRUE on success.
*/
int vsi_nn_dlclose
(
void *handle
);
/**
* Finds a symbol in an opened library.
*
* @param[in] handle Library handle.
* @param[in] name Symbol name to find.
*
* @return Symbol address.
*/
void* vsi_nn_dlsym
(
void *handle,
const char *name
);
/**
* Get error info.
*
* @return Error message.
*/
char * vsi_nn_dlerror(void);
#endif
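A sketch of the portable wrapper in use (editor's illustration); the library path and symbol name are purely illustrative:

#include "utils/vsi_nn_dlfcn.h"
#include <stdio.h>

static void load_plugin_example(void)
{
    void *handle = vsi_nn_dlopen("libcustom_kernels.so", RTLD_NOW);
    if (handle == NULL)
    {
        printf("dlopen failed: %s\n", vsi_nn_dlerror());
        return;
    }
    typedef int (*init_fn_t)(void);
    init_fn_t init = (init_fn_t)vsi_nn_dlsym(handle, "plugin_init");
    if (init != NULL)
    {
        init();  /* hypothetical plugin entry point */
    }
    vsi_nn_dlclose(handle);
}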

View File

@ -464,6 +464,7 @@ static VSI_INLINE_API vsi_status dtype_to_float32
case VSI_NN_TYPE_BOOL8:
case VSI_NN_TYPE_UINT8:
case VSI_NN_TYPE_INT16:
case VSI_NN_TYPE_UINT16:
case VSI_NN_TYPE_INT32:
{
int32_t src_value = 0;
@ -516,6 +517,7 @@ static VSI_INLINE_API vsi_status float32_to_dtype
case VSI_NN_TYPE_BOOL8:
case VSI_NN_TYPE_UINT8:
case VSI_NN_TYPE_INT16:
case VSI_NN_TYPE_UINT16:
case VSI_NN_TYPE_INT32:
case VSI_NN_TYPE_UINT32:
{

View File

@ -36,7 +36,7 @@ typedef struct _vsi_nn_link_list
{
struct _vsi_nn_link_list * prev;
struct _vsi_nn_link_list * next;
} vsi_nn_link_list_t;
} VSI_PUBLIC_TYPE vsi_nn_link_list_t;
typedef void ( * vsi_nn_link_list_init_t )
(

View File

@ -32,7 +32,7 @@
extern "C"{
#endif
typedef vsi_nn_binary_tree_key_t vsi_nn_map_key_t;
typedef vsi_nn_binary_tree_key_t VSI_PUBLIC_TYPE vsi_nn_map_key_t;
typedef struct _vsi_nn_map_key_list
{
@ -45,7 +45,7 @@ typedef struct _vsi_nn_map
int size;
vsi_nn_map_key_list_t * keys;
vsi_nn_binary_tree_t * values;
} vsi_nn_map_t;
} VSI_PUBLIC_TYPE vsi_nn_map_t;
OVXLIB_API void vsi_nn_MapInit
(

View File

@ -99,6 +99,30 @@ typedef enum vx_nn_activation_function_e vx_co
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_NONE VX_NN_ACTIVATION_NONE
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SWISH VX_NN_ACTIVATION_SWISH
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HWISH VX_NN_ACTIVATION_HSWISH
#if (VX_ACTIVATION_EXT2_SUPPORT)
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SIGN VX_NN_ACTIVATION_SIGN_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HSIGMOID VX_NN_ACTIVATION_HSIGMOID_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_NEG VX_NN_ACTIVATION_NEG_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_CLIP VX_NN_ACTIVATION_CLIP_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_EXP VX_NN_ACTIVATION_EXP_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SIN VX_NN_ACTIVATION_SIN_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_COS VX_NN_ACTIVATION_COS_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_LOG VX_NN_ACTIVATION_LOG_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_MISH VX_NN_ACTIVATION_MISH_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_GELU VX_NN_ACTIVATION_GELU_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_HGELU VX_NN_ACTIVATION_HGELU_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ELU VX_NN_ACTIVATION_ELU_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SELU VX_NN_ACTIVATION_SELU_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_CELU VX_NN_ACTIVATION_CELU_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_RECIPROCAL VX_NN_ACTIVATION_RECIPROCAL_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_SOFTSIGN VX_NN_ACTIVATION_SOFTSIGN_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ATAN VX_NN_ACTIVATION_ATAN_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ATANH VX_NN_ACTIVATION_ATANH_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ACOSH VX_NN_ACTIVATION_ACOSH_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_INVERSE_SIGMOID VX_NN_ACTIVATION_INVERSE_SIGMOID_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ROUND VX_NN_ACTIVATION_ROUND_VSI
#define VX_CONVOLUTIONAL_NETWORK_ACTIVATION_ERF VX_NN_ACTIVATION_ERF_VSI
#endif
/*
keep the backward compatibility with spec 1.1 for vxCopyTensorPatch_11

View File

@ -77,6 +77,7 @@ typedef struct _vsi_nn_runtime_option_t
int32_t enable_concat_optimize;
int32_t enable_asymi8_to_u8;
int32_t enable_dataconvert_optimize;
int32_t enable_stream_processor;
} vsi_nn_runtime_option_t;
/**
@ -87,7 +88,7 @@ typedef struct _vsi_nn_context_t
vx_context c;
vsi_nn_hw_config_t config;
vsi_nn_runtime_option_t options;
} *vsi_nn_context_t;
} VSI_PUBLIC_TYPE *vsi_nn_context_t;
/**
* Create context

View File

@ -1,7 +1,46 @@
/****************************************************************************
*
* Copyright (c) 2019 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the Software),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
/*****Auto generated header file, Please DO NOT modify manually!*****/
#ifndef _VSI_NN_FEATURE_CONFIG_H
#define _VSI_NN_FEATURE_CONFIG_H
#define VSI_PUBLIC_TYPE
#include <VX/vx_khr_cnn.h>
#if defined(VX_KHR_COMPATIBILITY) && (0x1==VX_KHR_COMPATIBILITY)
#include <VX/vx_khr_compatible.h>
#endif
#ifndef VSI_PERCHANNEL_QUANTIZATION_SUPPORT
#define VSI_PERCHANNEL_QUANTIZATION_SUPPORT
#endif
#if defined(VX_INVALIDATE_HANDLE_SUPPORT) && VX_INVALIDATE_HANDLE_SUPPORT
#define VSI_INVALIDATE_HANDLE_SUPPORT
#endif
#ifndef VSI_0_D_TENSOR_SUPPORT
#define VSI_0_D_TENSOR_SUPPORT
#endif
#if defined(VX_TENSORVIEW_ON_ANY_DIM) && VX_TENSORVIEW_ON_ANY_DIM
#define VSI_CONCAT_ENHANCE_SUPPORT
#endif
#endif

View File

@ -74,7 +74,7 @@ extern "C" {
/**
* Graph structure
*/
struct _vsi_nn_graph
struct VSI_PUBLIC_TYPE _vsi_nn_graph
{
/** Context */
vsi_nn_context_t ctx;
@ -167,6 +167,8 @@ struct _vsi_nn_graph
} complete_signal;
vsi_bool isAllowFastMode;
// DO NOT modify this struct.
};
/**

View File

@ -46,7 +46,7 @@ typedef enum _vsi_nn_log_level_e
VSI_NN_LOG_WARN,
VSI_NN_LOG_INFO,
VSI_NN_LOG_DEBUG
}vsi_nn_log_level_e;
} VSI_PUBLIC_TYPE vsi_nn_log_level_e;
#define VSI_NN_MAX_DEBUG_BUFFER_LEN 1024
#define VSILOGE( fmt, ... ) \

View File

@ -58,7 +58,7 @@ typedef struct _vsi_nn_node_attr_t
} vsi_nn_node_attr_t;
/** Node structure */
struct _vsi_nn_node
struct VSI_PUBLIC_TYPE _vsi_nn_node
{
/**
* Graph handle

View File

@ -200,8 +200,17 @@
#include "ops/vsi_nn_op_scatter_elements.h"
#include "ops/vsi_nn_op_pre_process_yuv422.h"
#include "ops/vsi_nn_op_bucketize.h"
#include "ops/vsi_nn_op_globallppool.h"
#include "ops/vsi_nn_op_gather_nd.h"
#include "ops/vsi_nn_op_avg_pool3d.h"
#include "ops/vsi_nn_op_maxunpool.h"
#include "ops/vsi_nn_op_reversesequence.h"
#include "ops/vsi_nn_op_grid_sample.h"
#include "ops/vsi_nn_op_bilinear_grid_sample.h"
#include "ops/vsi_nn_op_lpnorm.h"
/* custom node header defines */
#include "custom/vsi_nn_custom_node_type.h"
#include "ops/vsi_nn_op_inverse_sigmoid.h"
#if defined(__cplusplus)
extern "C"{
@ -386,6 +395,15 @@ typedef union _vsi_nn_nn_param
vsi_nn_scatter_elements_param scatter_elements;
vsi_nn_pre_process_yuv422_param pre_process_yuv422;
vsi_nn_bucketize_param bucketize;
vsi_nn_globallppool_param globallppool;
vsi_nn_gather_nd_param gather_nd;
vsi_nn_avg_pool3d_param avg_pool3d;
vsi_nn_maxunpool_param maxunpool;
vsi_nn_reversesequence_param reversesequence;
vsi_nn_inverse_sigmoid_param inverse_sigmoid;
vsi_nn_grid_sample_param gridsample;
vsi_nn_bilinear_grid_sample_param bilinear_grid_sample;
vsi_nn_lpnorm_param lpnorm;
void* client_param;
/* custom node data struct define */

View File

@ -48,7 +48,7 @@ extern "C"{
* @see include/custom/custom_ops.def
* @see include/internal/internal_ops.def
*/
typedef int32_t vsi_nn_op_t; enum
typedef int32_t VSI_PUBLIC_TYPE vsi_nn_op_t; enum
{
#define DEF_OP( NAME, ... ) VSI_NN_OP_##NAME,
#include "interface/ops.def"
@ -126,7 +126,7 @@ typedef struct _vsi_nn_op_proc
vsi_nn_op_optimize_t optimize;
uint32_t input_num;
uint32_t output_num;
} vsi_nn_op_proc_t;
} VSI_PUBLIC_TYPE vsi_nn_op_proc_t;
/*------------------------------------
Functions

View File

@ -26,13 +26,6 @@
#include "vsi_nn_feature_config.h"
#ifdef VSI_40BIT_VA_SUPPORT
#ifdef VX_VA40_EXT_SUPPORT
#undef VX_VA40_EXT_SUPPORT
#endif
#define VX_VA40_EXT_SUPPORT 1
#endif
#include <VX/vx_khr_cnn.h>
#include <VX/vx_helper.h>
#include <VX/vx_ext_program.h>
@ -48,12 +41,4 @@
*/
#include "vsi_nn_compatibility.h"
#if defined(__cplusplus)
extern "C"{
#endif
#if defined(__cplusplus)
}
#endif
#endif

View File

@ -87,6 +87,7 @@ typedef enum
VSI_NN_SOURCE_FORMAT_IMAGE_RGB888_PLANAR_SEP,
VSI_NN_SOURCE_FORMAT_IMAGE_YUYV422,
VSI_NN_SOURCE_FORMAT_IMAGE_UYVY422,
VSI_NN_SOURCE_FORMAT_IMAGE_NV21,
} vsi_nn_preprocess_source_format_e;
/**
@ -98,7 +99,7 @@ typedef struct
vsi_nn_preprocess_type_e type;
/** Preprocess paramters */
void* param;
} vsi_nn_preprocess_base_t;
} VSI_PUBLIC_TYPE vsi_nn_preprocess_base_t;
/**
* Postprocess base structure
@ -109,7 +110,7 @@ typedef struct
vsi_nn_postprocess_type_e type;
/** Postrocess paramters */
void* param;
} vsi_nn_postprocess_base_t;
} VSI_PUBLIC_TYPE vsi_nn_postprocess_base_t;
/**
* Process dtype convert parameter structure

View File

@ -44,6 +44,7 @@
#include "vsi_nn_types.h"
#include "vsi_nn_version.h"
#include "vsi_nn_assert.h"
#include "vsi_nn_post.h"
#include "vsi_nn_rnn.h"
#include "vsi_nn_test.h"
#include "vsi_nn_pre_post_process.h"

View File

@ -44,7 +44,7 @@ typedef struct
{
vsi_nn_tensor_id_t output;
vsi_nn_tensor_id_t inputs[VSI_NN_MAX_RNN_CONNECTION_INPUTS];
} vsi_nn_rnn_external_connection_t;
} VSI_PUBLIC_TYPE vsi_nn_rnn_external_connection_t;
/*-------------------------------------------
Procedure to prepare input data, return FALSE

View File

@ -63,7 +63,7 @@ typedef enum
VSI_NN_DIM_FMT_NHWC = 0x01,
VSI_NN_DIM_FMT_NA = 0xFF,
VSI_NN_DIM_FMT_AUTO = VSI_NN_DIM_FMT_NA - 1,
} vsi_nn_dim_fmt_e;
} VSI_PUBLIC_TYPE vsi_nn_dim_fmt_e;
/**
* Quantization type.
@ -125,7 +125,7 @@ typedef struct vsi_nn_dtype
#endif
};
};
} vsi_nn_dtype_t;
} VSI_PUBLIC_TYPE vsi_nn_dtype_t;
/**
* Tensor Attribute
@ -150,15 +150,13 @@ typedef struct vsi_nn_tensor_attr
#ifdef VX_CREATE_TENSOR_SUPPORT_PHYSICAL
vsi_memory_type_e vsi_memory_type;
#endif
#if VX_STREAM_PROCESSOR_SUPPORT
vsi_bool is_dummy;
#endif
} vsi_nn_tensor_attr_t;
// DO NOT modify this struct.
} VSI_PUBLIC_TYPE vsi_nn_tensor_attr_t;
/**
* Tensor structure
*/
struct _vsi_nn_tensor
struct VSI_PUBLIC_TYPE _vsi_nn_tensor
{
/** Tensor attributes */
vsi_nn_tensor_attr_t attr;
@ -168,6 +166,7 @@ struct _vsi_nn_tensor
vx_weights_biases_parameter wb;
/** Mark tensor swapped by vxSwapTensor */
int8_t is_swapped;
// DO NOT modify this struct.
};
/**

View File

@ -321,10 +321,38 @@ OVXLIB_API vsi_status vsi_nn_CopyDataToTensor
);
/**
* Flush Handle
* If you swap the handle of the tensor, you should flush it.
* Swap a tensor's handle
* Swap the current handle out to old_ptr for read/write access, and swap new_ptr in as the tensor's new handle.
*
* @param[in] tensor Tensor handle.
* The APP SHOULD keep track of any handle it created itself so it can manage memory correctly,
* and must never free or write data through a handle allocated by OVXLIB.
*
* OVXLIB will no longer maintain the original handle if new_ptr == NULL.
*
* Before freeing data in a handle allocated by the APP, vsi_nn_SwapHandle(tensor, NULL, FALSE, &prev_ptr)
* should be called to regain control of the handle.
*
* @param[in] tensor Tensor.
* @param[in] new_ptr New handle of tensor.
* @param[in] is_new_ptr_malloc_by_ovxlib Whether new_ptr was allocated by OVXLIB; only meaningful when new_ptr is not NULL.
* @param[out] old_ptr Old handle of tensor.
*
* @return VSI_SUCCESS on success, or an error code otherwise.
*/
OVXLIB_API vsi_status vsi_nn_SwapHandle
(
vsi_nn_tensor_t* tensor,
void* new_ptr,
vsi_bool is_new_ptr_malloc_by_ovxlib,
void** old_ptr
);
/**
* Flush Handle
* Call this function to flush newly written data to the handle currently in use.
* vsi_nn_FlushHandle() should be called last to complete the data-writing operation.
*
* @param[in] tensor Tensor.
*
* @return VSI_SUCCESS on success, or an error code otherwise.
*/
@ -333,6 +361,20 @@ OVXLIB_API vsi_status vsi_nn_FlushHandle
const vsi_nn_tensor_t * tensor
);
/**
* Invalidate Handle
* Invalidate the handle before copying data out of it.
* Before reading data through the handle, vsi_nn_InvalidateHandle() should be called to
* invalidate the application-side cache.
*
* @param[in] tensor Tensor.
*
* @return VSI_SUCCESS on success, or an error code otherwise.
*/
OVXLIB_API vsi_status vsi_nn_InvalidateHandle
(
const vsi_nn_tensor_t* tensor
);
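/*
 * Usage sketch for the handle APIs above. This is illustrative only: `app_buf`,
 * `src_data` and `data_size` are placeholder names, and the tensor is assumed
 * to have been created from the application-owned buffer `app_buf`.
 *
 *   void* prev_ptr = NULL;
 *
 *   // Write path: fill the buffer, then flush so the driver sees the new data.
 *   memcpy(app_buf, src_data, data_size);
 *   vsi_nn_FlushHandle(tensor);
 *
 *   // Read path: invalidate the application-side cache before reading.
 *   vsi_nn_InvalidateHandle(tensor);
 *   vsi_nn_GetTensorHandle(tensor, &prev_ptr);
 *
 *   // Hand ownership back to the application before freeing the buffer.
 *   vsi_nn_SwapHandle(tensor, NULL, FALSE, &prev_ptr);
 *   free(app_buf);
 */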
/**
* Get Tensor Handle
* Get the handle of the tensor
@ -348,6 +390,34 @@ OVXLIB_API vsi_status vsi_nn_GetTensorHandle
void** ptr
);
/**
* Get Tensor is_scalar
* Get the is_scalar flag of the tensor
*
* @param[in] tensor Tensor.
*
* @return is_scalar flag of the tensor.
*/
OVXLIB_API int8_t vsi_nn_GetTensorIsScalar
(
vsi_nn_tensor_t* tensor
);
/**
* Set Tensor is_scalar
* Set the is_scalar for the tensor
*
* @param[in] tensor Tensor.
* @param[in] is_scalar New is_scalar value for the tensor.
*
* @return VSI_SUCCESS on success, or an error code otherwise.
*/
OVXLIB_API vsi_status vsi_nn_SetTensorIsScalar
(
vsi_nn_tensor_t* tensor,
int8_t is_scalar
);
OVXLIB_API vsi_status vsi_nn_CopyRawDataToTensor
(
vsi_nn_graph_t* graph,
@ -722,13 +792,6 @@ vsi_nn_tensor_t* vsi_nn_ConstTensorAdd_impl
#define vsi_nn_ConstTensorAdd(_graph, _output_attr, ...) \
vsi_nn_ConstTensorAdd_impl(_graph, _output_attr, __VA_ARGS__, END_OF_VARIADIC_ARGUMENTS)
vsi_status vsi_nn_SwapHandle
(
vsi_nn_tensor_t * tensor,
void * new_ptr,
void ** old_ptr
);
vsi_bool vsi_nn_ConvertTensor
(
vsi_nn_graph_t* graph,

View File

@ -27,7 +27,6 @@
#include <stdint.h>
#include "vsi_nn_platform.h"
#include "vsi_nn_feature_config.h"
#if defined(__cplusplus)
extern "C"{
@ -109,7 +108,7 @@ typedef enum
VSI_NN_PAD_AUTO,
VSI_NN_PAD_VALID,
VSI_NN_PAD_SAME
} vsi_nn_pad_e;
} VSI_PUBLIC_TYPE vsi_nn_pad_e;
/** reduce type enum */
typedef enum
@ -142,14 +141,14 @@ typedef enum
{
VSI_NN_ROUND_CEIL,
VSI_NN_ROUND_FLOOR
} vsi_nn_round_type_e;
} VSI_PUBLIC_TYPE vsi_nn_round_type_e;
/** Optimize driction */
typedef enum
{
VSI_NN_OPTIMIZE_FORWARD,
VSI_NN_OPTIMIZE_BACKWARD
} vsi_nn_opt_direction_e;
} VSI_PUBLIC_TYPE vsi_nn_opt_direction_e;
#ifdef VX_CREATE_TENSOR_SUPPORT_PHYSICAL
typedef enum
{
@ -195,7 +194,7 @@ typedef enum
#endif
VSI_NN_TYPE_VDATA = VX_TYPE_USER_STRUCT_START + 0x1,
}vsi_nn_type_e;
} VSI_PUBLIC_TYPE vsi_nn_type_e;
typedef int32_t vsi_nn_activation_e; enum
{
@ -236,7 +235,7 @@ typedef enum
{
VSI_NN_GRAPH_PRELOAD_VIPSRAM,
VSI_NN_GRAPH_PRELOAD_AXISRAM
} vsi_nn_graph_attr_preload_type_e;
} VSI_PUBLIC_TYPE vsi_nn_graph_attr_preload_type_e;
typedef enum _vsi_nn_node_attr_preload_type_e
{
@ -257,23 +256,35 @@ typedef enum _vsi_nn_yuv_type
VSI_NN_YUV_TYPE_UYUV422
}vsi_nn_yuv_type;
typedef enum _vsi_nn_nv_type
{
VSI_NN_YUV_TYPE_NV12,
VSI_NN_YUV_TYPE_NV21
}vsi_nn_nv_type;
typedef enum _vsi_nn_roi_align_type_e
{
VSI_NN_ROI_ALIGN_ANDROID,
VSI_NN_ROI_ALIGN
} vsi_nn_roi_align_type_e;
/** Deprecated */
typedef uint32_t vsi_nn_size_t;
/** Tensor id type */
typedef uint32_t vsi_nn_tensor_id_t;
typedef uint32_t VSI_PUBLIC_TYPE vsi_nn_tensor_id_t;
/** Node id type */
typedef uint32_t vsi_nn_node_id_t;
/** @see _vsi_nn_graph */
typedef struct _vsi_nn_graph vsi_nn_graph_t;
typedef struct _vsi_nn_graph VSI_PUBLIC_TYPE vsi_nn_graph_t;
/** @see _vsi_nn_node */
typedef struct _vsi_nn_node vsi_nn_node_t;
typedef struct _vsi_nn_node VSI_PUBLIC_TYPE vsi_nn_node_t;
/** @see _vsi_nn_tensor */
typedef struct _vsi_nn_tensor vsi_nn_tensor_t;
typedef struct _vsi_nn_tensor VSI_PUBLIC_TYPE vsi_nn_tensor_t;
#if defined(__cplusplus)
}

View File

@ -33,7 +33,7 @@ extern "C"{
#define VSI_NN_VERSION_MAJOR 1
#define VSI_NN_VERSION_MINOR 1
#define VSI_NN_VERSION_PATCH 57
#define VSI_NN_VERSION_PATCH 74
#define VSI_NN_VERSION \
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
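/* e.g. version 1.1.74 encodes as 1 * 10000 + 1 * 100 + 74 = 10174 */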

View File

@ -0,0 +1,144 @@
#
# Build ovxlib for android.
#
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
ifeq ($(AQROOT),)
$(error Please set AQROOT env first)
endif
include $(AQROOT)/Android.mk.def
ifeq ($(PLATFORM_VENDOR),1)
LOCAL_VENDOR_MODULE := true
endif
LOCAL_SRC_FILES := \
vsi_nn_context.c \
vsi_nn_client_op.c \
vsi_nn_graph.c \
vsi_nn_node_attr_template.c \
vsi_nn_node.c \
vsi_nn_ops.c \
vsi_nn_daemon.c \
vsi_nn_tensor.c \
vsi_nn_version.c \
vsi_nn_rnn.c \
vsi_nn_rnn_helper.c \
vsi_nn_internal_node.c \
vsi_nn_log.c \
vsi_nn_graph_optimization.c \
vsi_nn_pre_post_process.c
LOCAL_SRC_FILES += \
utils/vsi_nn_code_generator.c \
utils/vsi_nn_binary_tree.c \
utils/vsi_nn_map.c \
utils/vsi_nn_hashmap.c \
utils/vsi_nn_link_list.c \
utils/vsi_nn_math.c \
utils/vsi_nn_dtype.c \
utils/vsi_nn_dtype_util.c \
utils/vsi_nn_shape_util.c \
utils/vsi_nn_limits.c \
utils/vsi_nn_tensor_op.c \
utils/vsi_nn_util.c \
utils/vsi_nn_dlfcn.c \
utils/vsi_nn_constraint_check.c
LOCAL_SRC_FILES += \
quantization/vsi_nn_dynamic_fixed_point.c \
quantization/vsi_nn_asymmetric_affine.c \
quantization/vsi_nn_perchannel_symmetric_affine.c
LOCAL_SRC_FILES += \
post/vsi_nn_post_fasterrcnn.c \
post/vsi_nn_post_cmupose.c
LOCAL_SRC_FILES += \
cpu_backend/vsi_nn_cpu_backend.c \
cpu_backend/vsi_nn_cpu_backend_conv2d.c \
cpu_backend/vsi_nn_cpu_backend_deconv2d.c \
cpu_backend/npuref_interface.c
LOCAL_SRC_FILES += libnnext/vsi_nn_libnnext_resource.c \
libnnext/vsi_nn_vxkernel.c
LOCAL_SRC_FILES += kernel/vsi_nn_kernel.c \
kernel/vsi_nn_kernel_util.c \
kernel/vsi_nn_kernel_backend.c \
kernel/vsi_nn_kernel_eltwise.c \
kernel/vsi_nn_kernel_selector.c \
kernel/vsi_nn_kernel_node.c \
kernel/vsi_nn_kernel_param.c \
kernel/vsi_nn_kernel_gpu_shape_optimize.c \
kernel/vsi_nn_kernel_lut.c \
kernel/vsi_nn_spinst.c \
kernel/vsi_nn_sp_unit_operation.c \
kernel/vsi_nn_sp_lut.c \
kernel/vsi_nn_gpu.c
LIBNNEXT_KERNEL_SOURCES := $(wildcard $(LOCAL_PATH)/libnnext/ops/kernel/*.c)
LOCAL_SRC_FILES += $(LIBNNEXT_KERNEL_SOURCES:$(LOCAL_PATH)/%=%)
KERNEL_SOURCES := $(wildcard $(LOCAL_PATH)/kernel/cl/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/cpu/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/evis/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/vx/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/kernel/sp/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/evis/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/cl/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/cpu/*.c)
KERNEL_SOURCES += $(wildcard $(LOCAL_PATH)/custom/ops/kernel/sp/*.c)
LOCAL_SRC_FILES += $(KERNEL_SOURCES:$(LOCAL_PATH)/%=%)
OPERATION_SOURCES := $(wildcard $(LOCAL_PATH)/ops/*.c)
LOCAL_SRC_FILES += $(OPERATION_SOURCES:$(LOCAL_PATH)/%=%)
LOCAL_SHARED_LIBRARIES := \
liblog \
libjpeg \
libGAL \
libOpenVX \
libVSC \
libdl
LOCAL_C_INCLUDES += \
external/libjpeg-turbo \
$(AQROOT)/sdk/inc/CL \
$(AQROOT)/sdk/inc/VX \
$(AQROOT)/sdk/inc/ \
$(AQROOT)/sdk/inc/HAL \
$(LOCAL_PATH)/../include \
$(LOCAL_PATH)/../include/ops \
$(LOCAL_PATH)/../include/utils \
$(LOCAL_PATH)/../include/infernce \
$(LOCAL_PATH)/../include/client \
$(LOCAL_PATH)/../include/cpu_backend \
$(LOCAL_PATH)/../include/libnnext \
$(LOCAL_PATH)/../src
LOCAL_CFLAGS := \
-DLINUX \
-D'OVXLIB_API=__attribute__((visibility("default")))' \
-DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION)\
-Wno-sign-compare \
-Wno-implicit-function-declaration \
-Wno-sometimes-uninitialized \
-Wno-unused-parameter \
-Wno-enum-conversion \
-Wno-missing-field-initializers \
-Wno-tautological-compare \
-Wno-missing-braces
LOCAL_MODULE:= libovxlib
LOCAL_MODULE_TAGS := optional
LOCAL_PRELINK_MODULE := false
include $(BUILD_SHARED_LIBRARY)

View File

@ -0,0 +1,184 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdlib.h>
#include <math.h>
#include "vsi_nn_types.h"
#include "vsi_nn_platform.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_node.h"
#include "vsi_nn_log.h"
#include "vsi_nn_test.h"
#include "vsi_nn_error.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "utils/vsi_nn_dtype_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vsi_nn_vxkernel.h"
#define _CPU_ARG_NUM (1)
#define _CPU_INPUT_NUM (2)
#define _CPU_OUTPUT_NUM (1)
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
#define _KERNEL_NAME ("com.vivantecorp.extension.CustomSampleVXC")
#define SCALAR_INPUT_AXIS (3)
__BEGIN_DECLS
DEF_KERNEL_EXECUTOR(_softmax_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t* param,
size_t param_size
)
{
vsi_status status = VX_SUCCESS;
float *buffer[_CPU_IO_NUM] = {NULL};
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *attr[_CPU_IO_NUM] = {NULL};
uint32_t i = 0, out_elements = 0;
int32_t axis;
tensors[0] = (vsi_nn_kernel_tensor_t)param[0]; // input0
tensors[1] = (vsi_nn_kernel_tensor_t)param[1]; // input1
tensors[2] = (vsi_nn_kernel_tensor_t)param[2]; // output
attr[0] = vsi_nn_kernel_tensor_attr_create(tensors[0]);
attr[1] = vsi_nn_kernel_tensor_attr_create(tensors[1]);
attr[2] = vsi_nn_kernel_tensor_attr_create(tensors[2]);
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
CHECK_STATUS_FAIL_GOTO(status, final );
/* alloc the float32 data buffer */
buffer[0] = (float *)vsi_nn_kernel_tensor_create_buffer(tensors[0], attr[0], TRUE);
CHECK_PTR_FAIL_GOTO(buffer[0], "Create input0 buffer fail.", final);
buffer[1] = (float *)vsi_nn_kernel_tensor_create_buffer(tensors[1], attr[1], TRUE);
CHECK_PTR_FAIL_GOTO(buffer[1], "Create input1 buffer fail.", final);
out_elements = (uint32_t)vsi_nn_kernel_tensor_attr_get_size(attr[2]);
buffer[2] = (float *)malloc(out_elements * sizeof(float));
CHECK_PTR_FAIL_GOTO( buffer[2], "Create output buffer fail.", final );
memset(buffer[2], 0, out_elements * sizeof(float));
/* CPU reference implementation (despite the _softmax_compute name, this sample just adds input1[0] to every element of input0) */
for(i = 0; i < out_elements; i++)
{
buffer[2][i] = buffer[0][i] + buffer[1][0];
}
status = vsi_nn_kernel_tensor_write_from_float(
tensors[2], attr[2], buffer[2], out_elements );
final:
for(i = 0; i < _CPU_IO_NUM; i ++)
{
if(buffer[i])
{
free(buffer[i]);
}
vsi_nn_kernel_tensor_attr_release(&attr[i]);
}
return status;
}
static vx_param_description_t kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
};
static const vx_kernel_description_t _kernel_info =
{
KERNEL_ID_PLACEHOLDER,
_KERNEL_NAME,
_softmax_compute,
kernel_param_def,
_cnt_of_array( kernel_param_def ),
vsi_nn_KernelValidator,
NULL,
NULL,
vsi_nn_KernelInitializer,
vsi_nn_KernelDeinitializer
};
static vsi_status _query_kernel
(
vsi_nn_tensor_t* const* const inputs,
vsi_nn_tensor_t* const* const outputs,
vsi_nn_kernel_t* kernel
)
{
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
return VSI_SUCCESS;
}
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_SUCCESS;
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t axis = 0;
axis = vsi_nn_kernel_param_get_int32(params, "axis");
status = _query_kernel(inputs, outputs, kernel);
if(status != VSI_SUCCESS)
{
return NULL;
}
node = vsi_nn_kernel_create_node(graph, kernel);
if(node == NULL)
{
return NULL;
}
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io(backend_params, _CPU_PARAM_NUM,
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM);
backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
graph, I32, &axis);
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param(node, backend_params, _CPU_PARAM_NUM);
vsi_nn_kernel_scalar_release(&backend_params[SCALAR_INPUT_AXIS]);
return node;
}
__END_DECLS
REGISTER_BACKEND_CPU( custom_sample, _setup )

View File

@ -0,0 +1,103 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_platform.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_node.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_log.h"
#include "kernel/vsi_nn_kernel.h"
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_nn_kernel_param_t *param = NULL;
vsi_nn_custom_sample_param *p;
p = &self->nn_param.custom_sample;
param = vsi_nn_kernel_param_create();
vsi_nn_kernel_param_add_int32(param, "axis", p->axis);
self->n = (vx_node)vsi_nn_kernel_selector(
self->graph,
"custom_sample",
inputs, 2,
outputs, 1,
param);
vsi_nn_kernel_param_release(&param);
return VSI_SUCCESS;
}
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
/*TODO: Check params. */
return TRUE;
} /* op_check() */
static vsi_bool op_setup
(
vsi_nn_node_t * node,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
if (VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num)
{
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
memmove(outputs[0]->attr.size, inputs[0]->attr.size,
inputs[0]->attr.dim_num * sizeof(vsi_size_t));
}
return TRUE;
}
#ifdef __cplusplus
extern "C" {
#endif
/* Registrar */
DEF_OP_REG
(
/* op_name */ CUSTOM_SAMPLE,
/* init */ NULL,
/* compute */ op_compute,
/* deinit */ vsi_nn_op_common_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ NULL,
/* input_num */ 2,
/* output_num */ 1
);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,354 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
typedef enum
{
INTERNAL_KERNEL_AVG_POOL3D,
} _internal_kernel_e;
#define _AVG_POOL3D_KERNEL_SOURCE_NAME "avg_pool3d"
// Add kernel hashtable here
#define AVG_POOL3D_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
(( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
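// Key layout: input dtype in bits 8 and up, output dtype in the low 8 bits.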
#define AVG_POOL3D_KERNELS( IN_DTYPE, OUT_DTYPE ) \
{ AVG_POOL3D_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
CVIVANTE_NAMESPACE("cl.avg_pool3d_"#IN_DTYPE"to"#OUT_DTYPE), \
_AVG_POOL3D_KERNEL_SOURCE_NAME }, \
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _avg_pool3d_kernel_map[] =
{
// Register kernel here
AVG_POOL3D_KERNELS( F32, F32 )
AVG_POOL3D_KERNELS( F32, U32 )
AVG_POOL3D_KERNELS( F32, I32 )
AVG_POOL3D_KERNELS( U32, U32 )
AVG_POOL3D_KERNELS( U32, F32 )
AVG_POOL3D_KERNELS( I32, I32 )
AVG_POOL3D_KERNELS( I32, F32 )
AVG_POOL3D_KERNELS( BF16, BF16 )
};
/*
* Kernel params
*/
static vx_param_description_t _avg_pool3d_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _AVG_POOL3D_PARAM_NUM _cnt_of_array( _avg_pool3d_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_avg_pool3d_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_status status = VSI_FAILURE;
vx_tensor output = (vx_tensor)param[1];
vx_scalar depth_out = (vx_scalar)param[14];
int32_t depth_out_value;
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL;
vxReadScalarValue(depth_out, &depth_out_value);
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
output_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = depth_out_value;
gpu_param.global_size[0] = (output_shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0];
gpu_param.global_size[1] = (output_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1];
gpu_param.global_size[2] = (output_shape->data[2] + gpu_param.global_scale[2] - 1)
/ gpu_param.global_scale[2];
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (output_attr)
{
vsi_nn_kernel_tensor_attr_release(&output_attr);
}
return status;
} /* _avg_pool3d_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _avg_pool3d_kernel_map;
size_t kernel_map_size = _cnt_of_array( _avg_pool3d_kernel_map );
vx_param_description_t * param_def = _avg_pool3d_kernel_param_def;
vx_kernel_initialize_f initializer = _avg_pool3d_initializer;
uint32_t key = 0;
uint32_t i = 0;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
(( in_dtype ) | (out_dtype << 8 ))
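/* The CL sources only provide 32-bit kernel variants, so narrower dtypes are
 * folded onto their 32-bit counterparts below (F16 -> F32, I8/I16 -> I32,
 * U8 -> U32) before the kernel-map lookup. */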
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
{
case _PACK_SELECT_KEY(F32, F32):
case _PACK_SELECT_KEY(F16, F16):
case _PACK_SELECT_KEY(F32, F16):
case _PACK_SELECT_KEY(F16, F32):
key = AVG_POOL3D_HASH_KEY( F32, F32);
break;
case _PACK_SELECT_KEY(F32, U8):
case _PACK_SELECT_KEY(F16, U8):
key = AVG_POOL3D_HASH_KEY( F32, U32);
break;
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F32, I16):
case _PACK_SELECT_KEY(F16, I8):
case _PACK_SELECT_KEY(F16, I16):
key = AVG_POOL3D_HASH_KEY( F32, I32);
break;
case _PACK_SELECT_KEY(U8, U8):
key = AVG_POOL3D_HASH_KEY( U32, U32);
break;
case _PACK_SELECT_KEY(U8, F16):
case _PACK_SELECT_KEY(U8, F32):
key = AVG_POOL3D_HASH_KEY( U32, F32);
break;
case _PACK_SELECT_KEY(I8, I8):
case _PACK_SELECT_KEY(I8, I16):
case _PACK_SELECT_KEY(I16, I8):
case _PACK_SELECT_KEY(I16, I16):
key = AVG_POOL3D_HASH_KEY( I32, I32);
break;
case _PACK_SELECT_KEY(I8, F16):
case _PACK_SELECT_KEY(I8, F32):
case _PACK_SELECT_KEY(I16, F16):
case _PACK_SELECT_KEY(I16, F32):
key = AVG_POOL3D_HASH_KEY( I32, F32);
break;
default:
key = AVG_POOL3D_HASH_KEY( in_dtype, out_dtype);
break;
}
#undef _PACK_SELECT_KEY
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = _cnt_of_array( _avg_pool3d_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_AVG_POOL3D_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
int32_t width = (int32_t)inputs[0]->attr.size[0];
int32_t height = (int32_t)inputs[0]->attr.size[1];
int32_t ksize_x = vsi_nn_kernel_param_get_int32(params, "ksize_x");
int32_t ksize_y = vsi_nn_kernel_param_get_int32(params, "ksize_y");
int32_t ksize_z = vsi_nn_kernel_param_get_int32(params, "ksize_z");
int32_t stride_x = vsi_nn_kernel_param_get_int32(params, "stride_x");
int32_t stride_y = vsi_nn_kernel_param_get_int32(params, "stride_y");
int32_t stride_z = vsi_nn_kernel_param_get_int32(params, "stride_z");
int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
int32_t pad_front = vsi_nn_kernel_param_get_int32(params, "pad_front");
int32_t depth_in = vsi_nn_kernel_param_get_int32(params, "depth_in");
int32_t depth_out = vsi_nn_kernel_param_get_int32(params, "depth_out");
int32_t count_include_pad = vsi_nn_kernel_param_get_int32(params, "count_include_pad");
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
inputs[0]->attr.dim_num )
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ))
{
return NULL;
}
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
uint32_t index = 2;
vsi_nn_kernel_node_pack_io( node_params, _AVG_POOL3D_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_x );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_y );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &ksize_z );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_x );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_y );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &stride_z );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_front );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &depth_in );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &depth_out );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &count_include_pad );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _AVG_POOL3D_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[2] );
vsi_nn_kernel_scalar_release( &node_params[3] );
vsi_nn_kernel_scalar_release( &node_params[4] );
vsi_nn_kernel_scalar_release( &node_params[5] );
vsi_nn_kernel_scalar_release( &node_params[6] );
vsi_nn_kernel_scalar_release( &node_params[7] );
vsi_nn_kernel_scalar_release( &node_params[8] );
vsi_nn_kernel_scalar_release( &node_params[9] );
vsi_nn_kernel_scalar_release( &node_params[10] );
vsi_nn_kernel_scalar_release( &node_params[11] );
vsi_nn_kernel_scalar_release( &node_params[12] );
vsi_nn_kernel_scalar_release( &node_params[13] );
vsi_nn_kernel_scalar_release( &node_params[14] );
vsi_nn_kernel_scalar_release( &node_params[15] );
vsi_nn_kernel_scalar_release( &node_params[16] );
vsi_nn_kernel_scalar_release( &node_params[17] );
vsi_nn_kernel_scalar_release( &node_params[18] );
vsi_nn_kernel_scalar_release( &node_params[19] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CL( avg_pool3d, _setup )

View File

@ -0,0 +1,381 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
typedef enum
{
INTERNAL_KERNEL_BILINEAR_GRID_SAMPLE,
} _internal_kernel_e;
#define _BILINEAR_GRID_SAMPLE_KERNEL_SOURCE() "bilinear_grid_sample"
#define STR(a) #a
// Add kernel hashtable here
#define BILINEAR_GRID_SAMPLE_HASH_KEY(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE) \
((IN1_DTYPE << 20) | (IN0_DTYPE << 8) | (OUT_DTYPE))
#define PACK_KERNEL_MAP(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE) \
{ \
BILINEAR_GRID_SAMPLE_HASH_KEY(IN0_DTYPE, IN1_DTYPE, OUT_DTYPE), \
CVIVANTE_NAMESPACE("cl.bilinear_grid_sample_" STR(IN0_DTYPE) "_" STR(IN1_DTYPE) "to" STR(OUT_DTYPE)), \
_BILINEAR_GRID_SAMPLE_KERNEL_SOURCE() \
}
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _bilinear_grid_sample_kernel_map[] =
{
// Register kernel here
PACK_KERNEL_MAP(F32, F32, F32 ),
PACK_KERNEL_MAP(U8, U8, U8),
};
/*
* Kernel params
*/
static vx_param_description_t _bilinear_grid_sample_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _BILINEAR_GRID_SAMPLE_PARAM_NUM 8
#define _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM \
_cnt_of_array(_bilinear_grid_sample_kernel_param_def)
#define SCALAR_HALF_INPUT0_W (3)
#define SCALAR_HALF_INPUT0_H (4)
#define SCALAR_ADD_VALUE_W (5)
#define SCALAR_ADD_VALUE_H (6)
#define SCALAR_DEPTH (7)
#define SCALAR_INPUT0_SCALE (8)
#define SCALAR_INPUT0_TAIL (9)
#define SCALAR_INPUT1_SCALE (10)
#define SCALAR_INPUT1_TAIL (11)
#define SCALAR_OUTPUT_SCALE (12)
#define SCALAR_OUTPUT_TAIL (13)
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_bilinear_grid_sample_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {3, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
vsi_nn_kernel_tensor_attr_t* output_attr = NULL;
vsi_size_array_t* out_shape = NULL;
output_attr =
vsi_nn_kernel_tensor_attr_create((vsi_nn_kernel_tensor_t)param[2]);
CHECK_PTR_FAIL_GOTO(output_attr, "Create tensor attr buffer fail.", final);
out_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.dim = 2;
gpu_param.global_size[0] =
gpu_align_p2((out_shape->data[0] + gpu_param.global_scale[0] - 1) /
gpu_param.global_scale[0],
4);
gpu_param.global_size[1] =
((out_shape->data[1] + gpu_param.global_scale[1] - 1) /
gpu_param.global_scale[1]);
gpu_param.global_size[2] = 1;
status = vsi_nn_kernel_gpu_config(node, &gpu_param);
final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) \
if (_PTR) { \
vsi_nn_kernel_tensor_attr_release(&_PTR); \
_PTR = NULL; \
}
SAFE_FREE_TENSOR_ATTR(output_attr);
return status;
} /* _bilinear_grid_sample_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
vsi_bool* is_use_u8_kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in0_dtype, in1_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _bilinear_grid_sample_kernel_map;
size_t kernel_map_size = _cnt_of_array( _bilinear_grid_sample_kernel_map );
vx_param_description_t * param_def = _bilinear_grid_sample_kernel_param_def;
size_t param_def_size = _cnt_of_array(_bilinear_grid_sample_kernel_param_def);
vx_kernel_initialize_f initializer = _bilinear_grid_sample_initializer;
uint32_t key;
uint32_t i;
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (F16 == in0_dtype) {
in0_dtype = F32;
}
if (F16 == in1_dtype) {
in1_dtype = F32;
}
if (F16 == out_dtype) {
out_dtype = F32;
}
if ((U8 == in0_dtype) || (U8 == out_dtype)) {
param_def_size = _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM;
*is_use_u8_kernel = TRUE;
} else {
param_def_size = _BILINEAR_GRID_SAMPLE_PARAM_NUM;
*is_use_u8_kernel = FALSE;
}
key = BILINEAR_GRID_SAMPLE_HASH_KEY(in0_dtype, in1_dtype, out_dtype);
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = (uint32_t)param_def_size;
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_nn_kernel_node_t node = NULL;
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM];
vsi_size_t final_shape[VSI_NN_MAX_DIM_NUM] = {1, 1, 1, 1};
uint32_t final_in1_rank = 0;
vsi_nn_tensor_t* rs_tensors = NULL;
vsi_nn_tensor_t* final_tensors[3] = {NULL};
vsi_size_t in0_width = inputs[0]->attr.size[0];
vsi_size_t in0_height = inputs[0]->attr.size[1];
float input0_zp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float input0_scale = vsi_nn_get_tensor_scale(inputs[0]);
float input0_tail = -(input0_zp * input0_scale);
float input1_zp = (float)vsi_nn_get_tensor_zero_point(inputs[1]);
float input1_scale = vsi_nn_get_tensor_scale(inputs[1]);
float input1_tail = -(input1_zp * input1_scale);
float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float output_scale = 1.0f / vsi_nn_get_tensor_scale(outputs[0]);
vsi_bool is_use_u8_kernel = FALSE;
int32_t align_corners =
vsi_nn_kernel_param_get_int32(params, "align_corners");
uint32_t pad_val = 0;
int32_t depth = 0;
vsi_nn_kernel_dtype_e in0_dtype;
float half_input0_w, half_input0_h, add_float_value_w, add_float_value_h;
// Check if gpu can support the size
if (!vsi_nn_kernel_gpu_check_shape(inputs[0]->attr.size,
inputs[0]->attr.dim_num)) {
return NULL;
}
if (!vsi_nn_kernel_gpu_check_shape(inputs[1]->attr.size,
inputs[1]->attr.dim_num)) {
return NULL;
}
final_tensors[0] = inputs[0];
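/* Grids with rank >= 3 are flattened: the two innermost dims collapse into one
 * so the kernel can index the grid as a 2D (optionally batched) image; a
 * rank-3 grid becomes rank 2. */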
if (inputs[1]->attr.dim_num >= 3) {
final_shape[0] = inputs[1]->attr.size[1] * inputs[1]->attr.size[0];
final_shape[1] = inputs[1]->attr.size[2];
final_shape[2] = 1;
final_shape[3] = inputs[1]->attr.dim_num > 3 ? inputs[1]->attr.size[3] : 1;
final_in1_rank =
inputs[1]->attr.dim_num == 3 ? 2 : inputs[1]->attr.dim_num;
if (!vsi_nn_kernel_gpu_check_shape(final_shape, final_in1_rank)) {
return NULL;
}
rs_tensors = vsi_nn_reshape_tensor(graph, inputs[1], final_shape, final_in1_rank);
final_tensors[1] = rs_tensors;
} else {
final_tensors[1] = inputs[1];
}
final_tensors[2] = outputs[0];
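/* Map a normalized grid coordinate x in [-1, 1] to an input pixel coordinate
 * (the kernel presumably evaluates x * half + add with the scalars below):
 *   align_corners: x_in = (x + 1) * (W - 1) / 2   (half = (W-1)/2, add = half)
 *   otherwise:     x_in = ((x + 1) * W - 1) / 2   (half = W/2,     add = W/2 - 0.5)
 * The H direction is handled the same way. */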
if (align_corners) {
half_input0_w = ((float)in0_width - 1.0f) * 0.5f;
half_input0_h = ((float)in0_height - 1.0f) * 0.5f;
add_float_value_w = half_input0_w;
add_float_value_h = half_input0_h;
} else {
half_input0_w = (float)in0_width * 0.5f;
half_input0_h = (float)in0_height * 0.5f;
add_float_value_w = half_input0_w - 0.5f;
add_float_value_h = half_input0_h - 0.5f;
}
depth = (int32_t)inputs[0]->attr.size[2];
in0_dtype = vsi_nn_kernel_map_dtype(inputs[0]->attr.dtype.vx_type);
if (U8 == in0_dtype) {
pad_val = inputs[0]->attr.dtype.zero_point;
}
status = _query_kernel(kernel, inputs, outputs, &is_use_u8_kernel);
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
size_t node_params_num = _BILINEAR_GRID_SAMPLE_PARAM_NUM;
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM,
final_tensors, input_num, &final_tensors[2], output_num );
node_params[SCALAR_HALF_INPUT0_W] = vsi_nn_kernel_scalar_create( graph, F32, &half_input0_w );
node_params[SCALAR_HALF_INPUT0_H] = vsi_nn_kernel_scalar_create( graph, F32, &half_input0_h );
node_params[SCALAR_ADD_VALUE_W] = vsi_nn_kernel_scalar_create( graph, F32, &add_float_value_w );
node_params[SCALAR_ADD_VALUE_H] = vsi_nn_kernel_scalar_create( graph, F32, &add_float_value_h );
node_params[SCALAR_DEPTH] = vsi_nn_kernel_scalar_create( graph, I32, &depth );
if (is_use_u8_kernel)
{
node_params[SCALAR_INPUT0_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input0_scale );
node_params[SCALAR_INPUT0_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &input0_tail );
node_params[SCALAR_INPUT1_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input1_scale );
node_params[SCALAR_INPUT1_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &input1_tail );
node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create( graph, F32, &output_zp );
node_params_num = _BILINEAR_GRID_SAMPLE_PARAM_QUANT_NUM;
}
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
VSI_ASSERT(status == VSI_SUCCESS);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_HALF_INPUT0_W]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_HALF_INPUT0_H]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_ADD_VALUE_W]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_ADD_VALUE_H]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_DEPTH]);
if (is_use_u8_kernel) {
vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT0_SCALE]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT0_TAIL]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT1_SCALE]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_INPUT1_TAIL]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_OUTPUT_SCALE]);
vsi_nn_kernel_scalar_release(&node_params[SCALAR_OUTPUT_TAIL]);
}
{
// Set default border mode.
vx_border_t border;
border.mode = VX_BORDER_CONSTANT;
border.constant_value.U32 = pad_val;
status = vxSetNodeAttribute(
(vx_node)node, VX_NODE_BORDER, &border, sizeof(border));
CHECK_STATUS(status);
}
}
}
vsi_safe_release_tensor(rs_tensors);
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CL( bilinear_grid_sample, _setup )

View File

@ -35,6 +35,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
__BEGIN_DECLS
@ -258,19 +259,36 @@ static vsi_nn_kernel_node_t _setup
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
vsi_size_t new_rank = 0;
vsi_bool ret = TRUE;
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
ret = vsi_nn_kernel_optimize_element_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num, shape, &new_rank);
if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
inputs[0]->attr.dim_num ) )
if ( ret )
{
return NULL;
}
image_2d = (inputs[0]->attr.dim_num == 2 || inputs[0]->attr.size[2] == 1);
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape, new_rank );
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
outputs[0], shape, new_rank );
status = _query_kernel( kernel, inputs, outputs, image_2d);
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
if( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[0]->attr.size,
reshape_tensors[0]->attr.dim_num ) )
{
return NULL;
}
image_2d = (reshape_tensors[0]->attr.dim_num == 2 || reshape_tensors[0]->attr.size[2] == 1);
status = _query_kernel( kernel, reshape_tensors, &reshape_tensors[1], image_2d);
if ( VSI_SUCCESS == status )
{
@ -279,7 +297,7 @@ static vsi_nn_kernel_node_t _setup
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _CLIP_PARAM_NUM,
inputs, input_num, outputs, output_num );
reshape_tensors, input_num, &reshape_tensors[1], output_num );
node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
@ -297,6 +315,10 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
}
}
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
return node;
} /* _setup() */

View File

@ -34,6 +34,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
@ -287,7 +288,7 @@ static vsi_status _query_kernel
int i;
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
input1_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (outputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_NONE && output_dtype == I8)
@ -335,31 +336,85 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
int32_t operation = 0;
int32_t operation = vsi_nn_kernel_param_get_int32( params, "operation" );
vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
vsi_size_t new_rank = 0;
vsi_bool ret = FALSE;
float input0Scale = vsi_nn_get_tensor_scale(inputs[0]);
float input0Tail = (float)vsi_nn_get_tensor_zero_point(inputs[0]) * input0Scale;
float input1Scale = vsi_nn_get_tensor_scale(inputs[1]);
float input1Tail = (float)vsi_nn_get_tensor_zero_point(inputs[1]) * input1Scale;
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
ret = vsi_nn_kernel_optimize_eltwise_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num,
inputs[1]->attr.size, inputs[1]->attr.dim_num,
outputs[0]->attr.size, outputs[0]->attr.dim_num,
shapes[0], shapes[1], shapes[2], &new_rank );
if ( ret )
{
return NULL;
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shapes[0], new_rank );
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
inputs[1], shapes[1], new_rank );
reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
outputs[0], shapes[2], new_rank );
#define _swap_tensor(a, b, tmp) \
do { \
tmp = a; \
a = b; \
b = tmp; \
} while(0)
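/* When the second operand owns the larger batch dimension, swap the operands
 * so the kernel broadcasts the smaller one, and mirror any ordering comparison,
 * since a OP b == b OP' a. */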
if (shapes[1][3] > shapes[0][3] && new_rank == 4)
{
vsi_nn_tensor_t* reshape_tmp;
_swap_tensor(reshape_tensors[0], reshape_tensors[1], reshape_tmp);
if (VSI_NN_RELATIONAL_OPS_GREAT == operation)
{
operation = VSI_NN_RELATIONAL_OPS_LESS;
}
else if (VSI_NN_RELATIONAL_OPS_LESS == operation)
{
operation = VSI_NN_RELATIONAL_OPS_GREAT;
}
else if (VSI_NN_RELATIONAL_OPS_GREAT_EQUAL == operation)
{
operation = VSI_NN_RELATIONAL_OPS_LESS_EQUAL;
}
else if (VSI_NN_RELATIONAL_OPS_LESS_EQUAL == operation)
{
operation = VSI_NN_RELATIONAL_OPS_GREAT_EQUAL;
}
}
#undef _swap_tensor
}
else
{
goto final;
}
operation = vsi_nn_kernel_param_get_int32( params, "operation" );
if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
reshape_tensors[2]->attr.dim_num ) )
{
goto final;
}
image_2d = (outputs[0]->attr.dim_num == 2);
status = _query_kernel( inputs, outputs, operation, image_2d, kernel );
if( VSI_SUCCESS == status)
image_2d = (reshape_tensors[2]->attr.dim_num == 2 || reshape_tensors[2]->attr.size[2] == 1);
status = _query_kernel( reshape_tensors, &reshape_tensors[2], operation, image_2d, kernel );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
if ( node )
{
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
inputs, 2, outputs, 1 );
reshape_tensors, 2, &reshape_tensors[2], 1 );
node_params[SCALAR_INPUT0_SCALE] = vsi_nn_kernel_scalar_create(
graph, F32, &input0Scale );
node_params[SCALAR_INPUT0_TAIL] = vsi_nn_kernel_scalar_create(
@ -379,6 +434,12 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT1_TAIL] );
}
}
final:
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
vsi_safe_release_tensor( reshape_tensors[2] );
return node;
} /* _setup() */

View File

@ -69,14 +69,19 @@ static const struct {
{
HASH_CUMSUM_KERNELS(0, U8, U8)
HASH_CUMSUM_KERNELS(0, F32, F32)
HASH_CUMSUM_KERNELS(0, F32, U8)
HASH_CUMSUM_KERNELS(1, U8, U8)
HASH_CUMSUM_KERNELS(1, F32, F32)
HASH_CUMSUM_KERNELS(1, F32, U8)
HASH_CUMSUM_KERNELS(2, U8, U8)
HASH_CUMSUM_KERNELS(2, F32, F32)
HASH_CUMSUM_KERNELS(2, F32, U8)
HASH_CUMSUM_KERNELS_2D(0, U8, U8)
HASH_CUMSUM_KERNELS_2D(0, F32, F32)
HASH_CUMSUM_KERNELS_2D(0, F32, U8)
HASH_CUMSUM_KERNELS_2D(1, U8, U8)
HASH_CUMSUM_KERNELS_2D(1, F32, F32)
HASH_CUMSUM_KERNELS_2D(1, F32, U8)
};
/*

View File

@ -56,6 +56,10 @@ typedef enum
UNARY_RCP,
UNARY_SIGN,
UNARY_SOFTSIGN,
UNARY_ATAN,
UNARY_ATANH,
UNARY_ACOSH,
UNARY_INVERSE_SIGMOID,
} unary_type_e;
/*
@ -100,10 +104,18 @@ typedef enum
#define RCP_OPERATION rcp
#define SIGN_OPERATION sign
#define SOFTSIGN_OPERATION softsign
#define ATAN_OPERATION atan
#define ATANH_OPERATION atanh
#define ACOSH_OPERATION acosh
#define INVERSE_SIGMOID_OPERATION inverse_sigmoid
#define ADD_UNARY_SH_KERNELS(name, src_type, dst_type) \
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, src_type, dst_type) \
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, src_type, dst_type)
#define ADD_UNARY_SH_KERNELS(name) \
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, F32, F32) \
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, F32, F32) \
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, U8, U8) \
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, U8, U8) \
TENSOR_UNARY_KERNELS_3D(name##_OPERATION, UNARY_##name, U8, F32) \
TENSOR_UNARY_KERNELS_2D(name##_OPERATION, UNARY_##name, U8, F32)
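// Each op now registers six kernels: 3D and 2D variants for F32->F32, U8->U8, and U8->F32.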
static const struct {
uint32_t key;
@ -111,39 +123,28 @@ static const struct {
const char* source_name;
} kernel_map[] =
{
ADD_UNARY_SH_KERNELS(SIN, F32, F32)
ADD_UNARY_SH_KERNELS(COS, F32, F32)
ADD_UNARY_SH_KERNELS(EXP, F32, F32)
ADD_UNARY_SH_KERNELS(LOG, F32, F32)
ADD_UNARY_SH_KERNELS(NEG, F32, F32)
ADD_UNARY_SH_KERNELS(HSIGMOID, F32, F32)
ADD_UNARY_SH_KERNELS(MISH, F32, F32)
ADD_UNARY_SH_KERNELS(ROUND, F32, F32)
ADD_UNARY_SH_KERNELS(GELU, F32, F32)
ADD_UNARY_SH_KERNELS(HGELU, F32, F32)
ADD_UNARY_SH_KERNELS(SELU, F32, F32)
ADD_UNARY_SH_KERNELS(CELU, F32, F32)
ADD_UNARY_SH_KERNELS(RCP, F32, F32)
ADD_UNARY_SH_KERNELS(SIGN, F32, F32)
ADD_UNARY_SH_KERNELS(SOFTSIGN, F32, F32)
ADD_UNARY_SH_KERNELS(SIN)
ADD_UNARY_SH_KERNELS(COS)
ADD_UNARY_SH_KERNELS(EXP)
ADD_UNARY_SH_KERNELS(LOG)
ADD_UNARY_SH_KERNELS(NEG)
ADD_UNARY_SH_KERNELS(HSIGMOID)
ADD_UNARY_SH_KERNELS(MISH)
ADD_UNARY_SH_KERNELS(ROUND)
ADD_UNARY_SH_KERNELS(GELU)
ADD_UNARY_SH_KERNELS(HGELU)
ADD_UNARY_SH_KERNELS(SELU)
ADD_UNARY_SH_KERNELS(CELU)
ADD_UNARY_SH_KERNELS(RCP)
ADD_UNARY_SH_KERNELS(SIGN)
ADD_UNARY_SH_KERNELS(SOFTSIGN)
ADD_UNARY_SH_KERNELS(ATAN)
ADD_UNARY_SH_KERNELS(ATANH)
ADD_UNARY_SH_KERNELS(ACOSH)
ADD_UNARY_SH_KERNELS(INVERSE_SIGMOID)
ADD_UNARY_SH_KERNELS(SIN, U8, U8)
ADD_UNARY_SH_KERNELS(COS, U8, U8)
ADD_UNARY_SH_KERNELS(EXP, U8, U8)
ADD_UNARY_SH_KERNELS(LOG, U8, U8)
ADD_UNARY_SH_KERNELS(NEG, U8, U8)
ADD_UNARY_SH_KERNELS(HSIGMOID, U8, U8)
ADD_UNARY_SH_KERNELS(MISH, U8, U8)
ADD_UNARY_SH_KERNELS(ROUND, U8, U8)
ADD_UNARY_SH_KERNELS(GELU, U8, U8)
ADD_UNARY_SH_KERNELS(HGELU, U8, U8)
ADD_UNARY_SH_KERNELS(SELU, U8, U8)
ADD_UNARY_SH_KERNELS(CELU, U8, U8)
ADD_UNARY_SH_KERNELS(RCP, U8, U8)
ADD_UNARY_SH_KERNELS(SIGN, U8, U8)
ADD_UNARY_SH_KERNELS(SOFTSIGN, U8, U8)
ADD_UNARY_SH_KERNELS(NEG, I32, I32)
TENSOR_UNARY_KERNELS_3D(NEG_OPERATION, UNARY_NEG, I32, I32)
TENSOR_UNARY_KERNELS_2D(NEG_OPERATION, UNARY_NEG, I32, I32)
};
#undef SIN_OPERATION
@ -161,6 +162,10 @@ static const struct {
#undef RCP_OPERATION
#undef SIGN_OPERATION
#undef SOFTSIGN_OPERATION
#undef ATAN_OPERATION
#undef ATANH_OPERATION
#undef ACOSH_OPERATION
#undef INVERSE_SIGMOID_OPERATION
/*
* Kernel params
*/
@ -262,6 +267,10 @@ static vsi_status _query_kernel
case _PACK_SELECT_KEY(F16, F16):
key = HASH_UNARY_KEY( type, F32, F32, image_2d );
break;
case _PACK_SELECT_KEY(U8, F32):
case _PACK_SELECT_KEY(U8, F16):
key = HASH_UNARY_KEY( type, U8, F32, image_2d );
break;
default:
key = HASH_UNARY_KEY( type, input_dtype, output_dtype, image_2d );
break;
@ -330,7 +339,7 @@ static vsi_nn_kernel_node_t _setup
ret = vsi_nn_kernel_optimize_element_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num,
shape, &new_rank );
if( ret )
if ( ret )
{
rs_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape, new_rank );
@ -338,7 +347,7 @@ static vsi_nn_kernel_node_t _setup
outputs[0], shape, new_rank );
}
if( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
if ( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
rs_tensors[0]->attr.dim_num ) )
{
return NULL;
@ -348,11 +357,11 @@ static vsi_nn_kernel_node_t _setup
image_2d = (rs_tensors[0]->attr.dim_num == 2 || rs_tensors[0]->attr.size[2] == 1);
status = _query_kernel( rs_tensors, &rs_tensors[1], unary_type, image_2d, kernel );
if( VSI_SUCCESS == status)
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
if ( node )
{
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
rs_tensors, 1, &rs_tensors[1], 1 );
@ -452,5 +461,9 @@ REGISTER_ELTWISE_UNARY_BACKEND_CL( celu, UNARY_CELU )
REGISTER_ELTWISE_UNARY_BACKEND_CL( rcp, UNARY_RCP )
REGISTER_ELTWISE_UNARY_BACKEND_CL( sign, UNARY_SIGN )
REGISTER_ELTWISE_UNARY_BACKEND_CL( softsign, UNARY_SOFTSIGN )
REGISTER_ELTWISE_UNARY_BACKEND_CL( atan, UNARY_ATAN )
REGISTER_ELTWISE_UNARY_BACKEND_CL( atanh, UNARY_ATANH )
REGISTER_ELTWISE_UNARY_BACKEND_CL( acosh, UNARY_ACOSH )
REGISTER_ELTWISE_UNARY_BACKEND_CL( inverse_sigmoid, UNARY_INVERSE_SIGMOID )
__END_DECLS

View File

@ -49,6 +49,7 @@ typedef enum
#define _GATHER_KERNEL_SOURCE "gather"
#define _GATHER_BATCH_KERNEL_SOURCE "gather_batch"
#define _GATHER_ARRAY_KERNEL_SOURCE "gather_array"
// Add kernel hashtable here
#define VX_KERNEL_NAME_GATHER_U8TOU8 CVIVANTE_NAMESPACE("cl.gather_U8toU8")
@ -61,9 +62,14 @@ typedef enum
#define VX_KERNEL_NAME_GATHER_BATCH_I32TOI32 CVIVANTE_NAMESPACE("cl.gather_batch_I32toI32")
#define VX_KERNEL_NAME_GATHER_BATCH_F32TOF32 CVIVANTE_NAMESPACE("cl.gather_batch_F32toF32")
#define VX_KERNEL_NAME_GATHER_ARRAY_U8TOU8 CVIVANTE_NAMESPACE("cl.gather_array_U8toU8")
#define VX_KERNEL_NAME_GATHER_ARRAY_F16TOF16 CVIVANTE_NAMESPACE("cl.gather_array_F16toF16")
#define VX_KERNEL_NAME_GATHER_ARRAY_I32TOI32 CVIVANTE_NAMESPACE("cl.gather_array_I32toI32")
#define VX_KERNEL_NAME_GATHER_ARRAY_F32TOF32 CVIVANTE_NAMESPACE("cl.gather_array_F32toF32")
// Add kernel hashtable here
#define HASH_GATHER_KEY(_input0_type, _input1_type, _output_type, _image_2d, _batch) \
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_image_2d << 4) | (_batch))
#define HASH_GATHER_KEY(_input0_type, _input1_type, _output_type, _is_array, _batch) \
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_is_array << 4) | (_batch))
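// Key layout: input0 dtype bits 24+, input1 dtype bits 16-23, output dtype bits 8-15, is_array bit 4, batch flag bits 0-3.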
#define TENSOR_GATHER_KERNELS(IN0_TYPE, IN1TYPE, OUT_TYPE, SOURCE) \
{ HASH_GATHER_KEY(IN0_TYPE, IN1TYPE, OUT_TYPE, 0, 0), \
@ -75,6 +81,11 @@ typedef enum
VX_KERNEL_NAME_GATHER_BATCH_##IN0_TYPE##TO##OUT_TYPE, \
SOURCE },
#define TENSOR_GATHER_ARRAY_KERNELS(IN0_TYPE, IN1TYPE, OUT_TYPE, SOURCE) \
{ HASH_GATHER_KEY(IN0_TYPE, IN1TYPE, OUT_TYPE, 1, 0), \
VX_KERNEL_NAME_GATHER_ARRAY_##IN0_TYPE##TO##OUT_TYPE, \
SOURCE },
static const struct {
uint32_t key;
char* function_name;
@ -89,6 +100,10 @@ static const struct {
TENSOR_GATHER_BATCH_KERNELS(F16, I32, F16, _GATHER_BATCH_KERNEL_SOURCE)
TENSOR_GATHER_BATCH_KERNELS(I32, I32, I32, _GATHER_BATCH_KERNEL_SOURCE)
TENSOR_GATHER_BATCH_KERNELS(F32, I32, F32, _GATHER_BATCH_KERNEL_SOURCE)
TENSOR_GATHER_ARRAY_KERNELS(U8, I32, U8, _GATHER_ARRAY_KERNEL_SOURCE)
TENSOR_GATHER_ARRAY_KERNELS(F16, I32, F16, _GATHER_ARRAY_KERNEL_SOURCE)
TENSOR_GATHER_ARRAY_KERNELS(I32, I32, I32, _GATHER_ARRAY_KERNEL_SOURCE)
TENSOR_GATHER_ARRAY_KERNELS(F32, I32, F32, _GATHER_ARRAY_KERNEL_SOURCE)
};
/*
@ -114,7 +129,8 @@ static vsi_status cal_gather_tensor_reshape_size
vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
uint32_t block_size,
vsi_size_t batch_dims,
uint32_t idxFlg
uint32_t idxFlg,
int32_t* arrayFlg
)
{
vsi_status status = VSI_FAILURE;
@ -148,18 +164,19 @@ static vsi_status cal_gather_tensor_reshape_size
}
else
{
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
sizes[2] = outerCnt;
if ((elementCnt / block_size) >= VSI_NN_MAX_IMAGE_WIDTH)
{
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
sizes[2] = outerCnt;
status = VSI_SUCCESS;
arrayFlg[0] |= 1;
}
status = VSI_SUCCESS;
}
#undef VSI_NN_MAX_IMAGE_WIDTH
return status;
} /* _get_EltOP_tensor_reshape_size */
} /* cal_gather_tensor_reshape_size */
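To make the new control flow concrete, a worked trace (values are invented; GPU_TENSOR_MAX_WIDTH's real value is driver-defined):
/* block_size = 4, elementCnt = 280000, outerCnt = 1 gives
 * sizes = {4, 70000, 1}. If GPU_TENSOR_MAX_WIDTH were 65536 (an assumed
 * value), 70000 >= 65536 would set arrayFlg and route the op to the
 * gather_array kernels registered above; previously this case simply
 * failed the reshape. */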
/*
* Kernel initializer
@ -209,8 +226,7 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.global_size[0] = gpu_align_p2((block_size + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0], 4);
gpu_param.global_size[0] = block_size;
gpu_param.global_size[1] = indices_num;
gpu_param.global_size[2] = block_num;
@ -239,7 +255,8 @@ static vsi_status _query_kernel
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
int32_t is_batch
int32_t is_batch,
int32_t is_array
/* Add extra params */
)
{
@ -262,7 +279,7 @@ static vsi_status _query_kernel
output_dtype = I32;
}
key = HASH_GATHER_KEY( input0_dtype, I32, output_dtype, 0, is_batch );
key = HASH_GATHER_KEY( input0_dtype, I32, output_dtype, is_array, is_batch );
for ( i = 0; i < _cnt_of_array(gather_map); i ++ )
{
@ -314,11 +331,12 @@ static vsi_nn_kernel_node_t _setup
int32_t indices_num = vsi_nn_kernel_param_get_int32( params, "indices_num" );
int32_t is_batch = batch_dims > 0 ? 1 : 0;
vsi_size_t rs_dim = batch_dims == 0 ? 2 : 3;
int32_t is_array = block_size >= GPU_TENSOR_MAX_WIDTH ? 1 : 0;
int32_t i = 0;
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0);
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1);
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0);
status = cal_gather_tensor_reshape_size(&inputs[0], shapes[0], block_size, batch_dims, 0, &is_array);
status |= cal_gather_tensor_reshape_size(&inputs[1], shapes[1], 1, batch_dims, 1, &is_array);
status |= cal_gather_tensor_reshape_size(&outputs[0], shapes[2], block_size, batch_dims, 0, &is_array);
if (status != VSI_SUCCESS)
{
return NULL;
@ -337,7 +355,7 @@ static vsi_nn_kernel_node_t _setup
return NULL;
}
status = _query_kernel( kernel, inputs, outputs, is_batch );
status = _query_kernel( kernel, inputs, outputs, is_batch, is_array );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );

View File

@ -43,6 +43,7 @@ __BEGIN_DECLS
*/
#define KERNEL_SOURCE_1 "gather_nd"
#define KERNEL_SOURCE_2 "gather_nd_3d"
#define KERNEL_SOURCE_3 "gather_nd_batch"
typedef enum
{
@ -52,17 +53,25 @@ __BEGIN_DECLS
_3D
} vsi_nn_kernel_coord_type_e;
#define HASH_GATHER_ND_KEY(_input0_type, _input1_type, _output_type, _coord_dim) \
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_coord_dim))
#define HASH_GATHER_ND_KEY(_input0_type, _input1_type, _output_type, _coord_dim, _batch_dims) \
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_coord_dim << 4) | (_batch_dims))
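Illustrative key comparison (not in the diff): only the low nibble separates the batch and non-batch entries, so both kernel families share one lookup table.
/* HASH_GATHER_ND_KEY(U8, I32, U8, _1D, 0) -> non-batch gather_nd entry
 * HASH_GATHER_ND_KEY(U8, I32, U8, _1D, 1) -> batch gather_nd_batch entry */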
#define HASH_GATHER_ND_SH_KERNEL_NAME(SRC0_TYPE, DST_TYPE, COORD_TYPE) \
CVIVANTE_NAMESPACE("cl.gather_nd_"#SRC0_TYPE"to"#DST_TYPE#COORD_TYPE)
#define TENSOR_GATHER_ND_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, SOURCE) \
{ HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE), \
{ HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, 0), \
HASH_GATHER_ND_SH_KERNEL_NAME(IN0_TYPE, OUT_TYPE, COORD_TYPE), \
SOURCE },
#define HASH_GATHER_ND_BATCH_SH_KERNEL_NAME(SRC0_TYPE, DST_TYPE, COORD_TYPE) \
CVIVANTE_NAMESPACE("cl.gather_nd_batch_"#SRC0_TYPE"to"#DST_TYPE#COORD_TYPE)
#define TENSOR_GATHER_ND_BATCH_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, SOURCE) \
{ HASH_GATHER_ND_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, COORD_TYPE, 1), \
HASH_GATHER_ND_BATCH_SH_KERNEL_NAME(IN0_TYPE, OUT_TYPE, COORD_TYPE), \
SOURCE },
static const struct {
uint32_t key;
char* function_name;
@ -81,6 +90,12 @@ static const struct {
TENSOR_GATHER_ND_KERNELS(F16, I32, F16, _3D, KERNEL_SOURCE_2)
TENSOR_GATHER_ND_KERNELS(I32, I32, I32, _3D, KERNEL_SOURCE_2)
TENSOR_GATHER_ND_KERNELS(F32, I32, F32, _3D, KERNEL_SOURCE_2)
TENSOR_GATHER_ND_BATCH_KERNELS(U8, I32, U8, _1D, KERNEL_SOURCE_3)
TENSOR_GATHER_ND_BATCH_KERNELS(I8, I32, I8, _1D, KERNEL_SOURCE_3)
TENSOR_GATHER_ND_BATCH_KERNELS(F16, I32, F16, _1D, KERNEL_SOURCE_3)
TENSOR_GATHER_ND_BATCH_KERNELS(U8, I32, U8, _2D, KERNEL_SOURCE_3)
TENSOR_GATHER_ND_BATCH_KERNELS(I8, I32, I8, _2D, KERNEL_SOURCE_3)
TENSOR_GATHER_ND_BATCH_KERNELS(F16, I32, F16, _2D, KERNEL_SOURCE_3)
};
/*
@ -103,7 +118,8 @@ static vsi_status cal_gather_nd_tensor_reshape_size
vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
uint32_t block_size,
uint32_t coordDim,
int32_t* newDim
int32_t* newDim,
int32_t batch_dims
)
{
vsi_status status = VSI_FAILURE;
@ -114,45 +130,63 @@ static vsi_status cal_gather_nd_tensor_reshape_size
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
newDim[0] = 0;
for(i = 0; i < dims_num; ++i)
for (i = 0; i < dims_num; ++i)
{
elementCnt *= input_size[i];
}
for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
{
sizes[i] = 1;
}
if(coordDim) // input reshape
if (coordDim) // input reshape
{
uint32_t offset = dims_num - coordDim + 1;
for(i = coordDim-1; i > 0; i--)
{
sizes[i] = input_size[i + offset - 1];
}
for(i = 0; i < offset; i++)
{
sizes[0] *= input_size[i];
}
uint32_t offset = dims_num - coordDim + 1 - batch_dims;
newDim[0] = coordDim;
if(coordDim == 1)
if (batch_dims)
{
newDim[0] = 2;
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
for (i = 0; i < offset; i++)
{
sizes[0] *= input_size[i];
}
for (i = 0; i < coordDim; i++)
{
sizes[i + 1] = input_size[i + offset];
}
newDim[0] = coordDim == 1 ? 2 : 3;
}
else if(coordDim == 4)
else
{
newDim[0] = 3;
for (i = coordDim-1; i > 0; i--)
{
sizes[i] = input_size[i + offset - 1];
}
for (i = 0; i < offset; i++)
{
sizes[0] *= input_size[i];
}
newDim[0] = coordDim;
if (coordDim == 1)
{
newDim[0] = 2;
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
}
else if (coordDim == 4)
{
newDim[0] = 3;
}
}
status = VSI_SUCCESS;
}
else // indices & output reshape
{
if((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
{
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
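A worked trace of the new batch branch above, with hypothetical shapes:
/* dims_num = 3, input_size = {8, 6, 2}, coordDim = 1, batch_dims = 1:
 *   offset   = 3 - 1 + 1 - 1 = 2
 *   sizes[0] = 8 * 6 = 48          (dims below the batch dim, flattened)
 *   sizes[1] = input_size[2] = 2   (the batch dim, kept separate)
 *   newDim   = 2                   (coordDim == 1 maps to rank 2)
 */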
@ -222,7 +256,8 @@ static vsi_status _query_kernel
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
int32_t coord_dim
int32_t coord_dim,
int32_t batch_dims
)
{
vsi_status status = VSI_FAILURE;
@ -234,30 +269,49 @@ static vsi_status _query_kernel
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if(coord_dim == 1)
if (input0_dtype == F32)
{
input0_dtype = F16;
}
else if (input0_dtype == I32 || input0_dtype == I16)
{
input0_dtype = I8;
}
if (output_dtype == F32)
{
output_dtype = F16;
}
else if (output_dtype == I32 || output_dtype == I16)
{
output_dtype = I8;
}
if (coord_dim == 1)
{
coord_type = _1D;
}
else if(coord_dim == 2)
else if (coord_dim == 2)
{
coord_type = _2D;
}
else if(coord_dim == 3 || coord_dim == 4)
else if (coord_dim == 3 || coord_dim == 4)
{
coord_type = _3D;
}
key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type );
key = HASH_GATHER_ND_KEY( input0_dtype, I32, output_dtype, coord_type, batch_dims );
for( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
for ( i = 0; i < _cnt_of_array(gather_nd_map); i ++ )
{
if( gather_nd_map[i].key == key )
if ( gather_nd_map[i].key == key )
{
break;
}
}
if( i < _cnt_of_array(gather_nd_map) )
if ( i < _cnt_of_array(gather_nd_map) )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", gather_nd_map[i].function_name );
kernel->info.parameters = _gather_nd_kernel_param_def;
@ -289,29 +343,30 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_GATHER_ND_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
int32_t batch_dims = vsi_nn_kernel_param_get_int32( params, "batch_dims" );
int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim);
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim);
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim);
if(status != VSI_SUCCESS)
status = cal_gather_nd_tensor_reshape_size(&inputs[0], shapes[0], block_size, coord_dim, &rs_in_dim, batch_dims);
status |= cal_gather_nd_tensor_reshape_size(&inputs[1], shapes[1], coord_dim, 0, &rs_idx_dim, batch_dims);
status |= cal_gather_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, 0, &rs_out_dim, batch_dims);
if (status != VSI_SUCCESS)
{
return NULL;
}
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
return NULL;
}
status = _query_kernel( kernel, inputs, outputs, coord_dim );
if( VSI_SUCCESS == status)
status = _query_kernel( kernel, inputs, outputs, coord_dim, batch_dims );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
if ( node )
{
uint32_t index = 0;
/* Pass parameters to node. */

View File

@ -0,0 +1,292 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
#define _GLOBALLPPOOL_KERNEL_SOURCE_NAME "globallppool"
// Add kernel hashtable here
#define GLOBALLPPOOL_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
(( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
#define GLOBALLPPOOL_KERNELS( IN_DTYPE, OUT_DTYPE ) \
{ GLOBALLPPOOL_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
CVIVANTE_NAMESPACE("cl.globallppool_"#IN_DTYPE"to"#OUT_DTYPE), \
_GLOBALLPPOOL_KERNEL_SOURCE_NAME }, \
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _globallppool_kernel_map[] =
{
// Register kernel here
GLOBALLPPOOL_KERNELS( F32, F32 )
GLOBALLPPOOL_KERNELS( F32, U32 )
GLOBALLPPOOL_KERNELS( F32, I32 )
GLOBALLPPOOL_KERNELS( U32, U32 )
GLOBALLPPOOL_KERNELS( U32, F32 )
GLOBALLPPOOL_KERNELS( I32, I32 )
GLOBALLPPOOL_KERNELS( I32, F32 )
GLOBALLPPOOL_KERNELS( BF16, BF16 )
};
/*
* Kernel params
*/
static vx_param_description_t _globallppool_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _GLOBALLPPOOL_PARAM_NUM _cnt_of_array( _globallppool_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_globallppool_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
gpu_param_t gpu_param = {
1,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_status status = VSI_FAILURE;
vx_tensor output = (vx_tensor)param[1];
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL;
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
output_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_size[0] = (output_shape->data[2] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0];
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (output_attr)
{
vsi_nn_kernel_tensor_attr_release(&output_attr);
}
return status;
} /* _globallppool_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _globallppool_kernel_map;
size_t kernel_map_size = _cnt_of_array( _globallppool_kernel_map );
vx_param_description_t * param_def = _globallppool_kernel_param_def;
vx_kernel_initialize_f initializer = _globallppool_initializer;
uint32_t key;
uint32_t i;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
(( in_dtype ) | (out_dtype << 8 ))
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
{
case _PACK_SELECT_KEY(F32, F32):
case _PACK_SELECT_KEY(F16, F16):
case _PACK_SELECT_KEY(F32, F16):
case _PACK_SELECT_KEY(F16, F32):
key = GLOBALLPPOOL_HASH_KEY( F32, F32);
break;
case _PACK_SELECT_KEY(F32, U8):
case _PACK_SELECT_KEY(F16, U8):
key = GLOBALLPPOOL_HASH_KEY( F32, U32);
break;
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F32, I16):
case _PACK_SELECT_KEY(F16, I8):
case _PACK_SELECT_KEY(F16, I16):
key = GLOBALLPPOOL_HASH_KEY( F32, I32);
break;
case _PACK_SELECT_KEY(U8, U8):
key = GLOBALLPPOOL_HASH_KEY( U32, U32);
break;
case _PACK_SELECT_KEY(U8, F16):
case _PACK_SELECT_KEY(U8, F32):
key = GLOBALLPPOOL_HASH_KEY( U32, F32);
break;
case _PACK_SELECT_KEY(I8, I8):
case _PACK_SELECT_KEY(I8, I16):
case _PACK_SELECT_KEY(I16, I8):
case _PACK_SELECT_KEY(I16, I16):
key = GLOBALLPPOOL_HASH_KEY( I32, I32);
break;
case _PACK_SELECT_KEY(I8, F16):
case _PACK_SELECT_KEY(I8, F32):
case _PACK_SELECT_KEY(I16, F16):
case _PACK_SELECT_KEY(I16, F32):
key = GLOBALLPPOOL_HASH_KEY( I32, F32);
break;
default:
key = GLOBALLPPOOL_HASH_KEY( in_dtype, out_dtype);
break;
}
#undef _PACK_SELECT_KEY
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = _cnt_of_array( _globallppool_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
"eltwise_ops_helper",
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
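The dtype folding in the switch above lets one compiled kernel serve several nominal formats; for example (illustrative):
/* An F16 -> F16 global LP pool packs to _PACK_SELECT_KEY(F16, F16); the
 * switch folds it to GLOBALLPPOOL_HASH_KEY(F32, F32), so the lookup lands on
 * the GLOBALLPPOOL_KERNELS( F32, F32 ) entry registered in the map above. */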
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_GLOBALLPPOOL_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
int32_t p = vsi_nn_kernel_param_get_int32(params, "p");
int32_t width = (int32_t)inputs[0]->attr.size[0];
int32_t height = (int32_t)inputs[0]->attr.size[1];
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
inputs[0]->attr.dim_num )
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ))
{
return NULL;
}
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
uint32_t index = 2;
vsi_nn_kernel_node_pack_io( node_params, _GLOBALLPPOOL_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &p );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _GLOBALLPPOOL_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[2] );
vsi_nn_kernel_scalar_release( &node_params[3] );
vsi_nn_kernel_scalar_release( &node_params[4] );
vsi_nn_kernel_scalar_release( &node_params[5] );
vsi_nn_kernel_scalar_release( &node_params[6] );
vsi_nn_kernel_scalar_release( &node_params[7] );
vsi_nn_kernel_scalar_release( &node_params[8] );
}
}
return node;
} /* _setup() */
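The scale/tail rewrites in _setup are the usual affine (de)quantization folded into a multiply-add; as a sketch, assuming per-tensor affine quantization q = x / scale + zp:
/* dequantize: x = (q - zp_in) * scale_in
 *               = q * inputScale + inputTail,   inputTail   = -(zp_in * scale_in)
 * requantize: q = x / scale_out + zp_out
 *               = x * outputScale + outputTail, outputScale = 1 / scale_out
 */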
__END_DECLS
REGISTER_BACKEND_CL( globallppool, _setup )

View File

@ -0,0 +1,365 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _L1NORM_KERNEL_SOURCE_NAME "l1norm"
// Add kernel hashtable here
#define L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, _image_2d, AXIS) \
(( IN_DTYPE << 24 ) | ( OUT_DTYPE << 16) | (_image_2d << 8) | (AXIS))
#define L1NORM_KERNELS( IN_DTYPE, OUT_DTYPE, AXIS ) \
{ L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, 0 , AXIS), \
CVIVANTE_NAMESPACE("cl.l1norm_"#IN_DTYPE"to"#OUT_DTYPE"_axis"#AXIS), \
_L1NORM_KERNEL_SOURCE_NAME }
#define L1NORM_KERNELS_2D( IN_DTYPE, OUT_DTYPE, AXIS ) \
{ L1NORM_HASH_KEY( IN_DTYPE, OUT_DTYPE, 1, AXIS), \
CVIVANTE_NAMESPACE("cl.l1norm_"#IN_DTYPE"to"#OUT_DTYPE"_2D_axis"#AXIS), \
_L1NORM_KERNEL_SOURCE_NAME }
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _l1norm_kernel_map[] =
{
// Register kernel here
L1NORM_KERNELS( U32, U32, 0 ),
L1NORM_KERNELS( U32, I32, 0 ),
L1NORM_KERNELS( U32, F32, 0 ),
L1NORM_KERNELS( I32, I32, 0 ),
L1NORM_KERNELS( I32, U32, 0 ),
L1NORM_KERNELS( I32, F32, 0 ),
L1NORM_KERNELS( F32, F32, 0 ),
L1NORM_KERNELS( F32, U32, 0 ),
L1NORM_KERNELS( F32, I32, 0 ),
L1NORM_KERNELS( U32, U32, 1 ),
L1NORM_KERNELS( U32, I32, 1 ),
L1NORM_KERNELS( U32, F32, 1 ),
L1NORM_KERNELS( I32, I32, 1 ),
L1NORM_KERNELS( I32, U32, 1 ),
L1NORM_KERNELS( I32, F32, 1 ),
L1NORM_KERNELS( F32, F32, 1 ),
L1NORM_KERNELS( F32, U32, 1 ),
L1NORM_KERNELS( F32, I32, 1 ),
L1NORM_KERNELS( U32, U32, 2 ),
L1NORM_KERNELS( U32, I32, 2 ),
L1NORM_KERNELS( U32, F32, 2 ),
L1NORM_KERNELS( I32, I32, 2 ),
L1NORM_KERNELS( I32, U32, 2 ),
L1NORM_KERNELS( I32, F32, 2 ),
L1NORM_KERNELS( F32, F32, 2 ),
L1NORM_KERNELS( F32, U32, 2 ),
L1NORM_KERNELS( F32, I32, 2 ),
L1NORM_KERNELS_2D( U32, U32, 0 ),
L1NORM_KERNELS_2D( U32, I32, 0 ),
L1NORM_KERNELS_2D( U32, F32, 0 ),
L1NORM_KERNELS_2D( I32, I32, 0 ),
L1NORM_KERNELS_2D( I32, U32, 0 ),
L1NORM_KERNELS_2D( I32, F32, 0 ),
L1NORM_KERNELS_2D( F32, F32, 0 ),
L1NORM_KERNELS_2D( F32, U32, 0 ),
L1NORM_KERNELS_2D( F32, I32, 0 ),
L1NORM_KERNELS_2D( U32, U32, 1 ),
L1NORM_KERNELS_2D( U32, I32, 1 ),
L1NORM_KERNELS_2D( U32, F32, 1 ),
L1NORM_KERNELS_2D( I32, I32, 1 ),
L1NORM_KERNELS_2D( I32, U32, 1 ),
L1NORM_KERNELS_2D( I32, F32, 1 ),
L1NORM_KERNELS_2D( F32, F32, 1 ),
L1NORM_KERNELS_2D( F32, U32, 1 ),
L1NORM_KERNELS_2D( F32, I32, 1 ),
};
/*
* Kernel params
*/
static vx_param_description_t _l1norm_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
// Add kernel parameters here
};
#define _L1NORM_PARAM_NUM _cnt_of_array( _l1norm_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_l1norm_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_status status = VSI_FAILURE;
vx_tensor output = (vx_tensor)param[1];
vx_int32 axis = 0;
vx_int32 dim = 0;
vx_int32 width = 0;
vx_int32 height = 0;
vx_int32 depth = 0;
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL;
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[5], &axis);
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
output_shape = output_attr->shape;
dim = output_shape->size < 3 ? 2 : 3;
width = (vx_int32)output_shape->data[0];
height = (vx_int32)output_shape->data[1];
depth = dim < 3 ? 1 : (vx_int32)output_shape->data[2];
gpu_param.dim = dim;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
if (axis == 0)
{
gpu_param.local_size[0] = 16;
gpu_param.local_size[1] = 1;
gpu_param.local_size[2] = 1;
gpu_param.global_size[0] = 16;
gpu_param.global_size[1] = height;
gpu_param.global_size[2] = depth;
}
else if (axis == 1)
{
gpu_param.local_size[0] = 1;
gpu_param.local_size[1] = 16;
gpu_param.local_size[2] = 1;
gpu_param.global_size[0] = width;
gpu_param.global_size[1] = 16;
gpu_param.global_size[2] = depth;
}
else
{
gpu_param.local_size[0] = 1;
gpu_param.local_size[1] = 1;
gpu_param.local_size[2] = 16;
gpu_param.global_size[0] = width;
gpu_param.global_size[1] = height;
gpu_param.global_size[2] = 16;
}
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (output_attr)
{
vsi_nn_kernel_tensor_attr_release(&output_attr);
}
return status;
} /* _l1norm_initializer() */
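For reference, the operation behind these kernels is an L1 normalization along axis; each kernel reduces with a 16-lane workgroup, which is why local_size is pinned to 16 on the reduced axis. A scalar sketch, ignoring quantization (the zero-sum guard is an assumption, not taken from the kernels):
#include <math.h>
static void l1norm_ref( const float * x, float * y, int n )
{
    float sum = 0.0f;
    int i;
    for ( i = 0; i < n; i++ ) sum += fabsf( x[i] );
    if ( sum == 0.0f ) sum = 1.0f;   /* assumed guard against divide-by-zero */
    for ( i = 0; i < n; i++ ) y[i] = x[i] / sum;
}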
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
vsi_bool image_2d,
int32_t axis
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _l1norm_kernel_map;
size_t kernel_map_size = _cnt_of_array( _l1norm_kernel_map );
vx_param_description_t * param_def = _l1norm_kernel_param_def;
vx_kernel_initialize_f initializer = _l1norm_initializer;
uint32_t key;
uint32_t i;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (F16 == in_dtype)
{
in_dtype = F32;
}
else if (U8 == in_dtype)
{
in_dtype = U32;
}
else if (I16 == in_dtype || I8 == in_dtype)
{
in_dtype = I32;
}
if (F16 == out_dtype)
{
out_dtype = F32;
}
else if (U8 == out_dtype)
{
out_dtype = U32;
}
else if (I16 == out_dtype || I8 == out_dtype)
{
out_dtype = I32;
}
key = L1NORM_HASH_KEY( in_dtype, out_dtype, image_2d, axis);
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = _cnt_of_array( _l1norm_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_L1NORM_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
vsi_bool image_2d = FALSE;
int32_t axis = vsi_nn_kernel_param_get_int32(params, "axis");
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float inputZp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
int32_t axis_size = (int32_t)outputs[0]->attr.size[axis];
outputScale = 1.0f / outputScale;
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
return NULL;
}
image_2d = (outputs[0]->attr.dim_num == 2);
status = _query_kernel( kernel, inputs, outputs, image_2d, axis );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
uint32_t index = 2;
vsi_nn_kernel_node_pack_io( node_params, _L1NORM_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputZp );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &axis_size );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _L1NORM_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[2] );
vsi_nn_kernel_scalar_release( &node_params[3] );
vsi_nn_kernel_scalar_release( &node_params[4] );
vsi_nn_kernel_scalar_release( &node_params[5] );
vsi_nn_kernel_scalar_release( &node_params[6] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CL( l1norm, _setup )

View File

@ -35,6 +35,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
__BEGIN_DECLS
@ -212,27 +213,52 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_LOGICAL_NOT_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
vsi_size_t new_rank = 0;
vsi_bool ret = FALSE;
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
ret = vsi_nn_kernel_optimize_element_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num,
shape, &new_rank );
if ( ret )
{
return NULL;
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape, new_rank );
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
outputs[0], shape, new_rank );
}
else
{
goto final;
}
image_2d = (outputs[0]->attr.dim_num == 2 || outputs[0]->attr.size[2] == 1);
status = _query_kernel( kernel, inputs, outputs, image_2d);
if( VSI_SUCCESS == status)
if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[1]->attr.size,
reshape_tensors[1]->attr.dim_num ) )
{
goto final;
}
image_2d = (reshape_tensors[1]->attr.dim_num == 2 || reshape_tensors[1]->attr.size[2] == 1);
status = _query_kernel( kernel, &reshape_tensors[0], &reshape_tensors[1], image_2d);
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_NOT_PARAM_NUM,
inputs, input_num, outputs, output_num );
&reshape_tensors[0], input_num, &reshape_tensors[1], output_num );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_NOT_PARAM_NUM );
}
}
final:
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
return node;
} /* _setup() */
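vsi_nn_kernel_optimize_element_shape is defined elsewhere; for a purely elementwise op such as logical_not its effect is just to collapse the tensor to the lowest rank the GPU path accepts. A rough sketch of the idea (not the real implementation):
/* Collapse an N-D shape to {total_elements, 1} for an elementwise kernel. */
static void flatten_for_eltwise( const vsi_size_t * in, vsi_size_t rank,
                                 vsi_size_t * out, vsi_size_t * out_rank )
{
    vsi_size_t i, total = 1;
    for ( i = 0; i < rank; i++ ) total *= in[i];
    out[0] = total;
    out[1] = 1;
    *out_rank = 2;
}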

View File

@ -35,7 +35,7 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"
#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
@ -228,30 +228,75 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_LOGICAL_OPS_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
vsi_size_t new_rank = 0;
vsi_bool ret = FALSE;
uint32_t ops_type = vsi_nn_kernel_param_get_int32( params, "ops_type" );
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
ret = vsi_nn_kernel_optimize_eltwise_shape(
inputs[0]->attr.size, inputs[0]->attr.dim_num,
inputs[1]->attr.size, inputs[1]->attr.dim_num,
outputs[0]->attr.size, outputs[0]->attr.dim_num,
shapes[0], shapes[1], shapes[2], &new_rank );
if ( ret )
{
return NULL;
reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shapes[0], new_rank );
reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
inputs[1], shapes[1], new_rank );
reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
outputs[0], shapes[2], new_rank );
#define _swap_tensor(a, b, tmp) \
do { \
tmp = a; \
a = b; \
b = tmp; \
} while(0)
if (shapes[1][3] > shapes[0][3] && new_rank == 4)
{
vsi_nn_tensor_t* reshape_tmp;
_swap_tensor(reshape_tensors[0], reshape_tensors[1], reshape_tmp);
}
#undef _swap_tensor
}
else
{
goto final;
}
if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
reshape_tensors[2]->attr.dim_num ) )
{
goto final;
}
image_2d = (outputs[0]->attr.dim_num == 2);
status = _query_kernel( kernel, inputs, outputs, image_2d, (vsi_nn_logical_ops_type_t)ops_type);
status = _query_kernel( kernel, reshape_tensors, &reshape_tensors[2],
image_2d, (vsi_nn_logical_ops_type_t)ops_type);
if( VSI_SUCCESS == status)
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
if ( node )
{
/* Pass parameters to node. */
vsi_nn_kernel_node_pack_io( node_params, _LOGICAL_OPS_PARAM_NUM,
inputs, input_num, outputs, output_num );
reshape_tensors, input_num, &reshape_tensors[2], output_num );
status = vsi_nn_kernel_node_pass_param( node, node_params, _LOGICAL_OPS_PARAM_NUM );
}
}
final:
vsi_safe_release_tensor( reshape_tensors[0] );
vsi_safe_release_tensor( reshape_tensors[1] );
vsi_safe_release_tensor( reshape_tensors[2] );
return node;
} /* _setup() */
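The operand swap above is safe because AND, OR, and XOR are commutative; it only normalizes which side carries the broadcast batch. Illustrative shapes (made up):
/* shapes[0] = {w, h, c, 1}, shapes[1] = {w, h, c, 8}: dim 3 of input1 is
 * larger, so the reshaped tensors are swapped and the kernel always sees the
 * full-batch operand first. */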

View File

@ -64,12 +64,12 @@ __BEGIN_DECLS
#define TENSOR_MATRIXMUL_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
{ HASH_MATRIXMUL_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, 0), \
HASH_MATRIXMUL_SH_KERNEL_NAME(F32, F32, F32, IMAGE_DIM), \
HASH_MATRIXMUL_SH_KERNEL_NAME(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM), \
SOURCE },
#define TENSOR_MATRIXMUL_TRANSA_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
{ HASH_MATRIXMUL_KEY(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, 1), \
HASH_MATRIXMUL_TRANSA_SH_KERNEL_NAME(F32, F32, F32, IMAGE_DIM), \
HASH_MATRIXMUL_TRANSA_SH_KERNEL_NAME(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM), \
SOURCE },
#define TENSOR_MATRIXMUL_TRANSB_KERNELS(IN0_TYPE, IN1_TYPE, OUT_TYPE, IMAGE_DIM, SOURCE) \
@ -83,18 +83,32 @@ static const struct {
const char* source_name;
} matrixmul_map[] =
{
TENSOR_MATRIXMUL_KERNELS(F16, F16, F16, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(F16, F16, F16, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSA_KERNELS(F16, F16, F16, _2D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSA_KERNELS(F16, F16, F16, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSA_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSA_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, F32, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, F32, F32, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(F32, I8, F32, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSA_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSA_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSB_KERNELS(I8, I8, I8, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(I8, I8, I8, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, U8, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, U8, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSA_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_2)
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, F32, _2D, KERNEL_SOURCE_1)
TENSOR_MATRIXMUL_TRANSB_KERNELS(U8, U8, F32, _3D, KERNEL_SOURCE_1)
};
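Note the fix in the TENSOR_MATRIXMUL*_KERNELS macros above: the registered kernel name is now expanded from the entry's own dtype triple. Illustrative effect:
/* TENSOR_MATRIXMUL_KERNELS(U8, U8, F32, _2D, ...) now registers
 * HASH_MATRIXMUL_SH_KERNEL_NAME(U8, U8, F32, _2D), whereas it previously
 * expanded the hard-coded (F32, F32, F32) name for every entry. */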
/*
@ -198,10 +212,44 @@ static vsi_status _query_kernel
dim_type = _3D;
}
if (input0_dtype == I16 || input0_dtype == I32)
{
input0_dtype = I8;
}
else if (input0_dtype == F16)
{
input0_dtype = F32;
}
else if (input0_dtype == U32)
{
input0_dtype = U8;
}
if (input1_dtype == I16 || input1_dtype == I32)
{
input1_dtype = I8;
}
else if (input1_dtype == F16)
{
input1_dtype = F32;
}
else if (input1_dtype == U32)
{
input1_dtype = U8;
}
if (output_dtype == I16 || output_dtype == I32)
{
output_dtype = I8;
}
else if (output_dtype == F16)
{
output_dtype = F32;
}
else if (output_dtype == U32)
{
output_dtype = U8;
}
key = HASH_MATRIXMUL_KEY( input0_dtype, input1_dtype, output_dtype, dim_type, transa );
@ -260,6 +308,8 @@ static vsi_nn_kernel_node_t _setup
float scale_out = vsi_nn_get_tensor_scale(outputs[0]);
float zp_out = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
scale_out = 1 / scale_out;
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{

View File

@ -0,0 +1,330 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _MAXUNPOOL_KERNEL_SOURCE_NAME "maxunpool"
// Add kernel hashtable here
#define MAXUNPOOL_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE ) \
(( IN_DTYPE0 << 16 ) | ( IN_DTYPE1 << 8 ) | ( OUT_DTYPE ))
#define MAXUNPOOL_KERNELS( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE ) \
{ MAXUNPOOL_HASH_KEY( IN_DTYPE0, I32, OUT_DTYPE ), \
CVIVANTE_NAMESPACE("cl.maxunpool_"#IN_DTYPE0"to"#OUT_DTYPE), \
_MAXUNPOOL_KERNEL_SOURCE_NAME },
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _maxunpool_kernel_map[] =
{
// Register kernel here
MAXUNPOOL_KERNELS( F32, I32, F32)
MAXUNPOOL_KERNELS( F32, I32, U32)
MAXUNPOOL_KERNELS( F32, I32, I32)
MAXUNPOOL_KERNELS( U32, I32, U32)
MAXUNPOOL_KERNELS( U32, I32, F32)
MAXUNPOOL_KERNELS( I32, I32, I32)
MAXUNPOOL_KERNELS( I32, I32, F32)
MAXUNPOOL_KERNELS( BF16, I32, BF16)
};
/*
* Kernel params
*/
static vx_param_description_t _maxunpool_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _MAXUNPOOL_PARAM_NUM _cnt_of_array( _maxunpool_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_maxunpool_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_status status = VSI_FAILURE;
vx_tensor output = (vx_tensor)param[2];
vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
vsi_size_array_t *output_shape = NULL;
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
output_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.global_size[0] = (output_shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0];
gpu_param.global_size[1] = (output_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1];
gpu_param.global_size[2] = (output_shape->data[2] + gpu_param.global_scale[2] - 1)
/ gpu_param.global_scale[2];
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (output_attr)
{
vsi_nn_kernel_tensor_attr_release(&output_attr);
}
return status;
} /* _maxunpool_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _maxunpool_kernel_map;
vx_kernel_initialize_f initializer = _maxunpool_initializer;
vx_param_description_t * param_def = _maxunpool_kernel_param_def;
size_t kernel_map_size = _cnt_of_array( _maxunpool_kernel_map );
size_t param_size = _cnt_of_array( _maxunpool_kernel_param_def );
uint32_t key;
uint32_t i;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
(( in_dtype ) | (out_dtype << 8 ))
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
{
case _PACK_SELECT_KEY(F32, F32):
case _PACK_SELECT_KEY(F16, F16):
case _PACK_SELECT_KEY(F32, F16):
case _PACK_SELECT_KEY(F16, F32):
key = MAXUNPOOL_HASH_KEY( F32, I32, F32);
break;
case _PACK_SELECT_KEY(F32, U8):
case _PACK_SELECT_KEY(F16, U8):
key = MAXUNPOOL_HASH_KEY( F32, I32, U32);
break;
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F32, I16):
case _PACK_SELECT_KEY(F16, I8):
case _PACK_SELECT_KEY(F16, I16):
key = MAXUNPOOL_HASH_KEY( F32, I32, I32);
break;
case _PACK_SELECT_KEY(U8, U8):
key = MAXUNPOOL_HASH_KEY( U32, I32, U32);
break;
case _PACK_SELECT_KEY(U8, F16):
case _PACK_SELECT_KEY(U8, F32):
key = MAXUNPOOL_HASH_KEY( U32, I32, F32);
break;
case _PACK_SELECT_KEY(I8, I8):
case _PACK_SELECT_KEY(I8, I16):
case _PACK_SELECT_KEY(I16, I8):
case _PACK_SELECT_KEY(I16, I16):
key = MAXUNPOOL_HASH_KEY( I32, I32, I32);
break;
case _PACK_SELECT_KEY(I8, F16):
case _PACK_SELECT_KEY(I8, F32):
case _PACK_SELECT_KEY(I16, F16):
case _PACK_SELECT_KEY(I16, F32):
key = MAXUNPOOL_HASH_KEY( I32, I32, F32);
break;
default:
key = MAXUNPOOL_HASH_KEY( in_dtype, I32, out_dtype);
break;
}
#undef _PACK_SELECT_KEY
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = (uint32_t)param_size;
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
"eltwise_ops_helper",
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_MAXUNPOOL_PARAM_NUM] = { NULL };
vsi_nn_kernel_node_t node = NULL;
int32_t pad_left = vsi_nn_kernel_param_get_int32(params, "pad_left");
int32_t pad_right = vsi_nn_kernel_param_get_int32(params, "pad_right");
int32_t pad_top = vsi_nn_kernel_param_get_int32(params, "pad_top");
int32_t pad_bottom = vsi_nn_kernel_param_get_int32(params, "pad_bottom");
int32_t width_in = (int32_t)inputs[0]->attr.size[0];
int32_t height_in = (int32_t)inputs[0]->attr.size[1];
int32_t width = (int32_t)outputs[0]->attr.size[0];
int32_t height = (int32_t)outputs[0]->attr.size[1];
int32_t batch = (int32_t)outputs[0]->attr.size[2];
int32_t width_nopad = width - pad_left - pad_right;
int32_t height_nopad = height - pad_top - pad_bottom;
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
inputs[0]->attr.dim_num )
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ))
{
return NULL;
}
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
uint32_t index = 3;
vsi_nn_kernel_node_pack_io( node_params, _MAXUNPOOL_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width_nopad );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height_nopad );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &width_in );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &height_in );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &batch );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_left );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &pad_top );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _MAXUNPOOL_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[3] );
vsi_nn_kernel_scalar_release( &node_params[4] );
vsi_nn_kernel_scalar_release( &node_params[5] );
vsi_nn_kernel_scalar_release( &node_params[6] );
vsi_nn_kernel_scalar_release( &node_params[7] );
vsi_nn_kernel_scalar_release( &node_params[8] );
vsi_nn_kernel_scalar_release( &node_params[9] );
vsi_nn_kernel_scalar_release( &node_params[10] );
vsi_nn_kernel_scalar_release( &node_params[11] );
vsi_nn_kernel_scalar_release( &node_params[12] );
vsi_nn_kernel_scalar_release( &node_params[13] );
}
}
return node;
} /* _setup() */
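Semantically, max-unpooling scatters each input value to the flat output position recorded by the paired index tensor and leaves everything else zero. A minimal float sketch (illustrative; padding, batching, and quantization are handled by the scalars passed above):
#include <string.h>
static void maxunpool_ref( const float * in, const int32_t * idx, float * out,
                           int in_count, int out_count )
{
    int i;
    memset( out, 0, sizeof( float ) * (size_t)out_count );
    for ( i = 0; i < in_count; i++ )
    {
        if ( idx[i] >= 0 && idx[i] < out_count )
        {
            out[ idx[i] ] = in[i];   /* scatter back to the max-pool position */
        }
    }
}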
__END_DECLS
REGISTER_BACKEND_CL( maxunpool, _setup )

View File

@ -81,9 +81,11 @@ static const struct {
{
TENSOR_POW_KERNELS_FLOAT(F32, F32, F32, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_FLOAT(F16, F16, F16, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS(U32, F32, U32, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_2D_FLOAT(F32, F32, F32, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_2D_FLOAT(F16, F16, F16, KERNEL_SOURCE_1)
TENSOR_POW_KERNELS_2D(U32, F32, U32, KERNEL_SOURCE_1)
};
/*
@ -94,6 +96,10 @@ static vx_param_description_t kernel_param_def[] =
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)
@ -179,7 +185,25 @@ static vsi_status _query_kernel
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
key = HASH_POW_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
#define _PACK_SELECT_KEY( input0_dtype, input1_dtype, output_dtype) \
((input0_dtype) | (input1_dtype << 8) | (output_dtype << 16))
switch(_PACK_SELECT_KEY(input0_dtype, input1_dtype, output_dtype))
{
case _PACK_SELECT_KEY(F16, F16, F16):
case _PACK_SELECT_KEY(F32, F32, F32):
key = HASH_POW_KEY( F32, F32, F32, image_2d );
break;
case _PACK_SELECT_KEY(U8, F16, U8):
case _PACK_SELECT_KEY(U8, F32, U8):
case _PACK_SELECT_KEY(U32, F16, U32):
case _PACK_SELECT_KEY(U32, F32, U32):
key = HASH_POW_KEY( U32, F32, U32, image_2d );
break;
default:
key = HASH_POW_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
break;
}
for( i = 0; i < _cnt_of_array(pow_map); i ++ )
{
@ -219,6 +243,13 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
vsi_bool image_2d = FALSE;
vsi_nn_kernel_node_t node = NULL;
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
@ -234,11 +265,20 @@ static vsi_nn_kernel_node_t _setup
if( node )
{
uint32_t index = 3;
vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
inputs, 2, outputs, 1 );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inputTail );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &outputTail );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[3] );
vsi_nn_kernel_scalar_release( &node_params[4] );
vsi_nn_kernel_scalar_release( &node_params[5] );
vsi_nn_kernel_scalar_release( &node_params[6] );
VSI_ASSERT( status == VSI_SUCCESS );
}
}
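With the four new scalars, the pow kernel can fold quantization into multiply-adds; conceptually (a sketch, not the kernel source):
/* x  = q_base * inputScale + inputTail     (inputTail = -(zp_in * scale_in))
 * y  = pow( x, exponent )                  (exponent read from inputs[1])
 * q' = y * outputScale + outputTail        (outputScale = 1 / scale_out)
 */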

View File

@ -0,0 +1,307 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_error.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _REVERSESEQUENCE_KERNEL_SOURCE_NAME "reversesequence"
// Add kernel hashtable here
#define REVERSESEQUENCE_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ) \
(( IN_DTYPE0 << 24 ) | ( IN_DTYPE1 << 16 ) | ( OUT_DTYPE << 8) | (batch_axis) )
#define REVERSESEQUENCE_KERNELS( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ) \
{ REVERSESEQUENCE_HASH_KEY( IN_DTYPE0, IN_DTYPE1, OUT_DTYPE, batch_axis ), \
CVIVANTE_NAMESPACE("cl.reversesequence_"#IN_DTYPE0"to"#OUT_DTYPE#batch_axis), \
_REVERSESEQUENCE_KERNEL_SOURCE_NAME },
typedef enum
{
_axis1 = 0,
_axis2
} vsi_nn_kernel_batch_axis_type_e;
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _reversesequence_kernel_map[] =
{
// Register kernel here
REVERSESEQUENCE_KERNELS( F32, I32, F32, _axis1)
REVERSESEQUENCE_KERNELS( F32, I32, U32, _axis1)
REVERSESEQUENCE_KERNELS( F32, I32, I32, _axis1)
REVERSESEQUENCE_KERNELS( U32, I32, U32, _axis1)
REVERSESEQUENCE_KERNELS( U32, I32, F32, _axis1)
REVERSESEQUENCE_KERNELS( I32, I32, I32, _axis1)
REVERSESEQUENCE_KERNELS( I32, I32, F32, _axis1)
REVERSESEQUENCE_KERNELS( BF16, I32, BF16, _axis1)
REVERSESEQUENCE_KERNELS( F32, I32, F32, _axis2)
REVERSESEQUENCE_KERNELS( F32, I32, U32, _axis2)
REVERSESEQUENCE_KERNELS( F32, I32, I32, _axis2)
REVERSESEQUENCE_KERNELS( U32, I32, U32, _axis2)
REVERSESEQUENCE_KERNELS( U32, I32, F32, _axis2)
REVERSESEQUENCE_KERNELS( I32, I32, I32, _axis2)
REVERSESEQUENCE_KERNELS( I32, I32, F32, _axis2)
REVERSESEQUENCE_KERNELS( BF16, I32, BF16, _axis2)
};
/*
* Kernel params
*/
static vx_param_description_t _reversesequence_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _REVERSESEQUENCE_PARAM_NUM _cnt_of_array( _reversesequence_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_reversesequence_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_status status = VSI_FAILURE;
vx_tensor input = (vx_tensor)param[0];
vsi_nn_kernel_tensor_attr_t *input_attr = NULL;
vsi_size_array_t *input_shape = NULL;
input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input );
CHECK_PTR_FAIL_GOTO( input_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
input_shape = input_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.global_size[0] = (input_shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0];
gpu_param.global_size[1] = (input_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1];
gpu_param.global_size[2] = (input_shape->data[2] + gpu_param.global_scale[2] - 1)
/ gpu_param.global_scale[2];
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
if (input_attr)
{
vsi_nn_kernel_tensor_attr_release(&input_attr);
}
return status;
} /* _reversesequence_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
int32_t batch_axis
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype;
vsi_nn_kernel_dtype_e out_dtype;
const _kernel_map_type * kernel_map = _reversesequence_kernel_map;
size_t kernel_map_size = _cnt_of_array( _reversesequence_kernel_map );
vx_param_description_t * param_def = _reversesequence_kernel_param_def;
vx_kernel_initialize_f initializer = _reversesequence_initializer;
vsi_nn_kernel_batch_axis_type_e axis_type = _axis1;
uint32_t key;
uint32_t i;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (batch_axis == 2)
{
axis_type = _axis2;
}
#define _PACK_SELECT_KEY( in_dtype, out_dtype ) \
(( in_dtype ) | (out_dtype << 8 ))
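    /* Narrow types reuse the 32-bit kernel variants registered above, so fold them before the lookup. */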
switch(_PACK_SELECT_KEY( in_dtype, out_dtype ))
{
case _PACK_SELECT_KEY(F16, F16):
case _PACK_SELECT_KEY(F32, F32):
key = REVERSESEQUENCE_HASH_KEY( F32, I32, F32, axis_type);
break;
case _PACK_SELECT_KEY(F16, U8):
case _PACK_SELECT_KEY(F32, U8):
key = REVERSESEQUENCE_HASH_KEY( F32, I32, U32, axis_type);
break;
case _PACK_SELECT_KEY(F16, I8):
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F16, I16):
case _PACK_SELECT_KEY(F32, I16):
key = REVERSESEQUENCE_HASH_KEY( F32, I32, I32, axis_type);
break;
case _PACK_SELECT_KEY(U8, U8):
key = REVERSESEQUENCE_HASH_KEY( U32, I32, U32, axis_type);
break;
case _PACK_SELECT_KEY(U8, F16):
case _PACK_SELECT_KEY(U8, F32):
key = REVERSESEQUENCE_HASH_KEY( U32, I32, F32, axis_type);
break;
case _PACK_SELECT_KEY(I8, I8):
case _PACK_SELECT_KEY(I16, I16):
key = REVERSESEQUENCE_HASH_KEY( I32, I32, I32, axis_type);
break;
case _PACK_SELECT_KEY(I8, F16):
case _PACK_SELECT_KEY(I8, F32):
case _PACK_SELECT_KEY(I16, F16):
case _PACK_SELECT_KEY(I16, F32):
key = REVERSESEQUENCE_HASH_KEY( I32, I32, F32, axis_type);
break;
case _PACK_SELECT_KEY(BF16, BF16):
key = REVERSESEQUENCE_HASH_KEY( BF16, I32, BF16, axis_type);
break;
default:
key = REVERSESEQUENCE_HASH_KEY( in_dtype, I32, out_dtype, axis_type);
break;
}
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = _cnt_of_array( _reversesequence_kernel_param_def );
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
"eltwise_ops_helper",
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_REVERSESEQUENCE_PARAM_NUM] = { NULL };
vsi_nn_kernel_node_t node = NULL;
int32_t batch_axis = vsi_nn_kernel_param_get_int32(params, "batch_axis");
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float inoutScale = inputScale / outputScale;
float inoutTail = outputTail - inputTail * inoutScale;
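    /* Fold both affine quantizations into a single multiply-add applied in the kernel:
     * out = (in - inZp) * inScale / outScale + outZp = in * inoutScale + inoutTail. */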
if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
inputs[0]->attr.dim_num )
|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ))
{
return NULL;
}
status = _query_kernel( kernel, inputs, outputs, batch_axis );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
uint32_t index = 3;
vsi_nn_kernel_node_pack_io( node_params, _REVERSESEQUENCE_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inoutScale );
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &inoutTail );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _REVERSESEQUENCE_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[3] );
vsi_nn_kernel_scalar_release( &node_params[4] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CL( reversesequence, _setup )
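For reference, dispatching to this backend from an op implementation follows the usual ovxlib kernel-selector pattern; a minimal sketch, assuming the standard vsi_nn_kernel param helpers (graph, inputs and outputs are placeholders):

    vsi_nn_kernel_param_t * param = vsi_nn_kernel_param_create();
    /* batch_axis picks which dimension indexes the batch (axis 2 maps to the _axis2 kernels). */
    vsi_nn_kernel_param_add_int32( param, "batch_axis", 2 );
    vsi_nn_kernel_node_t n = vsi_nn_kernel_selector( graph, "reversesequence",
            inputs, 2, outputs, 1, param );
    vsi_nn_kernel_param_release( &param );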

View File

@ -88,6 +88,7 @@ static vx_param_description_t _roi_align_kernel_param_def[] =
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _ROI_ALIGN_PARAM_NUM _cnt_of_array( _roi_align_kernel_param_def )
@ -105,8 +106,9 @@ static vx_param_description_t _roi_align_kernel_param_def[] =
#define SCALAR_SAMPLING_Y_RATIO (15)
#define SCALAR_DEPTH (16)
#define SCALAR_FORMAT (17)
#define PLATFORM_TYPE (18)
#define ROI_ALIGN_PARAM_NUM 18
#define ROI_ALIGN_PARAM_NUM 19
#define ROI_ALIGN_QUANT_PARAM_NUM _cnt_of_array( _roi_align_kernel_param_def )
/*
@ -250,6 +252,7 @@ static vsi_nn_kernel_node_t _setup
float height_ratio = vsi_nn_kernel_param_get_float32( params, "height_ratio" );
int32_t width_sample_num = vsi_nn_kernel_param_get_int32( params, "width_sample_num" );
int32_t height_sample_num = vsi_nn_kernel_param_get_int32( params, "height_sample_num" );
int32_t platform_type = vsi_nn_kernel_param_get_int32( params, "platform_type" );
float input_zp = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float input_scale = vsi_nn_get_tensor_scale(inputs[0]);
float input_tail = -(input_zp * input_scale);
@ -318,6 +321,7 @@ static vsi_nn_kernel_node_t _setup
node_params[SCALAR_SAMPLING_Y_RATIO] = vsi_nn_kernel_scalar_create( graph, F32, &sampling_y_ratio );
node_params[SCALAR_DEPTH] = vsi_nn_kernel_scalar_create( graph, I32, &depth );
node_params[SCALAR_FORMAT] = vsi_nn_kernel_scalar_create( graph, I32, &dtype );
node_params[PLATFORM_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &platform_type );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
@ -336,6 +340,7 @@ static vsi_nn_kernel_node_t _setup
vsi_nn_kernel_scalar_release( &node_params[SCALAR_DEPTH] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_FORMAT] );
vsi_nn_kernel_scalar_release( &node_params[PLATFORM_TYPE] );
}
}

View File

@ -110,7 +110,7 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
uint32_t i = 0;
vsi_size_t elementCnt = 1;
if(coordDim != 0 && (width == NULL || area == NULL))
if (coordDim != 0 && (width == NULL || area == NULL))
{
return status;
}
@ -118,17 +118,17 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
newDim[0] = 0;
for(i = 0; i < dims_num; ++i)
for (i = 0; i < dims_num; ++i)
{
elementCnt *= input_size[i];
}
for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
{
sizes[i] = 1;
}
if((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
if ((elementCnt / block_size) < VSI_NN_MAX_IMAGE_WIDTH)
{
sizes[0] = block_size;
sizes[1] = elementCnt / block_size;
@ -140,17 +140,17 @@ static vsi_status cal_scatter_nd_tensor_reshape_size
return status;
}
if(coordDim == 1) // index shape
if (coordDim == 1) // index shape
{
*width = 0;
*area = 0;
}
else if(coordDim == 2)
else if (coordDim == 2)
{
*width = input_size[dims_num - 2];
*area = 0;
}
else if(coordDim == 3)
else if (coordDim == 3)
{
*width = input_size[dims_num - 3];
*area = input_size[dims_num - 3] * input_size[dims_num - 2];
@ -226,30 +226,33 @@ static vsi_status _query_kernel
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if(coord_dim == 1)
if (coord_dim == 1)
{
coord_type = _1D;
}
else if(coord_dim == 2)
else if (coord_dim == 2)
{
coord_type = _2D;
}
else if(coord_dim == 3)
else if (coord_dim == 3)
{
coord_type = _3D;
}
input1_dtype = input1_dtype == F16 ? F32 : input1_dtype;
output_dtype = output_dtype == F16 ? F32 : output_dtype;
key = HASH_SCATTER_ND_KEY( I32, input1_dtype, output_dtype, coord_type );
for( i = 0; i < _cnt_of_array(scatter_nd_map); i ++ )
for ( i = 0; i < _cnt_of_array(scatter_nd_map); i ++ )
{
if( scatter_nd_map[i].key == key )
if ( scatter_nd_map[i].key == key )
{
break;
}
}
if( i < _cnt_of_array(scatter_nd_map) )
if ( i < _cnt_of_array(scatter_nd_map) )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", scatter_nd_map[i].function_name );
kernel->info.parameters = _scatter_nd_kernel_param_def;
@ -287,26 +290,31 @@ static vsi_nn_kernel_node_t _setup
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
vsi_size_t width = 0, area = 0;
status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);
status |= cal_scatter_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, coord_dim,
&width, &area, &rs_out_dim);
if(status != VSI_SUCCESS)
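/* The CL kernels only index 1-D/2-D/3-D coordinates (see coord_type in _query_kernel),
   so reject higher coordinate ranks up front. */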
if (coord_dim > 3)
{
return NULL;
}
if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);
status |= cal_scatter_nd_tensor_reshape_size(&outputs[0], shapes[2], block_size, coord_dim,
&width, &area, &rs_out_dim);
if (status != VSI_SUCCESS)
{
return NULL;
}
if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
outputs[0]->attr.dim_num ) )
{
return NULL;
}
status = _query_kernel( kernel, inputs, outputs, coord_dim );
if( VSI_SUCCESS == status)
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
if ( node )
{
uint32_t index = 0;
/* Pass parameters to node. */

View File

@ -111,12 +111,12 @@ static vsi_status cal_scatter_nd_update_tensor_reshape_size
#define VSI_NN_MAX_IMAGE_WIDTH GPU_TENSOR_MAX_WIDTH
newDim[0] = 0;
for(i = 0; i < dims_num; ++i)
for (i = 0; i < dims_num; ++i)
{
elementCnt *= input_size[i];
}
for(i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
for (i = 0; i < VSI_NN_MAX_DIM_NUM; ++i)
{
sizes[i] = 1;
}
@ -235,7 +235,7 @@ static vsi_status _query_kernel
key = HASH_SCATTER_ND_UPDATE_KEY( input0_dtype, input2_dtype, output_dtype, 0 );
for( i = 0; i < _cnt_of_array(scatter_nd_update_map); i ++ )
for ( i = 0; i < _cnt_of_array(scatter_nd_update_map); i ++ )
{
if ( scatter_nd_update_map[i].key == key )
{
@ -281,6 +281,13 @@ static vsi_nn_kernel_node_t _setup
int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
vsi_size_t width = 0, area = 0, vol = 0;
int32_t offsetX = 0, offsetY = 0, offsetZ = 0, offsetW = 0, offset_idx = 0;
vsi_size_t *input_size = inputs[2]->attr.size;
uint32_t dims_num = inputs[2]->attr.dim_num;
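/* Coordinates beyond rank 4 cannot be folded into the (x, y, z, w) offsets computed
   below unless the trailing update dimension is 1. */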
if (coord_dim > 4 && input_size[dims_num - 1] > 1)
{
return NULL;
}
status = cal_scatter_nd_update_tensor_reshape_size(&inputs[1], shapes[0],
coord_dim, 0, NULL, NULL, NULL, &rs_in_dim);

View File

@ -113,6 +113,8 @@ static const _kernel_map_type _swish_kernel_map[] =
SWISH_PACK_KERNEL_MAP_2D(U8, U8),
SWISH_PACK_KERNEL_MAP(I32, I32),
SWISH_PACK_KERNEL_MAP_2D(I32, I32),
SWISH_PACK_KERNEL_MAP(F32, U8),
SWISH_PACK_KERNEL_MAP_2D(F32, U8),
HSWISH_PACK_KERNEL_FLOAT_MAP(F32, F32),
HSWISH_PACK_KERNEL_FLOAT_MAP_2D(F32, F32),
HSWISH_PACK_KERNEL_FLOAT_MAP(F16, F16),
@ -222,6 +224,11 @@ static vsi_status _query_kernel
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (in_dtype == F16)
in_dtype = F32;
if (out_dtype == F16)
out_dtype = F32;
key = SWISH_HASH_KEY(swish_type, in_dtype, out_dtype, image_2d);
for( i = 0; i < kernel_map_size; i ++ )

View File

@ -279,7 +279,7 @@ static vsi_nn_kernel_node_t _setup
vsi_size_t new_rank = 0;
vsi_bool ret = FALSE;
uint32_t dim = inputs[0]->attr.dim_num;
vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 0 };
vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 1, 1, 1, 1 };
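/* Default every multiple to 1 (identity) so dimensions past the input rank copy through
   instead of collapsing to size 0. */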
for ( i = 0; i < dim; i++)
{

View File

@ -55,6 +55,13 @@ __BEGIN_DECLS
CVIVANTE_NAMESPACE("cl.topk_odd_even_sort_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
"topk_odd_even_sort" }
#define TOPK_ODD_EVEN_SORT_HASH_KEY2( IN_DTYPE, OUT_DTYPE ) \
( ( IN_DTYPE ) | ( OUT_DTYPE << 8 ) )
#define PACK_ODD_EVEN_SORT_KERNEL_MAP2( IN_DTYPE, OUT_DTYPE ) \
{ TOPK_ODD_EVEN_SORT_HASH_KEY2( IN_DTYPE, OUT_DTYPE ), \
CVIVANTE_NAMESPACE("cl.topk_odd_even_sort_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
"topk_odd_even_sort2" }
typedef struct
{
uint32_t key;
@ -88,6 +95,22 @@ static const _kernel_map_type _topk_kernel_map[] =
PACK_KERNEL_MAP( I32, I32, 4 ),
PACK_KERNEL_MAP( I32, I32, 5 ),
PACK_KERNEL_MAP( I32, I32, 6 ),
PACK_KERNEL_MAP( F32, U32, 0 ),
PACK_KERNEL_MAP( F32, U32, 1 ),
PACK_KERNEL_MAP( F32, U32, 2 ),
PACK_KERNEL_MAP( F32, U32, 3 ),
PACK_KERNEL_MAP( F32, U32, 4 ),
PACK_KERNEL_MAP( F32, U32, 5 ),
PACK_KERNEL_MAP( F32, U32, 6 ),
PACK_KERNEL_MAP( F32, I32, 0 ),
PACK_KERNEL_MAP( F32, I32, 1 ),
PACK_KERNEL_MAP( F32, I32, 2 ),
PACK_KERNEL_MAP( F32, I32, 3 ),
PACK_KERNEL_MAP( F32, I32, 4 ),
PACK_KERNEL_MAP( F32, I32, 5 ),
PACK_KERNEL_MAP( F32, I32, 6 ),
};
static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
@ -96,6 +119,8 @@ static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
PACK_ODD_EVEN_SORT_KERNEL_MAP( F32, F32 ),
PACK_ODD_EVEN_SORT_KERNEL_MAP( U32, U32 ),
PACK_ODD_EVEN_SORT_KERNEL_MAP( I32, I32 ),
PACK_ODD_EVEN_SORT_KERNEL_MAP2( F32, U32 ),
PACK_ODD_EVEN_SORT_KERNEL_MAP2( F32, I32 ),
};
/*
@ -108,11 +133,15 @@ static vx_param_description_t _topk_kernel_param_def[] =
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
// Add kernel parameters here
};
#define _TOPK_PARAM_NUM _cnt_of_array( _topk_kernel_param_def )
#define SCALAR_INPUT_NUM_STAGES (3)
#define SCALAR_INPUT_WIDTH (4)
#define SCALAR_INPUT_NUM_STAGES (7)
#define SCALAR_INPUT_WIDTH (8)
static vx_param_description_t _topk_odd_even_sort_kernel_param_def[] =
{
@ -122,10 +151,14 @@ static vx_param_description_t _topk_odd_even_sort_kernel_param_def[] =
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
// Add kernel parameters here
};
#define _TOPK_ODD_EVEN_SORT_PARAM_NUM _cnt_of_array( _topk_odd_even_sort_kernel_param_def )
#define SCALAR_INPUT_SIZE (5)
#define SCALAR_INPUT_SIZE (9)
/*
* Kernel initializer
*/
@ -251,6 +284,22 @@ static vsi_status _query_kernel
case _PACK_SELECT_KEY(I8, I8):
key = TOPK_HASH_KEY( I32, I32, num_stages );
break;
case _PACK_SELECT_KEY(F32, U32):
case _PACK_SELECT_KEY(F16, U32):
case _PACK_SELECT_KEY(F32, U16):
case _PACK_SELECT_KEY(F16, U16):
case _PACK_SELECT_KEY(F32, U8):
case _PACK_SELECT_KEY(F16, U8):
key = TOPK_HASH_KEY( F32, U32, num_stages );
break;
case _PACK_SELECT_KEY(F32, I32):
case _PACK_SELECT_KEY(F16, I32):
case _PACK_SELECT_KEY(F32, I16):
case _PACK_SELECT_KEY(F16, I16):
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F16, I8):
key = TOPK_HASH_KEY( F32, I32, num_stages );
break;
default:
break;
}
@ -318,6 +367,22 @@ static vsi_status _query_odd_even_sort_kernel
case _PACK_SELECT_KEY(I8, I8):
key = TOPK_ODD_EVEN_SORT_HASH_KEY( I32, I32 );
break;
case _PACK_SELECT_KEY(F32, U32):
case _PACK_SELECT_KEY(F16, U32):
case _PACK_SELECT_KEY(F32, U16):
case _PACK_SELECT_KEY(F16, U16):
case _PACK_SELECT_KEY(F32, U8):
case _PACK_SELECT_KEY(F16, U8):
key = TOPK_ODD_EVEN_SORT_HASH_KEY2( F32, U32 );
break;
case _PACK_SELECT_KEY(F32, I32):
case _PACK_SELECT_KEY(F16, I32):
case _PACK_SELECT_KEY(F32, I16):
case _PACK_SELECT_KEY(F16, I16):
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F16, I8):
key = TOPK_ODD_EVEN_SORT_HASH_KEY2( F32, I32 );
break;
default:
break;
}
@ -372,14 +437,24 @@ static vsi_nn_kernel_node_t _setup
int32_t num_stages = (int32_t)ceil(log10(block_size / 2.0f) / log10(2.0f));
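/* Equivalent to ceil(log2(block_size / 2)): the number of sorting stages for one block. */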
vsi_bool is_odd_even_sort = FALSE;
size_t param_num = _TOPK_PARAM_NUM;
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
outputScale = 1.0f / outputScale;
inputTail = -(inputTail * inputScale);
for (i = 1; i < inputs[0]->attr.dim_num; i ++)
{
block_num = block_num * inputs[0]->attr.size[i];
}
if( vsi_nn_is_same_type(inputs[0], outputs[0]) == FALSE ||
outputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_INT32 )
if ((vsi_nn_is_same_type(inputs[0], outputs[0]) == FALSE ||
outputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_INT32 ) &&
!(inputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_FLOAT16 &&
(outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_UINT8 ||
outputs[0]->attr.dtype.vx_type == VSI_NN_TYPE_INT16)))
{
return NULL;
}
@ -425,10 +500,15 @@ static vsi_nn_kernel_node_t _setup
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
uint32_t index = (uint32_t)(input_num + output_num);
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, param_num,
rs_tensors, input_num, &rs_tensors[input_num], output_num );
/* Pass parameters to node. */
node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &inputScale );
node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &inputTail );
node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &outputScale );
node_params[index++] = vsi_nn_kernel_scalar_create(graph, F32, &outputTail );
if (is_odd_even_sort)
{
node_params[SCALAR_INPUT_SIZE] = vsi_nn_kernel_scalar_create(
@ -452,8 +532,25 @@ final:
vsi_safe_release_tensor(rs_tensors[2]);
vsi_safe_release_tensor(rs_tensors[3]);
vsi_safe_release_tensor(rs_tensors[4]);
if (is_odd_even_sort)
{
if (node_params[5])
{
vsi_nn_kernel_scalar_release( &node_params[5] );
}
if (node_params[6])
{
vsi_nn_kernel_scalar_release( &node_params[6] );
}
if (node_params[7])
{
vsi_nn_kernel_scalar_release( &node_params[7] );
}
if (node_params[8])
{
vsi_nn_kernel_scalar_release( &node_params[8] );
}
if (node_params[SCALAR_INPUT_SIZE])
{
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SIZE] );
@ -461,6 +558,22 @@ final:
}
else
{
if (node_params[3])
{
vsi_nn_kernel_scalar_release( &node_params[3] );
}
if (node_params[4])
{
vsi_nn_kernel_scalar_release( &node_params[4] );
}
if (node_params[5])
{
vsi_nn_kernel_scalar_release( &node_params[5] );
}
if (node_params[6])
{
vsi_nn_kernel_scalar_release( &node_params[6] );
}
if (node_params[SCALAR_INPUT_NUM_STAGES])
{
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_NUM_STAGES] );

View File

@ -1,243 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (2)
#define _OUTPUT_NUM (1)
#define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.add_mean_std_norm")
/*
* Kernel params
*/
static vx_param_description_t _add_mean_std_norm_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _ADD_MEAN_STD_NORM_PARAM_NUM _cnt_of_array( _add_mean_std_norm_kernel_param_def )
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
int32_t i;
float mean = .0f, stddev_inv = .0f, variance = .0f, input_d = .0f, data = .0f, eps = .0f;
vsi_ssize_t v_size, n_batch, batch;
/* prepare data */
for(i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
}
for(i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[_CPU_IO_NUM], &(eps));
v_size = in_attr[0]->shape->data[0];
n_batch = in_attr[0]->shape->data[1];
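/* Per batch row: x = in0 + in1, output (x - mean(x)) / stddev(x); eps stands in for a zero variance. */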
for (batch = 0; batch < n_batch; ++batch)
{
float sum = 0.0f;
float sum_sq = 0.0f;
vsi_ssize_t index_base = batch * v_size;
for (i = 0; i < v_size; ++i)
{
vsi_ssize_t index = i + index_base;
input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
sum += input_d;
sum_sq += input_d * input_d;
}
mean = sum / v_size;
stddev_inv = 0.0f;
variance = sum_sq / v_size - mean * mean;
if (variance == 0)
{
stddev_inv = (float)(1.0f / sqrt(eps));
}
else
{
stddev_inv = (float)(1.0f / sqrt(variance));
}
for (i = 0; i < v_size; ++i)
{
vsi_ssize_t index = i + index_base;
input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
data = (input_d - mean) * stddev_inv;
f32_out_buffer[0][index] = data;
}
}
/* save data */
for(i = 0; i < _OUTPUT_NUM; i++)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for(i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _add_mean_std_norm_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _add_mean_std_norm_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_ADD_MEAN_STD_NORM_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
status = _query_kernel( kernel, inputs, outputs );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _ADD_MEAN_STD_NORM_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[_CPU_IO_NUM] = vsi_nn_kernel_scalar_create( graph, F32, &eps );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _ADD_MEAN_STD_NORM_PARAM_NUM );
VSI_ASSERT( status == VSI_SUCCESS );
vsi_nn_kernel_scalar_release( &node_params[_CPU_IO_NUM] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( add_mean_std_norm, _setup )

View File

@ -1,201 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_error.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "utils/vsi_nn_dtype_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
#define _CPU_ARG_NUM (1)
#define _CPU_INPUT_NUM (1)
#define _CPU_OUTPUT_NUM (1)
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("argmax_sw")
DEF_KERNEL_EXECUTOR(_argmax_exec)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
float * buffer[_CPU_IO_NUM] = { NULL };
size_t out_elements = 0;
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
int32_t i;
int32_t axis = 0;
vsi_ssize_t outerSize = 1;
vsi_ssize_t axisSize = 1;
vsi_ssize_t innerSize = 1;
vsi_ssize_t inner = 0;
vsi_ssize_t outer = 0;
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
CHECK_STATUS_FAIL_GOTO(status, final );
buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
buffer[1] = (float *)malloc( out_elements * sizeof(float) );
CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
memset( buffer[1], 0, out_elements * sizeof(float) );
for (i = 0; i < axis; i++)
{
innerSize *= attr[0]->shape->data[i];
}
axisSize = attr[0]->shape->data[axis];
for (i = axis + 1; i < (int32_t)attr[0]->shape->size; i++)
{
outerSize *= attr[0]->shape->data[i];
}
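/* View the input as [inner x axis x outer] with inner fastest varying; each reduction
   scans axisSize elements at stride innerSize. */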
for ( outer = 0; outer < outerSize; ++outer)
{
for ( inner = 0; inner < innerSize; ++inner)
{
float minMaxValue = buffer[0][outer * axisSize * innerSize + inner];
int32_t minMaxIndex = 0;
for (i = 1; i < axisSize; ++i)
{
float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
if (value > minMaxValue)
{
minMaxValue = value;
minMaxIndex = i;
}
}
buffer[1][outer * innerSize + inner] = (float)minMaxIndex;
}
}
status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
buffer[1], out_elements );
CHECK_STATUS_FAIL_GOTO( status, final );
final:
for( i = 0; i < _CPU_IO_NUM; i ++ )
{
if( buffer[i] )
{
free( buffer[i] );
}
vsi_nn_kernel_tensor_attr_release( &attr[i] );
}
return status;
} /* _argmax_exec() */
static vx_param_description_t kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
};
static vsi_status _query_kernel
(
vsi_nn_tensor_t* const* const inputs,
vsi_nn_tensor_t* const* const outputs,
vsi_nn_kernel_t* kernel
)
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _argmax_exec;
kernel->info.parameters = kernel_param_def;
kernel->info.numParams = _cnt_of_array( kernel_param_def );
return VSI_SUCCESS;
} /* _query_kernel() */
#define SCALAR_INPUT_AXIS (2)
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_SUCCESS;
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t axis = 0;
axis = vsi_nn_kernel_param_get_int32(params, "axis");
status = _query_kernel( inputs, outputs, kernel );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
graph, I32, &axis );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
}
else
{
status = VSI_FAILURE;
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( argmax, _setup )

View File

@ -1,202 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_error.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "utils/vsi_nn_dtype_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
#define _CPU_ARG_NUM (1)
#define _CPU_INPUT_NUM (1)
#define _CPU_OUTPUT_NUM (1)
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("argmin_sw")
DEF_KERNEL_EXECUTOR(_argmin_exec)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
float * buffer[_CPU_IO_NUM] = { NULL };
size_t out_elements = 0;
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
int32_t i;
int32_t axis = 0;
vsi_ssize_t outerSize = 1;
vsi_ssize_t axisSize = 1;
vsi_ssize_t innerSize = 1;
vsi_ssize_t inner = 0;
vsi_ssize_t outer = 0;
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &axis);
CHECK_STATUS_FAIL_GOTO(status, final );
buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[1] );
buffer[1] = (float *)malloc( out_elements * sizeof(float) );
CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
memset( buffer[1], 0, out_elements * sizeof(float) );
for (i = 0; i < axis; i++)
{
innerSize *= attr[0]->shape->data[i];
}
axisSize = attr[0]->shape->data[axis];
for (i = axis + 1; i < (int32_t)attr[0]->shape->size; i++)
{
outerSize *= attr[0]->shape->data[i];
}
for ( outer = 0; outer < outerSize; ++outer)
{
for ( inner = 0; inner < innerSize; ++inner)
{
float minMaxValue = buffer[0][outer * axisSize * innerSize + inner];
int32_t minMaxIndex = 0;
for (i = 1; i < axisSize; ++i)
{
float value = buffer[0][(outer * axisSize + i) * innerSize + inner];
if (value < minMaxValue)
{
minMaxValue = value;
minMaxIndex = i;
}
}
buffer[1][outer * innerSize + inner] = (float)minMaxIndex;
}
}
status = vsi_nn_kernel_tensor_write_from_float( tensors[1], attr[1],
buffer[1], out_elements );
CHECK_STATUS_FAIL_GOTO( status, final );
final:
for( i = 0; i < _CPU_IO_NUM; i ++ )
{
if( buffer[i] )
{
free( buffer[i] );
}
vsi_nn_kernel_tensor_attr_release( &attr[i] );
}
return status;
} /* _argmin_exec() */
static vx_param_description_t kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
};
static vsi_status _query_kernel
(
vsi_nn_tensor_t* const* const inputs,
vsi_nn_tensor_t* const* const outputs,
vsi_nn_kernel_t* kernel
)
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _argmin_exec;
kernel->info.parameters = kernel_param_def;
kernel->info.numParams = _cnt_of_array( kernel_param_def );
return VSI_SUCCESS;
} /* _query_kernel() */
#define SCALAR_INPUT_AXIS (2)
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_SUCCESS;
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t axis = 0;
axis = vsi_nn_kernel_param_get_int32(params, "axis");
status = _query_kernel( inputs, outputs, kernel );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
backend_params[SCALAR_INPUT_AXIS] = vsi_nn_kernel_scalar_create(
graph, I32, &axis );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_AXIS] );
}
else
{
status = VSI_FAILURE;
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( argmin, _setup )

View File

@ -1,277 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (4)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.axis_aligned_bbox_transform")
typedef struct vsi_nn_box_encoding_corner_t
{
float x1, y1, x2, y2;
}vsi_nn_box_encoding_corner;
typedef struct vsi_nn_box_encoding_center_t
{
float w, h, x, y;
}vsi_nn_box_encoding_center;
/*
* Kernel params
*/
static vx_param_description_t _axis_aligned_bbox_transform_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
};
#define _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM _cnt_of_array( _axis_aligned_bbox_transform_kernel_param_def )
static void _to_box_encoding_corner
(
vsi_nn_box_encoding_center* ctr,
vsi_nn_box_encoding_corner* cnr
)
{
cnr->x1 = ctr->x - ctr->w / 2;
cnr->y1 = ctr->y - ctr->h / 2;
cnr->x2 = ctr->x + ctr->w / 2;
cnr->y2 = ctr->y + ctr->h / 2;
}
static void _to_box_encoding_center
(
vsi_nn_box_encoding_corner* cnr,
vsi_nn_box_encoding_center* ctr
)
{
ctr->w = cnr->x2 - cnr->x1;
ctr->h = cnr->y2 - cnr->y1;
ctr->x = (cnr->x1 + cnr->x2) / 2;
ctr->y = (cnr->y1 + cnr->y2) / 2;
}
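/* The compute loop below decodes (dx, dy, dw, dh) deltas against each ROI in center
 * encoding, converts back to corners, and clips to the image bounds. */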
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i;
const uint32_t roiLength = 4;
const uint32_t imageLength = 2;
vsi_size_t numClasses = 0;
vsi_size_t numRois = 0;
vsi_size_t j;
vsi_size_t roiIndex;
/* prepare data */
for (i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
}
for (i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
numClasses = in_attr[1]->shape->data[0] / roiLength;
numRois = in_attr[0]->shape->data[1];
for (roiIndex = 0; roiIndex < numRois; roiIndex++)
{
uint32_t batchIndex = (uint32_t)f32_in_buffer[2][roiIndex];
float imageHeight = f32_in_buffer[3][batchIndex * imageLength];
float imageWidth = f32_in_buffer[3][batchIndex * imageLength + 1];
vsi_nn_box_encoding_corner roi_cnr;
vsi_nn_box_encoding_center roiBefore;
roi_cnr.x1 = f32_in_buffer[0][roiIndex * roiLength];
roi_cnr.y1 = f32_in_buffer[0][roiIndex * roiLength + 1];
roi_cnr.x2 = f32_in_buffer[0][roiIndex * roiLength + 2];
roi_cnr.y2 = f32_in_buffer[0][roiIndex * roiLength + 3];
_to_box_encoding_center(&roi_cnr, &roiBefore);
for (j = 0; j < numClasses; j++)
{
vsi_nn_box_encoding_center roi_ctr;
vsi_nn_box_encoding_corner roiAfter;
vsi_nn_box_encoding_corner cliped;
vsi_size_t index = (roiIndex * numClasses + j) * roiLength;
roi_ctr.w = (float)(exp(f32_in_buffer[1][index + 2]) * roiBefore.w);
roi_ctr.h = (float)(exp(f32_in_buffer[1][index + 3]) * roiBefore.h);
roi_ctr.x = roiBefore.x + f32_in_buffer[1][index] * roiBefore.w;
roi_ctr.y = roiBefore.y + f32_in_buffer[1][index + 1] * roiBefore.h;
_to_box_encoding_corner(&roi_ctr, &roiAfter);
cliped.x1 = vsi_nn_min(vsi_nn_max(roiAfter.x1, 0.0f), imageWidth);
cliped.y1 = vsi_nn_min(vsi_nn_max(roiAfter.y1, 0.0f), imageHeight);
cliped.x2 = vsi_nn_min(vsi_nn_max(roiAfter.x2, 0.0f), imageWidth);
cliped.y2 = vsi_nn_min(vsi_nn_max(roiAfter.y2, 0.0f), imageHeight);
f32_out_buffer[0][index] = cliped.x1;
f32_out_buffer[0][index + 1] = cliped.y1;
f32_out_buffer[0][index + 2] = cliped.x2;
f32_out_buffer[0][index + 3] = cliped.y2;
}
}
/* save data */
for(i = 0; i < _OUTPUT_NUM; i++)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for (i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _axis_aligned_bbox_transform_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _axis_aligned_bbox_transform_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM,
inputs, input_num, outputs, output_num );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _AXIS_ALIGNED_BBOX_TRANSFORM_PARAM_NUM );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( axis_aligned_bbox_transform, _setup )

View File

@ -1,222 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_error.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_tensor_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_eltwise.h"
__BEGIN_DECLS
#define _CPU_ARG_NUM (1)
#define _CPU_INPUT_NUM (5)
#define _CPU_OUTPUT_NUM (1)
#define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
#define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("batch_norm_sw")
static vsi_ssize_t _expand_offset
(
vsi_ssize_t index,
vsi_size_t * shape, vsi_size_t rank,
vsi_size_t * strides, vsi_size_t * out_shape
)
{
vsi_size_t i;
vsi_ssize_t offset = 0;
for( i = 0; i < rank && index; i ++ )
{
if( shape[i] == out_shape[i] )
{
offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
}
index /= out_shape[i];
}
return offset;
}
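/* Maps a flat output index to an input offset: dimensions where the input size differs
 * from the output shape (broadcast dims of size 1) contribute no stride. */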
DEF_KERNEL_EXECUTOR(_batch_norm_exec)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VX_SUCCESS;
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
float * buffer[_CPU_IO_NUM] = { NULL };
vsi_size_t out_elements = 0;
vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
uint32_t i = 0;
float eps = 0.f;
status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[param_size - 1], &eps);
CHECK_STATUS_FAIL_GOTO(status, final );
for ( i = 0; i < _CPU_INPUT_NUM; i++)
{
tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );
vsi_nn_kernel_tensor_attr_get_stride( attr[i], stride_size[i] );
buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[i], attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( buffer[i], "Create input buffer fail.", final );
}
tensors[5] = (vsi_nn_kernel_tensor_t)param[5];
attr[5] = vsi_nn_kernel_tensor_attr_create( tensors[5] );
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[5] );
buffer[5] = (float *)malloc( out_elements * sizeof(float) );
CHECK_PTR_FAIL_GOTO( buffer[5], "Create output buffer fail.", final );
memset( buffer[5], 0, out_elements * sizeof(float) );
for( i = 0; i < out_elements; i ++ )
{
vsi_ssize_t in_offset[5] = {0};
int32_t j = 0;
float src = 0.f;
float mean = 0.f;
float variance = 0.f;
float beta = 0.f;
float gamma = 0.f;
for ( j = 0; j < 5; j++)
{
in_offset[j] = _expand_offset( i, attr[j]->shape->data, (vsi_size_t)attr[j]->shape->size,
stride_size[j], attr[5]->shape->data );
}
src = buffer[0][in_offset[0]];
mean = buffer[1][in_offset[1]];
variance = buffer[2][in_offset[2]];
gamma = buffer[3][in_offset[3]];
beta = buffer[4][in_offset[4]];
buffer[5][i] = (src - mean) * gamma / sqrtf(variance + eps) + beta;
}
status = vsi_nn_kernel_tensor_write_from_float( tensors[5], attr[5],
buffer[5], out_elements );
CHECK_STATUS_FAIL_GOTO( status, final );
final:
for( i = 0; i < _CPU_IO_NUM; i ++ )
{
if( buffer[i] )
{
free( buffer[i] );
}
vsi_nn_kernel_tensor_attr_release( &attr[i] );
}
return status;
} /* _batch_norm_exec() */
static vx_param_description_t kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define SCALAR_INPUT_EPS (6)
static vsi_status _query_kernel
(
vsi_nn_tensor_t* const* const inputs,
vsi_nn_tensor_t* const* const outputs,
vsi_nn_kernel_t* kernel
)
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _batch_norm_exec;
kernel->info.parameters = kernel_param_def;
kernel->info.numParams = _cnt_of_array( kernel_param_def );
return VSI_SUCCESS;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_SUCCESS;
vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
float eps = 0;
eps = vsi_nn_kernel_param_get_float32(params, "eps");
status = _query_kernel( inputs, outputs, kernel );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( backend_params, _CPU_PARAM_NUM,
inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
/* Pass parameters to node. */
backend_params[SCALAR_INPUT_EPS] = vsi_nn_kernel_scalar_create(
graph, F32, &eps );
status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
vsi_nn_kernel_scalar_release( &backend_params[SCALAR_INPUT_EPS] );
}
else
{
status = VSI_FAILURE;
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( batchnorm_single, _setup )

View File

@ -1,534 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (3)
#define _OUTPUT_NUM (4)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.box_with_nms_limit")
/*
* Kernel params
*/
static vx_param_description_t _box_with_nms_limit_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _BOX_WITH_NMS_LIMIT_PARAM_NUM _cnt_of_array( _box_with_nms_limit_kernel_param_def )
#define SCORE_THRESHOLD (7)
#define MAX_NUM_DETECTIONS (8)
#define NMS_KERNEL_METHOD (9)
#define IOU_THRESHOLD (10)
#define SIGMA (11)
#define NMS_SCORE_THRESHOLD (12)
static float hard_nms_kernel
(
float iou,
float iouThreshold
)
{
return iou < iouThreshold ? 1.0f : 0.0f;
}
static float linear_nms_kernel
(
float iou,
float iouThreshold
)
{
return iou < iouThreshold ? 1.0f : 1.0f - iou;
}
static float gaussian_nms_kernel
(
float iou,
float sigma
)
{
return (float)(exp(-1.0f * iou * iou / sigma));
}
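/* Score decay per NMS variant: hard keeps or drops a box outright, linear scales the
 * score by (1 - iou) once past the threshold, gaussian applies the soft-NMS decay
 * exp(-iou^2 / sigma). */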
void swap_element
(
uint32_t* list,
uint32_t first,
uint32_t second
)
{
uint32_t temp = list[first];
list[first] = list[second];
list[second] = temp;
}
uint32_t max_element
(
float* data,
uint32_t* index_list,
uint32_t len
)
{
uint32_t i;
uint32_t max_index = 0;
float max_val = data[index_list[0]];
for(i = 1; i < len; i++)
{
float val = data[index_list[i]];
if (max_val < val)
{
max_val = val;
max_index = i;
}
}
return max_index;
}
static uint32_t max_comp_func
(
void* data,
int32_t left,
int32_t right
)
{
float* fdata = (float*)data;
return fdata[left] >= fdata[right];
}
void sort_element_by_score
(
float* data,
uint32_t* index_list,
uint32_t len
)
{
vsi_nn_partition(data, 0, len - 1, max_comp_func, TRUE, index_list);
}
typedef struct
{
float* fdata;
uint32_t numClasses;
} class_comp_param;
static uint32_t class_comp_func
(
void* data,
int32_t left,
int32_t right
)
{
class_comp_param *p = (class_comp_param*)data;
float* fdata = p->fdata;
uint32_t numClasses = p->numClasses;
uint32_t lhsClass = left % numClasses, rhsClass = right % numClasses;
return lhsClass == rhsClass ? fdata[left] > fdata[right]
: lhsClass < rhsClass;
}
static void sort_element_by_class
(
float* data,
uint32_t* index_list,
uint32_t len,
uint32_t numClasses
)
{
class_comp_param class_comp;
class_comp.fdata = data;
class_comp.numClasses = numClasses;
vsi_nn_partition(&class_comp, 0, len - 1, class_comp_func, TRUE, index_list);
}
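/*
 * Groups index_list by class index in ascending order and, within each
 * class, by score in descending order (see class_comp_func above).
 */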
// Takes two axis-aligned boxes given as [x1, y1, x2, y2] and returns their intersection-over-union.
float getIoUAxisAligned
(
const float* roi1,
const float* roi2
)
{
const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]);
const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]);
const float x1 = vsi_nn_max(roi1[0], roi2[0]);
const float x2 = vsi_nn_min(roi1[2], roi2[2]);
const float y1 = vsi_nn_max(roi1[1], roi2[1]);
const float y2 = vsi_nn_min(roi1[3], roi2[3]);
const float w = vsi_nn_max(x2 - x1, 0.0f);
const float h = vsi_nn_max(y2 - y1, 0.0f);
const float areaIntersect = w * h;
const float areaUnion = area1 + area2 - areaIntersect;
return areaIntersect / areaUnion;
}
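/*
 * Worked example (illustrative): roi1 = {0, 0, 2, 2} and roi2 = {1, 1, 3, 3}
 * give areaIntersect = 1, areaUnion = 4 + 4 - 1 = 7, so the IoU is ~0.143.
 */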
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
int32_t* int32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
int32_t* int32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i = 0;
float score_threshold = 0;
int32_t max_num_detections = 0;
int32_t nms_kernel_method = 0;
float iou_threshold = 0;
float sigma = 0;
float nms_score_threshold = 0;
uint32_t j = 0, n = 0, b = 0, c = 0;
const uint32_t kRoiDim = 4;
uint32_t numRois = 0;
uint32_t numClasses = 0;
int32_t ind = 0;
uint32_t * batch_data = NULL;
int32_t numBatch = 0;
uint32_t * select = NULL;
uint32_t select_size = 0;
uint32_t scores_index = 0;
uint32_t roi_index = 0;
uint32_t roi_out_index = 0;
/* prepare data */
for (i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
if (i == 2)
{
int32_in_buffer[i] = (int32_t*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( int32_in_buffer[i], "Create input buffer fail.", final );
}
else
{
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
}
}
for (i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
if (i < 2)
{
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
else
{
int32_out_buffer[i] = (int32_t *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( int32_out_buffer[i], "Create output buffer fail.", final );
memset( int32_out_buffer[i], 0, out_bytes[i] );
}
}
#define VSI_NN_KERNEL_READ_SCALAR(type, idx, pointer) \
vsi_nn_kernel_scalar_read_##type((vsi_nn_kernel_scalar_t)param[idx], pointer)
status = VSI_NN_KERNEL_READ_SCALAR(float32, SCORE_THRESHOLD, &score_threshold);
status |= VSI_NN_KERNEL_READ_SCALAR(int32, MAX_NUM_DETECTIONS, &max_num_detections);
status |= VSI_NN_KERNEL_READ_SCALAR(int32, NMS_KERNEL_METHOD, &nms_kernel_method);
status |= VSI_NN_KERNEL_READ_SCALAR(float32, IOU_THRESHOLD, &iou_threshold);
status |= VSI_NN_KERNEL_READ_SCALAR(float32, SIGMA, &sigma);
status |= VSI_NN_KERNEL_READ_SCALAR(float32, NMS_SCORE_THRESHOLD, &nms_score_threshold);
CHECK_STATUS_FAIL_GOTO(status, final );
#undef VSI_NN_KERNEL_READ_SCALAR
numRois = (uint32_t)in_attr[0]->shape->data[1];
numClasses = (uint32_t)in_attr[0]->shape->data[0];
batch_data = (uint32_t*)malloc(numRois * sizeof(uint32_t));
CHECK_PTR_FAIL_GOTO( batch_data, "Create batch_data fail.", final );
memset(batch_data, 0, numRois * sizeof(uint32_t));
for (i = 0, ind = -1; i < numRois; i++)
{
if (int32_in_buffer[2][i] != ind)
{
ind = int32_in_buffer[2][i];
numBatch++;
}
batch_data[numBatch - 1]++;
}
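/* batch_data[n] now holds the ROI count of batch n; this assumes the
   per-ROI batch indices in input[2] arrive in contiguous runs. */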
select = (uint32_t*)malloc(numBatch * numRois
* numClasses * sizeof(uint32_t));
CHECK_PTR_FAIL_GOTO( select, "Create select fail.", final );
memset(select, 0, numBatch * numRois * numClasses * sizeof(uint32_t));
for (n = 0; n < (uint32_t)numBatch; n++)
{
int32_t numDetections_batch = 0;
uint32_t select_start_batch = select_size;
uint32_t select_len = 0;
// Exclude class 0 (background)
for (c = 1; c < numClasses; c++)
{
uint32_t select_start = select_size;
int32_t maxNumDetections0 = max_num_detections;
uint32_t numDetections = 0;
for (b = 0; b < batch_data[n]; b++)
{
uint32_t index = b * numClasses + c;
float score = f32_in_buffer[0][scores_index + index];
if (score > score_threshold) {
select[select_size] = index;
select_size++;
}
}
select_len = select_size - select_start;
if (maxNumDetections0 < 0)
{
maxNumDetections0 = select_len;
}
for (j = 0; (j < select_len && numDetections < (uint32_t)maxNumDetections0); j++)
{
// find max score and swap to the front.
int32_t max_index = max_element(&(f32_in_buffer[0][scores_index]),
&(select[select_start + j]), select_len - j) + j;
swap_element(&(select[select_start]), max_index, j);
// Calculate IoU of the rest, swap to the end (discard) if needed.
for (i = j + 1; i < select_len; i++)
{
int32_t roiBase0 = roi_index + select[select_start + i] * kRoiDim;
int32_t roiBase1 = roi_index + select[select_start + j] * kRoiDim;
float iou = getIoUAxisAligned(&(f32_in_buffer[1][roiBase0]),
&(f32_in_buffer[1][roiBase1]));
float kernel_iou;
if (nms_kernel_method == 0)
{
kernel_iou = hard_nms_kernel(iou, iou_threshold);
}
else if (nms_kernel_method == 1)
{
kernel_iou = linear_nms_kernel(iou, iou_threshold);
}
else
{
kernel_iou = gaussian_nms_kernel(iou, sigma);
}
f32_in_buffer[0][scores_index + select[select_start + i]] *= kernel_iou;
if (f32_in_buffer[0][scores_index + select[select_start + i]] < nms_score_threshold)
{
swap_element(&(select[select_start]), i, select_len - 1);
i--;
select_len--;
}
}
numDetections++;
}
select_size = select_start + select_len;
numDetections_batch += numDetections;
}
// Take top max_num_detections.
sort_element_by_score(&(f32_in_buffer[0][scores_index]), &(select[select_start_batch]),
numDetections_batch);
if (numDetections_batch > max_num_detections && max_num_detections >= 0)
{
select_size = select_start_batch + max_num_detections;
}
select_len = select_size - select_start_batch;
// Sort again by class.
sort_element_by_class(&(f32_in_buffer[0][scores_index]), &(select[select_start_batch]),
select_len, numClasses);
for (i = 0; i < select_len; i++)
{
int32_t in_index0 = scores_index + select[select_start_batch + i];
int32_t in_index1 = roi_index + select[select_start_batch + i] * kRoiDim;
f32_out_buffer[0][roi_out_index] = f32_in_buffer[0][in_index0];
memcpy(&(f32_out_buffer[1][roi_out_index * kRoiDim]),
&f32_in_buffer[1][in_index1], kRoiDim * sizeof(float));
int32_out_buffer[2][roi_out_index] = select[select_start_batch + i] % numClasses;
int32_out_buffer[3][roi_out_index] = n;
roi_out_index++;
}
scores_index += batch_data[n] * numClasses;
roi_index += batch_data[n] * numClasses * kRoiDim;
}
/* save data */
for(i = 0; i < _OUTPUT_NUM; i++)
{
if (i < 2)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
}
else
{
status = vsi_nn_kernel_tensor_write( output[i], out_attr[i],
int32_out_buffer[i], out_bytes[i] );
}
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
vsi_nn_safe_free(batch_data);
vsi_nn_safe_free(select);
for (i = 0; i < _INPUT_NUM; i++)
{
vsi_nn_safe_free(f32_in_buffer[i]);
vsi_nn_safe_free(int32_in_buffer[i]);
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for (i = 0; i < _OUTPUT_NUM; i++)
{
vsi_nn_safe_free(f32_out_buffer[i]);
vsi_nn_safe_free(int32_out_buffer[i]);
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_SUCCESS;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _box_with_nms_limit_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _box_with_nms_limit_kernel_param_def );
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_BOX_WITH_NMS_LIMIT_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
float score_threshold = vsi_nn_kernel_param_get_float32( params, "score_threshold" );
int32_t max_num_detections = vsi_nn_kernel_param_get_int32( params, "max_num_detections" );
int32_t nms_kernel_method = vsi_nn_kernel_param_get_int32( params, "nms_kernel_method" );
float iou_threshold = vsi_nn_kernel_param_get_float32( params, "iou_threshold" );
float sigma = vsi_nn_kernel_param_get_float32( params, "sigma" );
float nms_score_threshold = vsi_nn_kernel_param_get_float32( params, "nms_score_threshold" );
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status )
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _BOX_WITH_NMS_LIMIT_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[SCORE_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &score_threshold );
node_params[MAX_NUM_DETECTIONS] = vsi_nn_kernel_scalar_create( graph, I32, &max_num_detections );
node_params[NMS_KERNEL_METHOD] = vsi_nn_kernel_scalar_create( graph, I32, &nms_kernel_method );
node_params[IOU_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &iou_threshold );
node_params[SIGMA] = vsi_nn_kernel_scalar_create( graph, F32, &sigma );
node_params[NMS_SCORE_THRESHOLD] = vsi_nn_kernel_scalar_create( graph, F32, &nms_score_threshold );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _BOX_WITH_NMS_LIMIT_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[SCORE_THRESHOLD] );
vsi_nn_kernel_scalar_release( &node_params[MAX_NUM_DETECTIONS] );
vsi_nn_kernel_scalar_release( &node_params[NMS_KERNEL_METHOD] );
vsi_nn_kernel_scalar_release( &node_params[IOU_THRESHOLD] );
vsi_nn_kernel_scalar_release( &node_params[SIGMA] );
vsi_nn_kernel_scalar_release( &node_params[NMS_SCORE_THRESHOLD] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( box_with_nms_limit, _setup )

View File

@@ -1,229 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (2)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.bucketize")
/*
* Kernel params
*/
static vx_param_description_t _bucketize_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _BUCKETIZE_PARAM_NUM _cnt_of_array( _bucketize_kernel_param_def )
#define SCALAR_RIGHT_VALUE (3)
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i = 0, j = 0;
int32_t right = 0;
uint32_t boundaries_size = 0;
/* prepare data */
for(i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
}
for(i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_RIGHT_VALUE], &(right));
CHECK_STATUS_FAIL_GOTO( status, final );
boundaries_size = (uint32_t)in_attr[1]->shape->data[0];
for (i = 0; i < out_elements[0]; i++)
{
float src0 = f32_in_buffer[0][i];
float dst = 0;
for (j = 0; j < boundaries_size; j++)
{
float src1 = f32_in_buffer[1][j];
if (right == 1)
{
dst += (src0 >= src1 ? 1.0f : 0.0f);
}
else
{
dst += (src0 > src1 ? 1.0f : 0.0f);
}
}
f32_out_buffer[0][i] = dst;
}
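/* Worked example (illustrative): boundaries = {1, 3, 5} and an input of 3
   yield bucket 1 with right == 0 (strict '>') and bucket 2 with
   right == 1 ('>='). */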
/* save data */
for(i = 0; i < _OUTPUT_NUM; i++)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for(i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _bucketize_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _bucketize_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_BUCKETIZE_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t right = vsi_nn_kernel_param_get_int32( params, "right" );
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _BUCKETIZE_PARAM_NUM,
inputs, input_num, outputs, output_num );
/* Pass parameters to node. */
node_params[SCALAR_RIGHT_VALUE] = vsi_nn_kernel_scalar_create( graph, I32, &right );
status = vsi_nn_kernel_node_pass_param( node, node_params, _BUCKETIZE_PARAM_NUM );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_RIGHT_VALUE] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( bucketize, _setup )

View File

@@ -1,217 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "utils/vsi_nn_dtype_util_prv.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.cast")
/*
* Kernel params
*/
static vx_param_description_t _cast_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
};
#define _CAST_PARAM_NUM _cnt_of_array( _cast_kernel_param_def )
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i;
double max_value = 0.0, min_value = 0.0;
vsi_bool clamp_flag = FALSE;
vsi_nn_type_e out_type;
/* prepare data */
for(i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
in_attr[i]->quant = VSI_NN_KERNEL_QUANT_NONE;
in_attr[i]->dfp.fl = 0;
in_attr[i]->asymm.scale = 1.0f;
in_attr[i]->asymm.zero_point = 0;
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
}
for(i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
out_type = vsi_nn_dtype_map_kernel(out_attr[0]->dtype);
if( type_is_integer( out_type ) )
{
clamp_flag = TRUE;
type_get_range(out_type, &max_value, &min_value);
}
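/* Illustrative note (not part of the original source): for an int8 output,
   type_get_range presumably yields [-128, 127], so an input of 300.0f
   saturates to 127.0f in the loop below. */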
for (i = 0; i < out_elements[0]; i++)
{
float val = f32_in_buffer[0][i];
if (clamp_flag)
{
val = vsi_nn_clamp(val, (float)min_value, (float)max_value);
}
f32_out_buffer[0][i] = val;
}
/* save data */
for(i = 0; i < _OUTPUT_NUM; i++)
{
out_attr[i]->quant = VSI_NN_KERNEL_QUANT_NONE;
out_attr[i]->dfp.fl = 0;
out_attr[i]->asymm.scale = 1.0f;
out_attr[i]->asymm.zero_point = 0;
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for(i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _cast_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _cast_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_CAST_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
status = _query_kernel( kernel, inputs, outputs );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _CAST_PARAM_NUM,
inputs, input_num, outputs, output_num );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _CAST_PARAM_NUM );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( cast, _setup )

View File

@@ -1,217 +0,0 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.clip")
/*
* Kernel params
*/
static vx_param_description_t _clip_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _CLIP_PARAM_NUM _cnt_of_array( _clip_kernel_param_def )
#define SCALAR_MIN_VALUE (2)
#define SCALAR_MAX_VALUE (3)
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i;
float min_value = 0.0f;
float max_value = 0.0f;
/* prepare data */
for(i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input buffer fail.", final );
}
for(i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MIN_VALUE], &(min_value));
status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[SCALAR_MAX_VALUE], &(max_value));
CHECK_STATUS_FAIL_GOTO( status, final );
for (i = 0; i < out_elements[0]; i++)
{
f32_out_buffer[0][i] = vsi_nn_clamp(f32_in_buffer[0][i], min_value, max_value);
}
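/* Illustrative note (not part of the original source): with min_value = 0
   and max_value = 6 this loop reduces to the common ReLU6 activation. */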
/* save data */
for(i = 0; i < _OUTPUT_NUM; i++)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for(i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _clip_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _clip_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_CLIP_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
status = _query_kernel( kernel, inputs, outputs );
if( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _CLIP_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _CLIP_PARAM_NUM );
VSI_ASSERT( status == VSI_SUCCESS );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MIN_VALUE] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_VALUE] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( clip, _setup )

Some files were not shown because too many files have changed in this diff