Update prebuilt and internal for 22Q1 release(#349)
update driver to REL/6.4.10.2 update internal to commit-id: 33cfb75b Co-authored-by: zhouheng.zheng <zhouheng.zheng@ouotlook.com>
This commit is contained in:
parent
d0af7ae8df
commit
20e27ed550
|
|
@ -1 +1 @@
|
|||
REL/6.4.9
|
||||
REL/6.4.10.2
|
||||
|
|
|
|||
|
|
@ -3347,6 +3347,36 @@ VX_API_ENTRY vx_status VX_API_CALL vxSwapTensorHandle(vx_tensor tensor, void* ne
|
|||
VX_API_ENTRY vx_status VX_API_CALL vxCopyTensorPatch(vx_tensor tensor, vx_size number_of_dims, const vx_size * view_start, const vx_size * view_end,
|
||||
const vx_size * user_stride, void * user_ptr, vx_enum usage, vx_enum user_memory_type);
|
||||
|
||||
/*! \brief Allows the application to copy a view patch from/into a tensor object.
|
||||
* \param [in] tensor The reference to the tensor object that is the source or the
|
||||
* destination of the copy.
|
||||
* \param [in] number_of_dims Number of patch dimensions. An error is returned if 0 or greater than the number of
|
||||
* tensor dimensions. If smaller than number of tensor dimensions, the lower dimensions are assumed.
|
||||
* \param [in] view_start Array of patch start points in each dimension
|
||||
* \param [in] view_end Array of patch end points in each dimension
|
||||
* \param [in] addressing Pointer to parameter of type <tt>\ref vx_tensorpatch_addressing_t</tt>.
|
||||
* \param [in] user_ptr The address of the memory location where to store the requested data
|
||||
* if the copy was requested in read mode, or from where to get the data to store into the tensor
|
||||
* object if the copy was requested in write mode. The accessible memory must be large enough
|
||||
* to contain the specified patch with the specified layout:\n
|
||||
* accessible memory in bytes >= (end[last_dimension] - start[last_dimension]) * stride[last_dimension].\n
|
||||
* The layout of the user memory must follow a row major order.
|
||||
* \param [in] usage This declares the effect of the copy with regard to the tensor object
|
||||
* using the <tt>\ref vx_accessor_e</tt> enumeration. Only <tt>\ref VX_READ_ONLY</tt> and <tt>\ref VX_WRITE_ONLY</tt> are supported:
|
||||
* \arg <tt>\ref VX_READ_ONLY</tt> means that data is copied from the tensor object into the application memory
|
||||
* \arg <tt>\ref VX_WRITE_ONLY</tt> means that data is copied into the tensor object from the application memory
|
||||
* \param [in] user_memory_type A <tt>\ref vx_memory_type_e</tt> enumeration that specifies
|
||||
* the memory type of the memory referenced by user_ptr.
|
||||
* \return A <tt>\ref vx_status_e</tt> enumeration.
|
||||
* \retval VX_ERROR_OPTIMIZED_AWAY This is a reference to a virtual tensor that cannot be
|
||||
* accessed by the application.
|
||||
* \retval VX_ERROR_INVALID_REFERENCE The tensor reference is not actually a tensor reference.
|
||||
* \retval VX_ERROR_INVALID_PARAMETERS Another parameter is incorrect.
|
||||
* \ingroup group_object_tensor
|
||||
*/
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxCopyTensorPatch2(vx_tensor tensor, vx_size number_of_dims, const vx_size * view_start, const vx_size * view_end,
|
||||
const vx_tensorpatch_addressing_t * addressing, vx_size size_of_addressing, void * user_ptr, vx_enum usage, vx_enum user_memory_type);
|
||||
|
||||
/*! \brief Allows the application to get direct access to a patch of tensor object.
|
||||
* \param [in] tensor The reference to the tensor object that is the source or the
|
||||
* destination for direct access.
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ enum vx_library_e {
|
|||
* \ingroup group_kernel
|
||||
*/
|
||||
enum vx_kernel_e {
|
||||
|
||||
/*!
|
||||
* \brief The Color Space conversion kernel.
|
||||
* \details The conversions are based on the <tt>\ref vx_df_image_e</tt> code in the images.
|
||||
|
|
@ -498,6 +497,8 @@ enum vx_kernel_e {
|
|||
|
||||
VX_KERNEL_NN_CONV_3D_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x30,
|
||||
|
||||
VX_KERNEL_NN_DECONV_3D_LAYER = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x31,
|
||||
|
||||
VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -166,4 +166,34 @@ VX_CONV_3D_API_SUPPORT is used to declare that vsi openvx driver can support con
|
|||
#define VX_CONV_3D_API_SUPPORT 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
VX_DECONV_3D_API_SUPPORT is used to declare that vsi openvx driver can support deconv3d by vxDeconv3dLayer API.
|
||||
[value]
|
||||
0: not support
|
||||
1: support
|
||||
*/
|
||||
#ifndef VX_DECONV_3D_API_SUPPORT
|
||||
#define VX_DECONV_3D_API_SUPPORT 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
VX_PAD_CONST_SUPPORT is used to declare that openvx can support pad_const for tensorpad and convolution.
|
||||
[value]
|
||||
0: not support
|
||||
1: support
|
||||
*/
|
||||
#ifndef VX_PAD_CONST_SUPPORT
|
||||
#define VX_PAD_CONST_SUPPORT 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
VX_TENSOR_STRIDE_X_BITS_SUPPORT is used to declare that openvx can support tensors whose stride in the x dimension, measured in bits, is not an integer number of bytes.
|
||||
[value]
|
||||
0: not support
|
||||
1: support
|
||||
*/
|
||||
#ifndef VX_TENSOR_STRIDE_X_BITS_SUPPORT
|
||||
#define VX_TENSOR_STRIDE_X_BITS_SUPPORT 1
|
||||
#endif
|
||||
|
||||
#endif /* __VX_KHR_COMPATIBLE_H__ */
|
||||
|
|
|
|||
|
|
@ -337,6 +337,30 @@ typedef struct _vx_nn_convolution_3d_params_t
|
|||
vx_int32 depth_multiplier; /*!< \brief depthwise multiplier value; if 0, the operation is a regular convolution, otherwise (>= 1) it is a depthwise convolution. */
|
||||
}vx_nn_convolution_3d_params_t;
|
||||
|
||||
/* Parameter block for the 3D deconvolution layer: vxDeconv3dLayer takes a
 * pointer to this structure together with its size in bytes. */
typedef struct _vx_nn_deconvolution_3d_params_t
|
||||
{
|
||||
vx_int32 padding_w_left; /*!< \brief Number of elements subtracted at left of the w dimension of the input. */
|
||||
vx_int32 padding_w_right; /*!< \brief Number of elements subtracted at right of the w dimension of the input. */
|
||||
vx_int32 padding_h_top; /*!< \brief Number of elements subtracted at top of the h dimension of the input. */
|
||||
vx_int32 padding_h_bottom; /*!< \brief Number of elements subtracted at bottom of the h dimension of the input. */
|
||||
vx_int32 padding_d_front; /*!< \brief Number of elements subtracted at front of the d dimension of the input. */
|
||||
vx_int32 padding_d_rear; /*!< \brief Number of elements subtracted at rear of the d dimension of the input. */
|
||||
|
||||
vx_int32 stride_w; /*!< \brief Controls the zeros inserted between input elements in the w direction. NOTE(review): the original wording was "inter 0 between input elements at w direction for down scale", while vxDeconv3dLayer describes up-sampling; confirm the exact meaning. */
|
||||
vx_int32 stride_h; /*!< \brief Controls the zeros inserted between input elements in the h direction. NOTE(review): same caveat as stride_w; confirm the exact meaning. */
|
||||
vx_int32 stride_d; /*!< \brief Controls the zeros inserted between input elements in the d direction. NOTE(review): same caveat as stride_w; confirm the exact meaning. */
|
||||
|
||||
vx_int32 a_w; /*!< \brief user-specified quantity used to distinguish between the \f$upscale_w\f$ different possible output sizes. */
|
||||
vx_int32 a_h; /*!< \brief user-specified quantity used to distinguish between the \f$upscale_h\f$ different possible output sizes. */
|
||||
vx_int32 a_d; /*!< \brief user-specified quantity used to distinguish between the \f$upscale_d\f$ different possible output sizes. */
|
||||
|
||||
vx_int32 channel_group; /*!< \brief Number of separate groups for deconvolution (Range: 0 <= groups <= size of z dimension of input; size of z dimension of input can be divided by groups) */
|
||||
|
||||
vx_enum overflow_policy; /*!< \brief A <tt> VX_TYPE_ENUM</tt> of the <tt> vx_convert_policy_e</tt> enumeration. */
|
||||
vx_enum rounding_policy; /*!< \brief A <tt> VX_TYPE_ENUM</tt> of the <tt> vx_round_policy_e</tt> enumeration. */
|
||||
vx_enum down_scale_size_rounding; /*!< \brief Rounding method for calculating output dimensions. See <tt>\ref vx_nn_rounding_type_e</tt> */
|
||||
}vx_nn_deconvolution_3d_params_t;
|
||||
|
||||
/*==============================================================================
|
||||
TENSOR DATA FUNCTIONS
|
||||
=============================================================================*/
|
||||
|
|
@ -1639,7 +1663,6 @@ typedef struct _vx_nn_pad_params_t
|
|||
vx_uint8 numViewDimensions; /*!< \brief The size of two arrays. */
|
||||
vx_enum pad_mode; /*!< \brief A VX_TYPE_ENUM of the <tt>\ref vx_pad_mode_e</tt> enumeration. */
|
||||
vx_scalar pad_const; /*!< \brief The order const value if setting pad mode to const, the const value is base value, not quantized value. */
|
||||
|
||||
} vx_nn_pad_params_t, * vx_nn_pad_params;
|
||||
|
||||
|
||||
|
|
@ -2072,6 +2095,65 @@ VX_API_ENTRY vx_status VX_API_CALL vxQueryHardwareCaps(
|
|||
*/
|
||||
VX_API_ENTRY vx_node VX_API_CALL vxConv3dLayer(vx_graph graph, vx_tensor inputs, vx_tensor weights, vx_tensor biases, const vx_nn_convolution_3d_params_t *convolution_params, vx_size size_of_convolution_params, vx_tensor outputs);
|
||||
|
||||
/*! \brief [Graph] Creates a Convolutional Network Deconvolution3d Layer Node.
|
||||
* \details Deconvolution denote a sort of reverse convolution, which importantly and confusingly is not actually a proper mathematical deconvolution.
|
||||
* Convolutional Network Deconvolution is up-sampling of an image by learned Deconvolution coefficients.
|
||||
* The operation is similar to convolution but can be implemented by up-sampling the inputs with zeros insertions between the inputs,
|
||||
* and convolving the Deconvolution kernels on the up-sampled result.
|
||||
* For fixed-point data types, a fixed point calculation is performed with round and saturate according to the number of accumulator bits. The number of the accumulator bits are implementation defined,
|
||||
* and should be at least 16.\n
|
||||
* round: rounding according the <tt>vx_round_policy_e</tt> enumeration. \n
|
||||
* saturate: A saturation according the <tt>vx_convert_policy_e</tt> enumeration.
|
||||
* The following equation is implemented: \n
|
||||
* \f$ outputs[j,k,i] = saturate(round(\sum_{l} \sum_{m,n}(inputs_{upscaled}[j+m,k+n,l] \times weights[m,n,l,i])+biasses[j,k,i])) \f$\n
|
||||
* Where \f$m,n\f$ are indexes on the convolution matrices. \f$ l\f$ is an index on all the convolutions per input.\f$ i\f$ is an index per output.
|
||||
* \f$ j,k \f$ are the inputs/outputs spatial indexes.
|
||||
* Deconvolution is done on the width and height dimensions of the <tt>\ref vx_tensor</tt>. Therefore, we use here the term x for the width dimension and y for the height dimension.\n
|
||||
* before the Deconvolution is done, up-scaling the width and height dimensions with zeros is performed.
|
||||
* The relation between input to output is as follows: \n
|
||||
* \f$ width_{output} = (width_{input} -1) * upscale_x - 2 * padding_x + kernel_x + a_x \f$\n
|
||||
* and \n
|
||||
* \f$ height_{output} = (height_{input} - 1) * upscale_y - 2 * padding_y + kernel_y + a_y \f$\n
|
||||
* \f$ depth_{output} = (depth_{input} - 1) * upscale_d - 2 * padding_d + kernel_d + a_d \f$\n
|
||||
* where
|
||||
* \f$width_{input}\f$ is the size of the input width dimension.
|
||||
* \f$height_{input}\f$ is the size of the input height dimension.
|
||||
* \f$depth_{input}\f$ is the size of the input depth dimension.
|
||||
*
|
||||
* \f$width_{output}\f$ is the size of the output width dimension.
|
||||
* \f$height_{output}\f$ is the size of the output height dimension.
|
||||
* \f$depth_{output}\f$ is the size of the output depth dimension.
|
||||
*
|
||||
* \f$kernel_x\f$, \f$kernel_y\f$ and \f$kernel_d\f$ are the deconvolutioned sizes in width, height and depth.
|
||||
* \f$a_x\f$ and \f$a_y\f$ are user-specified quantity used to distinguish between the \f$upscale_x\f$ and \f$upscale_y\f$ different possible output sizes.
|
||||
* \f$upscale_x\f$, \f$upscale_y\f$ and \f$upscale_d\f$ are calculated by the relation between input and output.
|
||||
* \f$a_x\f$ and \f$a_y\f$ must be positive and smaller than \f$upscale_x\f$ and \f$upscale_y\f$ respectively.
|
||||
* Since the padding parameter is on the output. The effective input padding is: \n
|
||||
* \f$ padding_{input_x} = kernel_x -padding_x -1\f$ \n
|
||||
* \f$ padding_{input_y} = kernel_y -padding_y -1\f$ \n
|
||||
* \f$ padding_{input_d} = kernel_d -padding_d -1\f$ \n
|
||||
* Therefore the following constraints apply:
|
||||
* \f$kernel_x >= padding_x - 1\f$,
|
||||
* \f$kernel_y >= padding_y - 1\f$.
|
||||
* \f$kernel_d >= padding_d - 1\f$.
|
||||
* rounding is done according to <tt>\ref vx_nn_rounding_type_e</tt>.
|
||||
* Notice that this node creation function has more parameters than the corresponding kernel. Numbering of kernel parameters (required if you create this node using the generic interface) is explicitly specified here.
|
||||
* \param [in] graph The handle to the graph.
|
||||
* \param [in] inputs The input tensor. 4 lower dimensions represent a single input, and an optional 5th dimension for batch of inputs. Dimension layout is [width, height, depth, #IFM, #batches].
|
||||
* See <tt>\ref vxCreateTensor2</tt> and <tt>\ref vxCreateVirtualTensor2</tt>.
|
||||
* Implementations must support input tensor data types indicated by the extension strings 'KHR_NN_8' or 'KHR_NN_8 KHR_NN_16'. (Kernel parameter #0)
|
||||
* \param [in] weights [static] The 5d weights with dimensions [width, height, depth, #IFM, #OFM]. See <tt>\ref vxCreateTensor2</tt> and <tt>\ref vxCreateVirtualTensor2</tt>. (Kernel parameter #1)
|
||||
* \param [in] biases [static] Optional, ignored if NULL. The biases have one dimension [#OFM]. Implementations must support input tensor data type same as the inputs. (Kernel parameter #2)
|
||||
* \param [in] convolution_params [static] Pointer to parameters of type <tt>\ref vx_nn_deconvolution_3d_params_t</tt> (Kernel parameter #3)
|
||||
* \param [in] size_of_deconv_params [static] Size in bytes of convolution_params. Note that this parameter is not counted as one of the kernel parameters.
|
||||
* \param [out] outputs The output tensor. The output has the same number of dimensions as the input. (Kernel parameter #4)
|
||||
* \ingroup group_cnn
|
||||
* \return <tt> vx_node</tt>.
|
||||
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a
|
||||
* successful creation should be checked using <tt>\ref vxGetStatus</tt>.
|
||||
*/
|
||||
VX_API_ENTRY vx_node VX_API_CALL vxDeconv3dLayer(vx_graph graph, vx_tensor inputs, vx_tensor weights, vx_tensor biases, const vx_nn_deconvolution_3d_params_t *convolution_params, vx_size size_of_deconv_params, vx_tensor outputs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -304,6 +304,39 @@ typedef struct _vx_tensor_view_t * vx_tensor_view;
|
|||
*/
|
||||
typedef struct _vx_tensor_addressing_t * vx_tensor_addressing;
|
||||
|
||||
/*!
|
||||
* \brief The addressing image patch structure is used by the Host only
|
||||
* to address pixels in an image patch. The fields of the structure are defined as:
|
||||
* \arg dim - The dimensions of the image in logical pixel units in the x & y direction.
|
||||
* \arg stride - The physical byte distance from a logical pixel to the next
|
||||
* logically adjacent pixel in the positive x or y direction.
|
||||
* \arg scale - The relationship of scaling from the primary plane (typically
|
||||
* the zero indexed plane) to this plane. An integer down-scaling factor of \f$ f \f$ shall be
|
||||
* set to a value equal to \f$ scale = \frac{unity}{f} \f$ and an integer up-scaling factor of \f$ f \f$
|
||||
* shall be set to a value of \f$ scale = unity * f \f$. \f$ unity \f$ is defined as <tt>\ref VX_SCALE_UNITY</tt>.
|
||||
* \arg step - The step is the number of logical pixel units to skip to
|
||||
* arrive at the next physically unique pixel. For example, on a plane that is
|
||||
* half-scaled in a dimension, the step in that dimension is 2 to indicate that
|
||||
* every other pixel in that dimension is an alias. This is useful in situations
|
||||
* where iteration over unique pixels is required, such as in serializing
|
||||
* or de-serializing the image patch information.
|
||||
* \see <tt>\ref vxMapImagePatch</tt>
|
||||
* \ingroup group_image
|
||||
*/
|
||||
/* Host-side description of a tensor patch layout; vxCopyTensorPatch2 takes a
 * pointer to this structure as its `addressing` argument. */
typedef struct _vx_tensorpatch_addressing_t {
|
||||
vx_uint32 num_of_dims; /*!< \brief Number of dimensions; presumably the number of entries in dim_sizes and strides (TODO confirm). */
|
||||
vx_size *dim_sizes; /*!< \brief Pointer to dimensions array */
|
||||
vx_size *strides; /*!< \brief Pointer to strides array */
|
||||
vx_uint16 stride_x_bits; /*!< \brief Stride in X dimension in bits. Used when the X stride is not an integer number of bytes. */
|
||||
} vx_tensorpatch_addressing_t;
|
||||
|
||||
/*! \brief The addressing of a tensor patch structure is used by the Host only
|
||||
* to address elements in a tensor view patch.
|
||||
* \see <tt>\ref vxCopyTensorPatch2</tt>
|
||||
* \ingroup group_tensor
|
||||
*/
|
||||
typedef struct _vx_tensorpatch_addressing_t * vx_trensor_addressing;
|
||||
|
||||
/*! \brief The weight bias parameter for fused layers
|
||||
* \ingroup group_cnn
|
||||
*/
|
||||
|
|
@ -437,6 +470,8 @@ enum vx_type_e {
|
|||
/* \todo add new object types here */
|
||||
VX_TYPE_BFLOAT16 = 0x81A,/*!< \brief A <tt>\ref vx_bfloat16</tt>. */
|
||||
|
||||
VX_TYPE_INT4 = 0x81C,/*!< \brief A signed 4-bit tensor. */
|
||||
VX_TYPE_UINT4 = 0x81D,/*!< \brief An unsigned 4-bit tensor. */
|
||||
};
|
||||
|
||||
/*! \brief The enumeration of all status codes.
|
||||
|
|
|
|||
|
|
@ -53,17 +53,19 @@ VX_API_ENTRY vx_status VX_API_CALL vxSysSetVipFrequency(
|
|||
vx_uint32 shaderFscaleValue
|
||||
);
|
||||
|
||||
/*! \brief cancel all VIP processing jobs.
|
||||
/*! \brief cancel all VIP processing jobs on a device.
|
||||
* \param [in] context The reference to the implementation context.
|
||||
* \param [in] deviceID The ID of the device bound to the graph.
|
||||
* \return A <tt>\ref vx_status_e</tt> enumeration.
|
||||
* \retval VX_SUCCESS Cancelled all VIP processing job successfully
|
||||
* \retval VX_SUCCESS Cancelled all VIP processing job successfully on a device
|
||||
* and user can check return of vxProcessGraph() to get cancelled status.
|
||||
* \retval VX_ERROR_INVALID_PARAMETERS Invalid context reference.
|
||||
* \retval VX_ERROR_NOT_SUPPORTED Hardware does not support job cancellation.
|
||||
* \retval VX_FAILURE Failed to cancel VIP proccessing job.
|
||||
* \retval VX_FAILURE Failed to cancel VIP processing job on a device.
|
||||
*/
|
||||
VX_API_ENTRY vx_status VX_API_CALL vxSysCancelJob(
|
||||
vx_context context
|
||||
vx_context context,
|
||||
vx_uint32 deviceID
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -168,3 +168,7 @@ DEF_OP(CONV3D)
|
|||
DEF_OP(DECONV3D)
|
||||
DEF_OP(PAD2)
|
||||
DEF_OP(COS)
|
||||
DEF_OP(PRE_PROCESS_RGB888_PLANAR)
|
||||
DEF_OP(GATHER_ELEMENTS)
|
||||
DEF_OP(SELU)
|
||||
DEF_OP(CELU)
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ typedef int32_t vsi_nn_kernel_lut_act_e; enum
|
|||
VSI_NN_KERNEL_LUT_MISH = 1,
|
||||
VSI_NN_KERNEL_LUT_LOG = 2,
|
||||
VSI_NN_KERNEL_LUT_EXP = 3,
|
||||
VSI_NN_KERNEL_LUT_ELU = 4,
|
||||
VSI_NN_KERNEL_LUT_SELU = 4,
|
||||
VSI_NN_KERNEL_LUT_NEG = 5,
|
||||
VSI_NN_KERNEL_LUT_HSIGMOID = 6,
|
||||
VSI_NN_KERNEL_LUT_SOFT_PLUS = 7,
|
||||
|
|
@ -45,6 +45,7 @@ typedef int32_t vsi_nn_kernel_lut_act_e; enum
|
|||
VSI_NN_KERNEL_LUT_RELU_KERAS = 11,
|
||||
VSI_NN_KERNEL_LUT_CLIP = 12,
|
||||
VSI_NN_KERNEL_LUT_SQUARE = 13,
|
||||
VSI_NN_KERNEL_LUT_CELU = 14,
|
||||
};
|
||||
|
||||
#define VSI_NN_KERNEL_LUT_MAX_SIZE (1024)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_CELU_H
|
||||
#define _VSI_NN_OP_CELU_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Parameter block for the CELU operation. */
typedef struct _vsi_nn_celu_param
|
||||
{
|
||||
/* Pointer to _celu_local_data_t, which is not defined in this header.
 * Must remain the first member: the compile-time assert below checks that
 * its offset is 0. */
struct _celu_local_data_t* local;
|
||||
// Operation-specific parameters follow.
|
||||
/* presumably the CELU alpha coefficient - confirm against the op implementation */
float alpha;
|
||||
} vsi_nn_celu_param;
|
||||
/* Compile-time check that `local` sits at offset 0 of the structure. */
_compiler_assert(offsetof(vsi_nn_celu_param, local) == 0, \
|
||||
vsi_nn_celu_h );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -81,12 +81,7 @@ typedef struct _vsi_nn_clip_lcl_data
|
|||
|
||||
typedef struct _vsi_nn_clip_lcl2_data
|
||||
{
|
||||
uint32_t hash_idx;
|
||||
vsi_bool execute_on_sw;
|
||||
vsi_bool enable_image_2d;
|
||||
uint32_t sizes0[VSI_NN_MAX_DIM_NUM];
|
||||
uint32_t sizes1[VSI_NN_MAX_DIM_NUM];
|
||||
uint32_t dim_num;
|
||||
vsi_bool is_internal_node;
|
||||
} vsi_nn_clip_lcl2_data;
|
||||
|
||||
typedef struct _vsi_nn_clip_param
|
||||
|
|
@ -103,4 +98,3 @@ typedef struct _vsi_nn_clip_param
|
|||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_GATHER_ELEMENTS_H
|
||||
#define _VSI_NN_OP_GATHER_ELEMENTS_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Parameter block for the GATHER_ELEMENTS operation. */
typedef struct _vsi_nn_gather_elements_param
|
||||
{
|
||||
/* Pointer to _gather_elements_local_data_t, which is not defined in this
 * header. Must remain the first member: the compile-time assert below checks
 * that its offset is 0. */
struct _gather_elements_local_data_t* local;
|
||||
// Operation-specific parameters follow.
|
||||
/* presumably the axis along which elements are gathered - confirm against the op implementation */
int32_t axis;
|
||||
} vsi_nn_gather_elements_param;
|
||||
/* Compile-time check that `local` sits at offset 0 of the structure. */
_compiler_assert(offsetof(vsi_nn_gather_elements_param, local) == 0, \
|
||||
vsi_nn_gather_elements_h );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_PRE_PROCESS_RGB888_PLANAR_H
|
||||
#define _VSI_NN_OP_PRE_PROCESS_RGB888_PLANAR_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Parameter block for the PRE_PROCESS_RGB888_PLANAR operation. */
typedef struct _vsi_nn_pre_process_rgb888_planar_param
|
||||
{
|
||||
/* Pointer to _pre_process_rgb888_planar_local_data_t, which is not defined
 * in this header. Must remain the first member: the compile-time assert
 * below checks that its offset is 0. */
struct _pre_process_rgb888_planar_local_data_t* local;
|
||||
// Operation-specific parameters follow.
|
||||
/* Rectangle given as left/top origin plus width/height; presumably the
 * region of the input image to process - confirm against the op implementation. */
struct
|
||||
{
|
||||
uint32_t left;
|
||||
uint32_t top;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
} rect;
|
||||
|
||||
/* Output description: a pointer to a size array plus a dimension count.
 * presumably `size` holds `dim_num` entries; ownership of the array is not
 * visible here - confirm. */
struct
|
||||
{
|
||||
vsi_size_t *size;
|
||||
uint32_t dim_num;
|
||||
} output_attr;
|
||||
|
||||
/* presumably per-channel mean values and a common scale used to normalize
 * the pixels - confirm against the op implementation */
float r_mean;
|
||||
float g_mean;
|
||||
float b_mean;
|
||||
float scale;
|
||||
} vsi_nn_pre_process_rgb888_planar_param;
|
||||
/* Compile-time check that `local` sits at offset 0 of the structure. */
_compiler_assert(offsetof(vsi_nn_pre_process_rgb888_planar_param, local) == 0, \
|
||||
vsi_nn_pre_process_rgb888_planar_h );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _VSI_NN_OP_SELU_H
|
||||
#define _VSI_NN_OP_SELU_H
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Parameter block for the SELU operation. */
typedef struct _vsi_nn_selu_param
|
||||
{
|
||||
/* Pointer to _selu_local_data_t, which is not defined in this header.
 * Must remain the first member: the compile-time assert below checks that
 * its offset is 0. */
struct _selu_local_data_t* local;
|
||||
// Operation-specific parameters follow.
|
||||
/* presumably the SELU alpha and gamma (scale) coefficients - confirm against the op implementation */
float alpha;
|
||||
float gamma;
|
||||
} vsi_nn_selu_param;
|
||||
/* Compile-time check that `local` sits at offset 0 of the structure. */
_compiler_assert(offsetof(vsi_nn_selu_param, local) == 0, \
|
||||
vsi_nn_selu_h );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -56,6 +56,7 @@ extern "C" {
|
|||
static inline vsi_##NAME##_array_t * vsi_##NAME##_array_create( size_t size ) { \
|
||||
vsi_##NAME##_array_t * array = (vsi_##NAME##_array_t *)malloc( \
|
||||
sizeof(vsi_##NAME##_array_t) + sizeof(TYPE) * size ); \
|
||||
if (array == NULL) return NULL; \
|
||||
array->size = size; \
|
||||
return array; \
|
||||
} \
|
||||
|
|
@ -205,6 +206,14 @@ static inline double vsi_rint
|
|||
return inter;
|
||||
} /* vsi_rint() */
|
||||
|
||||
/**
|
||||
* Computes an approximation of the error function.
|
||||
* This is the same approximation used by Eigen.
|
||||
*
|
||||
* @param[in] x The input float value.
|
||||
*/
|
||||
float vsi_nn_erf_impl(float x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -58,6 +58,41 @@ extern "C" {
|
|||
|
||||
#define BITS_PER_BYTE 8
|
||||
|
||||
/* Two-level stringize: the outer macro lets X be macro-expanded before the
 * inner macro applies the # operator. */
#define VSI_NN_STRINGIZE(X) VSI_NN_DO_STRINGIZE(X)
|
||||
#define VSI_NN_DO_STRINGIZE(X) #X
|
||||
|
||||
/* Token pasting through helper levels so that X and Y are macro-expanded
 * before ## joins them. */
#define VSI_NN_JOIN(X, Y) VSI_NN_DO_JOIN(X, Y)
|
||||
#define VSI_NN_DO_JOIN(X, Y) VSI_NN_DO_JOIN2(X,Y)
|
||||
#define VSI_NN_DO_JOIN2(X, Y) X##Y
|
||||
|
||||
/* Compiler-specific deprecation helpers.
 * VSI_NN_DEPRECATED(symbol, hints) attaches a deprecation marker to `symbol`;
 * `hints` is stringized, so a hint that is already a string literal keeps its
 * quotes inside the resulting message.
 * VSI_NN_SUPPRESS_DEPRECATED_BEGIN/END bracket code in which deprecation
 * warnings are silenced. */
#if defined(_MSC_VER)
|
||||
/* MSVC: the declspec precedes the symbol. */
#define VSI_NN_DEPRECATED(symbol, hints) \
|
||||
__declspec(deprecated(VSI_NN_STRINGIZE(hints))) symbol
|
||||
|
||||
/* MSVC: push the warning state and disable warning 4996. */
#define VSI_NN_SUPPRESS_DEPRECATED_BEGIN \
|
||||
__pragma(warning( push )) \
|
||||
__pragma(warning(disable : 4996))
|
||||
/* MSVC: restore the warning state saved by the BEGIN macro. */
#define VSI_NN_SUPPRESS_DEPRECATED_END \
|
||||
__pragma(warning(pop))
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
/* GNU-style compilers: the attribute follows the symbol. */
#define VSI_NN_DEPRECATED(symbol, hints) \
|
||||
symbol __attribute__((deprecated(VSI_NN_STRINGIZE(hints))))
|
||||
|
||||
/* GNU-style compilers: push diagnostics and ignore -Wdeprecated-declarations. */
#define VSI_NN_SUPPRESS_DEPRECATED_BEGIN \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
|
||||
|
||||
/* GNU-style compilers: restore the diagnostics saved by the BEGIN macro. */
#define VSI_NN_SUPPRESS_DEPRECATED_END \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#else
|
||||
/* Other compilers: no deprecation marker; the suppress macros expand to nothing. */
#define VSI_NN_DEPRECATED(symbol, hints) \
|
||||
symbol
|
||||
|
||||
#define VSI_NN_SUPPRESS_DEPRECATED_BEGIN
|
||||
#define VSI_NN_SUPPRESS_DEPRECATED_END
|
||||
#endif
|
||||
|
||||
/*-------------------------------------------
|
||||
Functions
|
||||
-------------------------------------------*/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,56 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
#ifndef _VIP_VIRTUAL_DEVICE_H
#define _VIP_VIRTUAL_DEVICE_H

/* <cstdint> is required for uint32_t; without it this header only compiles
 * when another header happens to pull the type in transitively. */
#include <cstdint>
#include <functional>
#include <memory>

/* Forward declaration of the graph type; only pointers to it are used here. */
struct _vsi_nn_graph;
typedef struct _vsi_nn_graph vsi_nn_graph_t;

namespace vip {

/* Opaque implementation type; defined elsewhere. */
class Device;

/* Callable taking an untyped const pointer and returning bool. */
using func_t = std::function<bool (const void*)>;
/* Untyped, read-only payload pointer. */
using data_t = const void*;

/*
 * Handle to a device identified by a 32-bit id.
 *
 * Only the declarations are visible in this header, so the notes below are
 * limited to what the signatures show.
 * NOTE(review): device_ is a raw pointer and the class declares a destructor
 * but no copy operations; whether copying an IDevice is safe depends on the
 * ownership of device_, which is not visible here - confirm against the
 * implementation.
 */
class IDevice {
    public:
        /* Construct a handle for the device with the given id. */
        IDevice(uint32_t id);
        ~IDevice();

        /* Return a 32-bit device id. */
        uint32_t Id() const;

        /* Submit a graph together with a callable and a payload pointer.
         * Presumably func is invoked with data once the graph has been
         * processed - TODO confirm. Returns a bool status. */
        bool GraphSubmit(vsi_nn_graph_t* graph, func_t func, data_t data);

        /* Remove a previously submitted graph. Returns a bool status. */
        bool GraphRemove(const vsi_nn_graph_t* graph);

        /* Thread control / state queries; semantics are defined by the
         * implementation. */
        bool ThreadExit();
        bool ThreadIdle();
        void WaitThreadIdle();

    protected:
        /* Pointer to the opaque implementation object. */
        Device* device_;
};

}  // namespace vip

#endif
|
||||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_assert.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "ops/vsi_nn_op_activations.h"
|
||||
#include "ops/vsi_nn_op_batch_norm.h"
|
||||
#include "ops/vsi_nn_op_multiply.h"
|
||||
|
|
@ -185,6 +186,10 @@
|
|||
#include "ops/vsi_nn_op_deconv3d.h"
|
||||
#include "ops/vsi_nn_op_reduce_mean_internal.h"
|
||||
#include "ops/vsi_nn_op_pad2.h"
|
||||
#include "ops/vsi_nn_op_pre_process_rgb888_planar.h"
|
||||
#include "ops/vsi_nn_op_gather_elements.h"
|
||||
#include "ops/vsi_nn_op_selu.h"
|
||||
#include "ops/vsi_nn_op_celu.h"
|
||||
/* custom node head define define */
|
||||
#include "custom/vsi_nn_custom_node_type.h"
|
||||
|
||||
|
|
@ -210,7 +215,7 @@ typedef union _vsi_nn_nn_param
|
|||
vsi_nn_multiply_param multiply;
|
||||
vsi_nn_proposal_param proposal;
|
||||
vsi_nn_deconv_param deconv;
|
||||
vsi_nn_reshape_param reshape;
|
||||
vsi_nn_reshape_param VSI_NN_DEPRECATED(reshape, "Replace with reshape2");
|
||||
vsi_nn_permute_param permute;
|
||||
vsi_nn_upsample_param upsample;
|
||||
vsi_nn_resize_param resize;
|
||||
|
|
@ -356,6 +361,10 @@ typedef union _vsi_nn_nn_param
|
|||
vsi_nn_deconv3d_param deconv3d;
|
||||
vsi_nn_reduce_mean_internal_param reduce_mean_internal;
|
||||
vsi_nn_pad2_param pad2;
|
||||
vsi_nn_pre_process_rgb888_planar_param pre_process_rgb888_planar;
|
||||
vsi_nn_gather_elements_param gather_elements;
|
||||
vsi_nn_selu_param selu;
|
||||
vsi_nn_celu_param celu;
|
||||
void* client_param;
|
||||
|
||||
/* custom node data struct define */
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@ typedef enum
|
|||
VSI_NN_SOURCE_FORMAT_IMAGE_RGB888_PLANAR,
|
||||
VSI_NN_SOURCE_FORMAT_IMAGE_YUV444,
|
||||
VSI_NN_SOURCE_FORMAT_IMAGE_NV12,
|
||||
VSI_NN_SOURCE_FORMAT_IMAGE_RGB888_PLANAR_SEP,
|
||||
} vsi_nn_preprocess_source_format_e;
|
||||
|
||||
/**
|
||||
|
|
@ -235,6 +236,13 @@ OVXLIB_API vsi_status vsi_nn_AddGraphPostProcess
|
|||
uint32_t count
|
||||
);
|
||||
|
||||
OVXLIB_API vsi_status vsi_nn_AddBinaryGraphInputsWithCropParam
|
||||
(
|
||||
vsi_nn_graph_t* graph,
|
||||
vsi_nn_node_id_t* enable_nodes,
|
||||
uint32_t enable_nodes_count
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ extern "C"{
|
|||
|
||||
#define VSI_NN_VERSION_MAJOR 1
|
||||
#define VSI_NN_VERSION_MINOR 1
|
||||
#define VSI_NN_VERSION_PATCH 39
|
||||
#define VSI_NN_VERSION_PATCH 43
|
||||
#define VSI_NN_VERSION \
|
||||
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ __BEGIN_DECLS
|
|||
#define CLIP_HASH_KEY( IN_DTYPE, OUT_DTYPE, _image_2d ) \
|
||||
(( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) | (_image_2d))
|
||||
|
||||
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
|
||||
#define PACK_KERNEL_MAP_3D( IN_DTYPE, OUT_DTYPE ) \
|
||||
{ CLIP_HASH_KEY( IN_DTYPE, OUT_DTYPE, 0 ), \
|
||||
CVIVANTE_NAMESPACE("cl.clip_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
|
||||
_CLIP_KERNEL_SOURCE(IN_DTYPE) }
|
||||
|
|
@ -64,15 +64,21 @@ typedef struct
|
|||
|
||||
static const _kernel_map_type _clip_kernel_map[] =
|
||||
{
|
||||
PACK_KERNEL_MAP(F32, F32),
|
||||
PACK_KERNEL_MAP(F32, U8),
|
||||
PACK_KERNEL_MAP(U8, U8),
|
||||
PACK_KERNEL_MAP(U8, F32),
|
||||
PACK_KERNEL_MAP(BF16, BF16),
|
||||
PACK_KERNEL_MAP_3D(F32, F32),
|
||||
PACK_KERNEL_MAP_3D(F32, U8),
|
||||
PACK_KERNEL_MAP_3D(F32, I32),
|
||||
PACK_KERNEL_MAP_3D(U8, U8),
|
||||
PACK_KERNEL_MAP_3D(U8, F32),
|
||||
PACK_KERNEL_MAP_3D(I32, I32),
|
||||
PACK_KERNEL_MAP_3D(I32, F32),
|
||||
PACK_KERNEL_MAP_3D(BF16, BF16),
|
||||
PACK_KERNEL_MAP_2D(F32, F32),
|
||||
PACK_KERNEL_MAP_2D(F32, U8),
|
||||
PACK_KERNEL_MAP_2D(F32, I32),
|
||||
PACK_KERNEL_MAP_2D(U8, U8),
|
||||
PACK_KERNEL_MAP_2D(U8, F32),
|
||||
PACK_KERNEL_MAP_2D(I32, I32),
|
||||
PACK_KERNEL_MAP_2D(I32, F32),
|
||||
PACK_KERNEL_MAP_2D(BF16, BF16),
|
||||
};
|
||||
|
||||
|
|
@ -100,9 +106,6 @@ static vx_param_description_t _clip_kernel_param_def[] =
|
|||
#define SCALAR_OUTPUT_SCALE (6)
|
||||
#define SCALAR_OUTPUT_TAIL (7)
|
||||
|
||||
#define CLIP_PARAM_NUM 4
|
||||
#define CLIP_QUANT_PARAM_NUM _cnt_of_array( _clip_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
|
|
@ -149,8 +152,6 @@ final:
|
|||
return status;
|
||||
} /* _clip_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
|
|
@ -159,8 +160,7 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
vsi_bool image_2d,
|
||||
vsi_bool *is_use_u8_kernel
|
||||
vsi_bool image_2d
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
|
|
@ -178,28 +178,38 @@ static vsi_status _query_kernel
|
|||
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
if (F16 == in_dtype)
|
||||
{
|
||||
in_dtype = F32;
|
||||
}
|
||||
#define _PACK_SELECT_KEY( in_type, out_type ) \
|
||||
( ( in_type ) | ( out_type << 8 ))
|
||||
|
||||
if (F16 == out_dtype)
|
||||
switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
|
||||
{
|
||||
out_dtype = F32;
|
||||
}
|
||||
|
||||
if ((U8 == in_dtype) || (U8 == out_dtype))
|
||||
{
|
||||
param_def_size = CLIP_QUANT_PARAM_NUM;
|
||||
*is_use_u8_kernel = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
param_def_size = CLIP_PARAM_NUM;
|
||||
*is_use_u8_kernel = FALSE;
|
||||
}
|
||||
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
key = CLIP_HASH_KEY( F32, F32, image_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(F32, I8):
|
||||
case _PACK_SELECT_KEY(F16, I16):
|
||||
case _PACK_SELECT_KEY(F16, I32):
|
||||
key = CLIP_HASH_KEY( F32, I32, image_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, I8):
|
||||
case _PACK_SELECT_KEY(I16, I16):
|
||||
case _PACK_SELECT_KEY(I32, I32):
|
||||
key = CLIP_HASH_KEY( I32, I32, image_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I8, F16):
|
||||
case _PACK_SELECT_KEY(I16, F16):
|
||||
case _PACK_SELECT_KEY(I32, F16):
|
||||
case _PACK_SELECT_KEY(I8, F32):
|
||||
case _PACK_SELECT_KEY(I16, F32):
|
||||
case _PACK_SELECT_KEY(I32, F32):
|
||||
key = CLIP_HASH_KEY( I32, F32, image_2d );
|
||||
break;
|
||||
default:
|
||||
key = CLIP_HASH_KEY( in_dtype, out_dtype, image_2d );
|
||||
break;
|
||||
}
|
||||
#undef _PACK_SELECT_KEY
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
|
|
@ -246,7 +256,6 @@ static vsi_nn_kernel_node_t _setup
|
|||
float outputTail = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
vsi_bool is_use_u8_kernel = FALSE;
|
||||
float min_value = vsi_nn_kernel_param_get_float32( params, "min_value" );
|
||||
float max_value = vsi_nn_kernel_param_get_float32( params, "max_value" );
|
||||
|
||||
|
|
@ -261,12 +270,10 @@ static vsi_nn_kernel_node_t _setup
|
|||
|
||||
image_2d = (inputs[0]->attr.dim_num == 2 || inputs[0]->attr.size[2] == 1);
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, image_2d, &is_use_u8_kernel);
|
||||
status = _query_kernel( kernel, inputs, outputs, image_2d);
|
||||
|
||||
if ( VSI_SUCCESS == status )
|
||||
{
|
||||
size_t node_params_num = CLIP_PARAM_NUM;
|
||||
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
|
|
@ -275,28 +282,21 @@ static vsi_nn_kernel_node_t _setup
|
|||
inputs, input_num, outputs, output_num );
|
||||
node_params[SCALAR_MIN_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &min_value );
|
||||
node_params[SCALAR_MAX_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &max_value );
|
||||
if (is_use_u8_kernel)
|
||||
{
|
||||
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
node_params[SCALAR_INPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &inputTail );
|
||||
node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &outputTail );
|
||||
node_params_num = CLIP_QUANT_PARAM_NUM;
|
||||
}
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _CLIP_PARAM_NUM );
|
||||
VSI_ASSERT( status == VSI_SUCCESS );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MIN_VALUE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_MAX_VALUE] );
|
||||
if (is_use_u8_kernel)
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
|
||||
}
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
|
|
|
|||
|
|
@ -45,13 +45,14 @@ typedef enum
|
|||
UNARY_COS,
|
||||
UNARY_EXP,
|
||||
UNARY_LOG,
|
||||
UNARY_ELU,
|
||||
UNARY_NEG,
|
||||
UNARY_HSIGMOID,
|
||||
UNARY_MISH,
|
||||
UNARY_ROUND,
|
||||
UNARY_GELU,
|
||||
UNARY_HGELU
|
||||
UNARY_HGELU,
|
||||
UNARY_SELU,
|
||||
UNARY_CELU,
|
||||
} unary_type_e;
|
||||
|
||||
/*
|
||||
|
|
@ -60,16 +61,18 @@ typedef enum
|
|||
#define HASH_UNARY_KEY(_type, _input_type, _output_type, _image_2d) \
|
||||
((_type << 20) | (_input_type << 12) | (_output_type << 4) | (_image_2d))
|
||||
|
||||
#define VSI_NN_GEN_UNARY_KERNEL_SOURCE_NAME() \
|
||||
"eltwise_unary"
|
||||
#define _UNARY_KERNEL_SOURCE0_NAME() \
|
||||
"eltwise_unary_0"
|
||||
#define _UNARY_KERNEL_SOURCE1_NAME() \
|
||||
"eltwise_unary_1"
|
||||
|
||||
#define HASH_UNARY_SH_KERNEL_NAME(FUNC_NAME, SRC_TYPE, DST_TYPE) \
|
||||
CVIVANTE_NAMESPACE("cl."#FUNC_NAME"_"#SRC_TYPE"to"#DST_TYPE)
|
||||
|
||||
#define TENSOR_UNARY_KERNELS(FUNC_NAME, TYPE, SRC_TYPE, OUT_TYPE) \
|
||||
#define TENSOR_UNARY_KERNELS_3D(FUNC_NAME, TYPE, SRC_TYPE, OUT_TYPE) \
|
||||
{ HASH_UNARY_KEY(TYPE, SRC_TYPE, OUT_TYPE, 0), \
|
||||
HASH_UNARY_SH_KERNEL_NAME(FUNC_NAME, SRC_TYPE, OUT_TYPE), \
|
||||
VSI_NN_GEN_UNARY_KERNEL_SOURCE_NAME() },
|
||||
_UNARY_KERNEL_SOURCE1_NAME() },
|
||||
|
||||
#define HASH_UNARY_SH_KERNEL_2D_NAME(FUNC_NAME, SRC_TYPE, DST_TYPE) \
|
||||
CVIVANTE_NAMESPACE("cl."#FUNC_NAME"_"#SRC_TYPE"to"#DST_TYPE"_2D")
|
||||
|
|
@ -77,29 +80,20 @@ typedef enum
|
|||
#define TENSOR_UNARY_KERNELS_2D(FUNC_NAME, TYPE, SRC_TYPE, OUT_TYPE) \
|
||||
{ HASH_UNARY_KEY(TYPE, SRC_TYPE, OUT_TYPE, 1), \
|
||||
HASH_UNARY_SH_KERNEL_2D_NAME(FUNC_NAME, SRC_TYPE, OUT_TYPE), \
|
||||
VSI_NN_GEN_UNARY_KERNEL_SOURCE_NAME() },
|
||||
|
||||
#define TENSOR_UNARY_KERNELS_FLOAT(FUNC_NAME, TYPE, SRC_TYPE, OUT_TYPE) \
|
||||
{ HASH_UNARY_KEY(TYPE, SRC_TYPE, OUT_TYPE, 0), \
|
||||
HASH_UNARY_SH_KERNEL_NAME(FUNC_NAME, F32, F32), \
|
||||
VSI_NN_GEN_UNARY_KERNEL_SOURCE_NAME() },
|
||||
|
||||
#define TENSOR_UNARY_KERNELS_FLOAT_2D(FUNC_NAME, TYPE, SRC_TYPE, OUT_TYPE) \
|
||||
{ HASH_UNARY_KEY(TYPE, SRC_TYPE, OUT_TYPE, 1), \
|
||||
HASH_UNARY_SH_KERNEL_2D_NAME(FUNC_NAME, F32, F32), \
|
||||
VSI_NN_GEN_UNARY_KERNEL_SOURCE_NAME() },
|
||||
_UNARY_KERNEL_SOURCE0_NAME() },
|
||||
|
||||
#define SIN_OPERATION sin
|
||||
#define COS_OPERATION cos
|
||||
#define EXP_OPERATION exp
|
||||
#define LOG_OPERATION log
|
||||
#define ELU_OPERATION elu
|
||||
#define NEG_OPERATION neg
|
||||
#define HSIGMOID_OPERATION hard_sigmoid
|
||||
#define MISH_OPERATION mish
|
||||
#define ROUND_OPERATION round
|
||||
#define GELU_OPERATION gelu
|
||||
#define HGELU_OPERATION hard_gelu
|
||||
#define SELU_OPERATION selu
|
||||
#define CELU_OPERATION celu
|
||||
|
||||
static const struct {
|
||||
uint32_t key;
|
||||
|
|
@ -107,77 +101,59 @@ static const struct {
|
|||
const char* source_name;
|
||||
} kernel_map[] =
|
||||
{
|
||||
TENSOR_UNARY_KERNELS_FLOAT(SIN_OPERATION, UNARY_SIN, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(SIN_OPERATION, UNARY_SIN, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(COS_OPERATION, UNARY_COS, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(COS_OPERATION, UNARY_COS, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(EXP_OPERATION, UNARY_EXP, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(EXP_OPERATION, UNARY_EXP, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(LOG_OPERATION, UNARY_LOG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(LOG_OPERATION, UNARY_LOG, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(ELU_OPERATION, UNARY_ELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(ELU_OPERATION, UNARY_ELU, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(NEG_OPERATION, UNARY_NEG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(NEG_OPERATION, UNARY_NEG, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(HSIGMOID_OPERATION, UNARY_HSIGMOID, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(HSIGMOID_OPERATION, UNARY_HSIGMOID, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(MISH_OPERATION, UNARY_MISH, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(MISH_OPERATION, UNARY_MISH, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(ROUND_OPERATION, UNARY_ROUND, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(ROUND_OPERATION, UNARY_ROUND, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(GELU_OPERATION, UNARY_GELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(GELU_OPERATION, UNARY_GELU, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(HGELU_OPERATION, UNARY_HGELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT(HGELU_OPERATION, UNARY_HGELU, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_3D(SIN_OPERATION, UNARY_SIN, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(COS_OPERATION, UNARY_COS, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(EXP_OPERATION, UNARY_EXP, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(LOG_OPERATION, UNARY_LOG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(NEG_OPERATION, UNARY_NEG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(HSIGMOID_OPERATION, UNARY_HSIGMOID, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(MISH_OPERATION, UNARY_MISH, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(ROUND_OPERATION, UNARY_ROUND, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(GELU_OPERATION, UNARY_GELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(HGELU_OPERATION, UNARY_HGELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(SELU_OPERATION, UNARY_SELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_3D(CELU_OPERATION, UNARY_CELU, F32, F32)
|
||||
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(SIN_OPERATION, UNARY_SIN, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(SIN_OPERATION, UNARY_SIN, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(COS_OPERATION, UNARY_COS, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(COS_OPERATION, UNARY_COS, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(EXP_OPERATION, UNARY_EXP, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(EXP_OPERATION, UNARY_EXP, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(LOG_OPERATION, UNARY_LOG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(LOG_OPERATION, UNARY_LOG, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(ELU_OPERATION, UNARY_ELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(ELU_OPERATION, UNARY_ELU, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(NEG_OPERATION, UNARY_NEG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(NEG_OPERATION, UNARY_NEG, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(HSIGMOID_OPERATION, UNARY_HSIGMOID, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(HSIGMOID_OPERATION, UNARY_HSIGMOID, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(MISH_OPERATION, UNARY_MISH, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(MISH_OPERATION, UNARY_MISH, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(ROUND_OPERATION, UNARY_ROUND, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(ROUND_OPERATION, UNARY_ROUND, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(GELU_OPERATION, UNARY_GELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(GELU_OPERATION, UNARY_GELU, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(HGELU_OPERATION, UNARY_HGELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_FLOAT_2D(HGELU_OPERATION, UNARY_HGELU, F16, F16)
|
||||
TENSOR_UNARY_KERNELS_2D(SIN_OPERATION, UNARY_SIN, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(COS_OPERATION, UNARY_COS, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(EXP_OPERATION, UNARY_EXP, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(LOG_OPERATION, UNARY_LOG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(NEG_OPERATION, UNARY_NEG, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(HSIGMOID_OPERATION, UNARY_HSIGMOID, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(MISH_OPERATION, UNARY_MISH, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(ROUND_OPERATION, UNARY_ROUND, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(GELU_OPERATION, UNARY_GELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(HGELU_OPERATION, UNARY_HGELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(SELU_OPERATION, UNARY_SELU, F32, F32)
|
||||
TENSOR_UNARY_KERNELS_2D(CELU_OPERATION, UNARY_CELU, F32, F32)
|
||||
|
||||
TENSOR_UNARY_KERNELS(SIN_OPERATION, UNARY_SIN, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(COS_OPERATION, UNARY_COS, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(EXP_OPERATION, UNARY_EXP, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(LOG_OPERATION, UNARY_LOG, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(ELU_OPERATION, UNARY_ELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(NEG_OPERATION, UNARY_NEG, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(HSIGMOID_OPERATION, UNARY_HSIGMOID, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(MISH_OPERATION, UNARY_MISH, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(ROUND_OPERATION, UNARY_ROUND, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(GELU_OPERATION, UNARY_GELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS(HGELU_OPERATION, UNARY_HGELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(SIN_OPERATION, UNARY_SIN, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(COS_OPERATION, UNARY_COS, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(EXP_OPERATION, UNARY_EXP, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(LOG_OPERATION, UNARY_LOG, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(NEG_OPERATION, UNARY_NEG, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(HSIGMOID_OPERATION, UNARY_HSIGMOID, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(MISH_OPERATION, UNARY_MISH, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(ROUND_OPERATION, UNARY_ROUND, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(GELU_OPERATION, UNARY_GELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(HGELU_OPERATION, UNARY_HGELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(SELU_OPERATION, UNARY_SELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_3D(CELU_OPERATION, UNARY_CELU, U8, U8)
|
||||
|
||||
TENSOR_UNARY_KERNELS_2D(SIN_OPERATION, UNARY_SIN, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(COS_OPERATION, UNARY_COS, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(EXP_OPERATION, UNARY_EXP, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(LOG_OPERATION, UNARY_LOG, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(ELU_OPERATION, UNARY_ELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(NEG_OPERATION, UNARY_NEG, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(HSIGMOID_OPERATION, UNARY_HSIGMOID, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(MISH_OPERATION, UNARY_MISH, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(ROUND_OPERATION, UNARY_ROUND, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(GELU_OPERATION, UNARY_GELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(HGELU_OPERATION, UNARY_HGELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(SELU_OPERATION, UNARY_SELU, U8, U8)
|
||||
TENSOR_UNARY_KERNELS_2D(CELU_OPERATION, UNARY_CELU, U8, U8)
|
||||
|
||||
TENSOR_UNARY_KERNELS(NEG_OPERATION, UNARY_NEG, I32, I32)
|
||||
TENSOR_UNARY_KERNELS_3D(NEG_OPERATION, UNARY_NEG, I32, I32)
|
||||
|
||||
TENSOR_UNARY_KERNELS_2D(NEG_OPERATION, UNARY_NEG, I32, I32)
|
||||
};
|
||||
|
|
@ -186,13 +162,14 @@ static const struct {
|
|||
#undef COS_OPERATION
|
||||
#undef EXP_OPERATION
|
||||
#undef LOG_OPERATION
|
||||
#undef ELU_OPERATION
|
||||
#undef NEG_OPERATION
|
||||
#undef HSIGMOID_OPERATION
|
||||
#undef MISH_OPERATION
|
||||
#undef ROUND_OPERATION
|
||||
#undef GELU_OPERATION
|
||||
#undef HGELU_OPERATION
|
||||
#undef SELU_OPERATION
|
||||
#undef CELU_OPERATION
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
|
|
@ -284,7 +261,21 @@ static vsi_status _query_kernel
|
|||
|
||||
input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
#define _PACK_SELECT_KEY( in_type, out_type ) \
|
||||
( ( in_type ) | ( out_type << 8 ))
|
||||
|
||||
switch (_PACK_SELECT_KEY(input_dtype, output_dtype))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
key = HASH_UNARY_KEY( type, F32, F32, image_2d );
|
||||
break;
|
||||
default:
|
||||
key = HASH_UNARY_KEY( type, input_dtype, output_dtype, image_2d );
|
||||
break;
|
||||
}
|
||||
#undef _PACK_SELECT_KEY
|
||||
|
||||
for( i = 0; i < _cnt_of_array(kernel_map); i ++ )
|
||||
{
|
||||
|
|
@ -336,6 +327,15 @@ static vsi_nn_kernel_node_t _setup
|
|||
float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
|
||||
float beta = vsi_nn_kernel_param_get_float32( params, "beta" );
|
||||
|
||||
if (unary_type == UNARY_SELU)
|
||||
{
|
||||
alpha = alpha * beta;
|
||||
}
|
||||
else if (unary_type == UNARY_CELU)
|
||||
{
|
||||
beta = 1.0f / alpha;
|
||||
}
|
||||
|
||||
ret = vsi_nn_kernel_optimize_element_shape(
|
||||
inputs[0]->attr.size, inputs[0]->attr.dim_num,
|
||||
shape, &new_rank );
|
||||
|
|
@ -450,11 +450,12 @@ REGISTER_ELTWISE_UNARY_BACKEND_CL( sin, UNARY_SIN )
|
|||
REGISTER_ELTWISE_UNARY_BACKEND_CL( cos, UNARY_COS )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( exp, UNARY_EXP )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( log, UNARY_LOG )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( elu, UNARY_ELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( neg, UNARY_NEG )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( hard_sigmoid, UNARY_HSIGMOID )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( mish, UNARY_MISH )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( round, UNARY_ROUND )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( gelu, UNARY_GELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( hard_gelu, UNARY_HGELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( selu, UNARY_SELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CL( celu, UNARY_CELU )
|
||||
__END_DECLS
|
||||
|
|
|
|||
|
|
@ -0,0 +1,282 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
/* Identifiers of the kernels implemented in this file. */
typedef enum
{
    INTERNAL_KERNEL_GATHER_ELEMENTS,
} _internal_kernel_e;

/* Name of the kernel source registered for every gather_elements variant. */
#define _GATHER_ELEMENTS_KERNEL_SOURCE      "gather_elements"

/* Stringify the argument exactly as written (no macro expansion). */
#define STR(a) #a
// Add kernel hashtable here
/*
 * Pack the kernel selection criteria into one integer key:
 *   bits  0..1   AXIS
 *   bits  2..9   IN0_DTYPE
 *   bits 10..17  IN1_DTYPE
 *   bits 18..25  OUT_DTYPE
 *   bit  26      IMG_2D
 * Every argument is parenthesized so that expression arguments (e.g. a | b)
 * are shifted as a whole instead of being split by operator precedence.
 * Callers must keep AXIS within 0..3, otherwise it overlaps IN0_DTYPE.
 */
#define GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, IMG_2D ) \
    (( AXIS ) | ( (IN0_DTYPE) << 2 ) | ( (IN1_DTYPE) << 10 ) | ( (OUT_DTYPE) << 18 ) | ( (IMG_2D) << 26 ))

/* Table entry for the 3D variant: key with IMG_2D = 0 and the kernel name
 * "cl.gather_elements_axis<AXIS>_<IN0>_<IN1>to<OUT>". */
#define PACK_KERNEL_3D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
    { GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 0 ), \
      CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)), \
      _GATHER_ELEMENTS_KERNEL_SOURCE}

/* Table entry for the 2D variant: key with IMG_2D = 1 and the same kernel
 * name with a "_2D" suffix. */
#define PACK_KERNEL_2D_MAP( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE ) \
    { GATHER_ELEMENTS_HASH_KEY( AXIS, IN0_DTYPE, IN1_DTYPE, OUT_DTYPE, 1 ), \
      CVIVANTE_NAMESPACE("cl.gather_elements_axis"STR(AXIS)"_"STR(IN0_DTYPE)"_"STR(IN1_DTYPE)"to"STR(OUT_DTYPE)"_2D"), \
      _GATHER_ELEMENTS_KERNEL_SOURCE}
|
||||
|
||||
/*
 * One entry of the kernel lookup table.
 *   key           - packed selection key the entry is matched against.
 *   function_name - kernel function name; the table stores string literals
 *                   here, so the pointer is const-qualified.
 *   source_name   - name of the kernel source to register.
 */
typedef struct
{
    uint32_t key;
    const char * function_name;
    const char * source_name;
} _kernel_map_type;
|
||||
|
||||
static const _kernel_map_type _gather_elements_kernel_map[] =
|
||||
{
|
||||
// Register kernel here
|
||||
PACK_KERNEL_3D_MAP( 0, F32, I32, F32 ),
|
||||
PACK_KERNEL_3D_MAP( 0, I32, I32, I32 ),
|
||||
PACK_KERNEL_3D_MAP( 0, U32, I32, U32 ),
|
||||
PACK_KERNEL_3D_MAP( 1, F32, I32, F32 ),
|
||||
PACK_KERNEL_3D_MAP( 1, I32, I32, I32 ),
|
||||
PACK_KERNEL_3D_MAP( 1, U32, I32, U32 ),
|
||||
PACK_KERNEL_3D_MAP( 2, F32, I32, F32 ),
|
||||
PACK_KERNEL_3D_MAP( 2, I32, I32, I32 ),
|
||||
PACK_KERNEL_3D_MAP( 2, U32, I32, U32 ),
|
||||
|
||||
PACK_KERNEL_2D_MAP( 0, F32, I32, F32 ),
|
||||
PACK_KERNEL_2D_MAP( 0, I32, I32, I32 ),
|
||||
PACK_KERNEL_2D_MAP( 0, U32, I32, U32 ),
|
||||
PACK_KERNEL_2D_MAP( 1, F32, I32, F32 ),
|
||||
PACK_KERNEL_2D_MAP( 1, I32, I32, I32 ),
|
||||
PACK_KERNEL_2D_MAP( 1, U32, I32, U32 ),
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _gather_elements_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _GATHER_ELEMENTS_PARAM_NUM _cnt_of_array( _gather_elements_kernel_param_def )
|
||||
#define SCALAR_INPUT_SCALE (3)
|
||||
#define SCALAR_INPUT_TAIL (4)
|
||||
#define SCALAR_INPUT_AXIS_SIZE (5)
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
DEF_KERNEL_INITIALIZER(_gather_elements_initializer)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
gpu_param_t gpu_param = {
|
||||
3,
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0},
|
||||
{0, 0, 0}
|
||||
};
|
||||
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
|
||||
vsi_size_array_t * out_shape = NULL;
|
||||
|
||||
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
|
||||
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
|
||||
|
||||
out_shape = output_attr->shape;
|
||||
|
||||
gpu_param.global_scale[0] = 1;
|
||||
gpu_param.global_scale[1] = 1;
|
||||
gpu_param.global_scale[2] = 1;
|
||||
|
||||
gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
|
||||
gpu_param.global_size[0] = gpu_align_p2(
|
||||
(out_shape->data[0] + gpu_param.global_scale[0] - 1)
|
||||
/ gpu_param.global_scale[0], 4);
|
||||
gpu_param.global_size[1] = (
|
||||
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
|
||||
/ gpu_param.global_scale[1]);
|
||||
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
|
||||
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
|
||||
|
||||
final:
|
||||
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
|
||||
SAFE_FREE_TENSOR_ATTR(output_attr);
|
||||
return status;
|
||||
} /* _gather_elements_initializer() */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
int32_t axis
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_dtype_e in0_dtype;
|
||||
vsi_nn_kernel_dtype_e in1_dtype;
|
||||
vsi_nn_kernel_dtype_e out_dtype;
|
||||
const _kernel_map_type * kernel_map = _gather_elements_kernel_map;
|
||||
size_t kernel_map_size = _cnt_of_array( _gather_elements_kernel_map );
|
||||
vx_param_description_t * param_def = _gather_elements_kernel_param_def;
|
||||
vx_kernel_initialize_f initializer = _gather_elements_initializer;
|
||||
int32_t img_2d = (outputs[0]->attr.dim_num < 3 || outputs[0]->attr.size[2] == 1) ? 1 : 0;
|
||||
uint32_t key = 0;
|
||||
uint32_t i;
|
||||
|
||||
in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
#define _PACK_SELECT_KEY( in0_type, out_type ) \
|
||||
( ( in0_type ) | ( out_type << 8 ))
|
||||
|
||||
switch (_PACK_SELECT_KEY(in0_dtype, out_dtype))
|
||||
{
|
||||
case _PACK_SELECT_KEY(F32, F32):
|
||||
case _PACK_SELECT_KEY(F16, F16):
|
||||
key = GATHER_ELEMENTS_HASH_KEY( axis, F32, in1_dtype, F32, img_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(U32, U32):
|
||||
case _PACK_SELECT_KEY(U16, U16):
|
||||
case _PACK_SELECT_KEY(U8, U8):
|
||||
key = GATHER_ELEMENTS_HASH_KEY( axis, U32, in1_dtype, U32, img_2d );
|
||||
break;
|
||||
case _PACK_SELECT_KEY(I32, I32):
|
||||
case _PACK_SELECT_KEY(I16, I16):
|
||||
case _PACK_SELECT_KEY(I8, I8):
|
||||
key = GATHER_ELEMENTS_HASH_KEY( axis, I32, in1_dtype, I32, img_2d );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#undef _PACK_SELECT_KEY
|
||||
|
||||
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
|
||||
{
|
||||
if ( kernel_map[i].key == key )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( i < (uint32_t)kernel_map_size )
|
||||
{
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
|
||||
kernel->info.parameters = param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _gather_elements_kernel_param_def );
|
||||
kernel->info.initialize = initializer;
|
||||
// Register code source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
|
||||
kernel_map[i].source_name );
|
||||
// Register binary source
|
||||
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
|
||||
kernel_map[i].source_name );
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_GATHER_ELEMENTS_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
float output_scale = vsi_nn_get_tensor_scale(outputs[0]);
|
||||
float output_zp = (float)vsi_nn_get_tensor_zero_point(outputs[0]);
|
||||
float input_scale = vsi_nn_get_tensor_scale(inputs[0]);
|
||||
float input_tail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
int32_t axis = vsi_nn_kernel_param_get_int32(params, "axis");
|
||||
int32_t axis_size = (int32_t)inputs[0]->attr.size[axis];
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs, axis );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
input_scale = input_scale / output_scale;
|
||||
input_tail = output_zp - input_tail * input_scale;
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _GATHER_ELEMENTS_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
/* Pass parameters to node. */
|
||||
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input_scale );
|
||||
node_params[SCALAR_INPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &input_tail );
|
||||
node_params[SCALAR_INPUT_AXIS_SIZE] = vsi_nn_kernel_scalar_create(graph, I32, &axis_size );
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _GATHER_ELEMENTS_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS_SIZE] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CL( gather_elements, _setup )
|
||||
|
|
@ -60,8 +60,10 @@ static const _kernel_map_type _l2normalizescale_kernel_map[] =
|
|||
{
|
||||
HASH_L2NORMALIZESCALE_KERNELS_2D( 0, F32, F32, F32 )
|
||||
HASH_L2NORMALIZESCALE_KERNELS_2D( 0, U8, F32, U8 )
|
||||
HASH_L2NORMALIZESCALE_KERNELS_2D( 0, I32, F32, I32 )
|
||||
HASH_L2NORMALIZESCALE_KERNELS_2D( 1, F32, F32, F32 )
|
||||
HASH_L2NORMALIZESCALE_KERNELS_2D( 1, U8, F32, U8 )
|
||||
HASH_L2NORMALIZESCALE_KERNELS_2D( 1, I32, F32, I32 )
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -91,9 +93,6 @@ static vx_param_description_t _l2normalizescale_kernel_param_def[] =
|
|||
#define SCALAR_OUTPUT_SCALE (8)
|
||||
#define SCALAR_OUTPUT_TAIL (9)
|
||||
|
||||
#define L2NORMSCALE_PARAM_NUM 6
|
||||
#define L2NORMSCALE_QUANT_PARAM_NUM _cnt_of_array( _l2normalizescale_kernel_param_def )
|
||||
|
||||
/*
|
||||
* Kernel initializer
|
||||
*/
|
||||
|
|
@ -168,8 +167,7 @@ static vsi_status _query_kernel
|
|||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs,
|
||||
int32_t axis,
|
||||
vsi_bool image_2d,
|
||||
vsi_bool *is_use_u8_kernel
|
||||
vsi_bool image_2d
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
|
|
@ -193,6 +191,10 @@ static vsi_status _query_kernel
|
|||
{
|
||||
in0_dtype = F32;
|
||||
}
|
||||
else if (I8 == in0_dtype || I16 == in0_dtype)
|
||||
{
|
||||
in0_dtype = I32;
|
||||
}
|
||||
|
||||
if (F16 == in1_dtype)
|
||||
{
|
||||
|
|
@ -203,16 +205,9 @@ static vsi_status _query_kernel
|
|||
{
|
||||
out_dtype = F32;
|
||||
}
|
||||
|
||||
if ((U8 == in0_dtype) || (U8 == out_dtype))
|
||||
else if (I8 == out_dtype || I16 == out_dtype)
|
||||
{
|
||||
param_def_size = L2NORMSCALE_QUANT_PARAM_NUM;
|
||||
*is_use_u8_kernel = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
param_def_size = L2NORMSCALE_PARAM_NUM;
|
||||
*is_use_u8_kernel = FALSE;
|
||||
out_dtype = I32;
|
||||
}
|
||||
|
||||
key = HASH_L2NORMALIZESCALE_HASH_KEY(axis, in0_dtype, in1_dtype, out_dtype, image_2d);
|
||||
|
|
@ -265,7 +260,6 @@ static vsi_nn_kernel_node_t _setup
|
|||
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
|
||||
float epsilon = (float)10e-12;
|
||||
float rsEps = 1.0f / sqrtf(epsilon);
|
||||
vsi_bool is_use_u8_kernel = FALSE;
|
||||
|
||||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
|
@ -282,7 +276,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
}
|
||||
|
||||
image_2d = (inputs[0]->attr.dim_num == 2 || inputs[0]->attr.size[2] == 1);
|
||||
status = _query_kernel( kernel, inputs, outputs, axis, image_2d, &is_use_u8_kernel );
|
||||
status = _query_kernel( kernel, inputs, outputs, axis, image_2d );
|
||||
axis_size = inputs[0]->attr.size[axis];
|
||||
|
||||
|
||||
|
|
@ -291,7 +285,6 @@ static vsi_nn_kernel_node_t _setup
|
|||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if( node )
|
||||
{
|
||||
size_t node_params_num = L2NORMSCALE_PARAM_NUM;
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _L2NORMALIZESCALE_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
|
|
@ -301,29 +294,23 @@ static vsi_nn_kernel_node_t _setup
|
|||
graph, I32, &axis_size );
|
||||
node_params[SCALAR_EPS_VALUE] = vsi_nn_kernel_scalar_create(
|
||||
graph, F32, &rsEps );
|
||||
if (is_use_u8_kernel)
|
||||
{
|
||||
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
|
||||
node_params[SCALAR_INPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &inputTail );
|
||||
node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );
|
||||
node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &outputTail );
|
||||
node_params_num = L2NORMSCALE_QUANT_PARAM_NUM;
|
||||
}
|
||||
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _L2NORMALIZESCALE_PARAM_NUM );
|
||||
VSI_ASSERT( status == VSI_SUCCESS );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_AXIS] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_AXIS_SIZE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_EPS_VALUE] );
|
||||
if (is_use_u8_kernel)
|
||||
{
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
|
||||
}
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "math.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
|
@ -43,8 +42,6 @@ __BEGIN_DECLS
|
|||
* Define kernel meta.
|
||||
*/
|
||||
#define KERNEL_SOURCE_1 "maximum",
|
||||
#define KERNEL_SOURCE_2 "maximum_fp16",
|
||||
#define KERNEL_SOURCE_3 "maximum_i16"
|
||||
|
||||
#define HASH_MAXIMUM_KEY(_input0_type, _input1_type, _output_type, _image_2d) \
|
||||
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_image_2d))
|
||||
|
|
@ -198,6 +195,15 @@ static vsi_status _query_kernel
|
|||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
#define CONVERT_I8_OR_I16TOI32(dtype) \
|
||||
dtype = (dtype == I8 || dtype == I16) ? I32 : dtype
|
||||
|
||||
CONVERT_I8_OR_I16TOI32(input0_dtype);
|
||||
CONVERT_I8_OR_I16TOI32(input1_dtype);
|
||||
CONVERT_I8_OR_I16TOI32(output_dtype);
|
||||
#undef CONVERT_I8_OR_I16TOI32
|
||||
|
||||
key = HASH_MAXIMUM_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
|
||||
|
||||
for ( i = 0; i < _cnt_of_array(kernel_map); i ++ )
|
||||
|
|
|
|||
|
|
@ -42,8 +42,6 @@ __BEGIN_DECLS
|
|||
* Define kernel meta.
|
||||
*/
|
||||
#define KERNEL_SOURCE_1 "minimum",
|
||||
#define KERNEL_SOURCE_2 "minimum_fp16",
|
||||
#define KERNEL_SOURCE_3 "minimum_i16"
|
||||
|
||||
#define HASH_MINIMUM_KEY(_input0_type, _input1_type, _output_type, _image_2d) \
|
||||
((_input0_type << 24) | (_input1_type << 16) | (_output_type << 8) | (_image_2d))
|
||||
|
|
@ -197,6 +195,15 @@ static vsi_status _query_kernel
|
|||
input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
|
||||
input1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );
|
||||
output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
|
||||
|
||||
#define CONVERT_I8_OR_I16TOI32(dtype) \
|
||||
dtype = (dtype == I8 || dtype == I16) ? I32 : dtype
|
||||
|
||||
CONVERT_I8_OR_I16TOI32(input0_dtype);
|
||||
CONVERT_I8_OR_I16TOI32(input1_dtype);
|
||||
CONVERT_I8_OR_I16TOI32(output_dtype);
|
||||
#undef CONVERT_I8_OR_I16TOI32
|
||||
|
||||
key = HASH_MINIMUM_KEY( input0_dtype, input1_dtype, output_dtype, image_2d );
|
||||
|
||||
for ( i = 0; i < _cnt_of_array(kernel_map); i ++ )
|
||||
|
|
|
|||
|
|
@ -176,6 +176,10 @@ static vsi_status _query_kernel
|
|||
{
|
||||
in_dtype = F32;
|
||||
}
|
||||
else if (in_dtype == I16 || in_dtype == I8)
|
||||
{
|
||||
in_dtype = I32;
|
||||
}
|
||||
|
||||
if (out_dtype == F16)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
|
@ -146,6 +145,7 @@ DEF_KERNEL_INITIALIZER(_roi_align_initializer)
|
|||
|
||||
final:
|
||||
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
|
||||
SAFE_FREE_TENSOR_ATTR(rois_attr);
|
||||
SAFE_FREE_TENSOR_ATTR(output_attr);
|
||||
|
||||
return status;
|
||||
|
|
@ -212,7 +212,6 @@ static vsi_status _query_kernel
|
|||
}
|
||||
|
||||
return status;
|
||||
|
||||
} /* _query_kernel() */
|
||||
|
||||
#define _INPUT_NUM (3)
|
||||
|
|
@ -326,4 +325,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CL( roi_align, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -76,8 +75,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -93,7 +92,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -243,4 +241,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( add_mean_std_norm, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@
|
|||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
__BEGIN_DECLS
|
||||
|
||||
#define _CPU_ARG_NUM (1)
|
||||
|
|
@ -138,20 +137,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
/*
 * Static kernel description for the CPU argmax kernel.
 * The initializer is positional: an id placeholder, _KERNEL_NAME, the
 * executor _argmax_exec, the parameter table kernel_param_def and its element
 * count, vsi_nn_KernelValidator, two NULL entries, then
 * vsi_nn_KernelInitializer and vsi_nn_KernelDeinitializer.
 * NOTE(review): the slot meanings come from vx_kernel_description_t, which is
 * declared outside this file - confirm the order against that declaration.
 */
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_argmax_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -159,7 +144,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _argmax_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -210,4 +199,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( argmax, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@
|
|||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -139,20 +138,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
/*
 * Static kernel description for the CPU argmin kernel.
 * The initializer is positional: an id placeholder, _KERNEL_NAME, the
 * executor _argmin_exec, the parameter table kernel_param_def and its element
 * count, vsi_nn_KernelValidator, two NULL entries, then
 * vsi_nn_KernelInitializer and vsi_nn_KernelDeinitializer.
 * NOTE(review): the slot meanings come from vx_kernel_description_t, which is
 * declared outside this file - confirm the order against that declaration.
 */
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_argmin_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -160,7 +145,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _argmin_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -211,4 +200,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( argmin, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -108,8 +108,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -128,7 +128,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -276,4 +275,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( axis_aligned_bbox_transform, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@
|
|||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
|
@ -160,20 +159,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
|
||||
#define SCALAR_INPUT_EPS (6)
|
||||
|
||||
/*
 * Static kernel description for the CPU batch-norm kernel.
 * The initializer is positional: an id placeholder, _KERNEL_NAME, the
 * executor _batch_norm_exec, the parameter table kernel_param_def and its
 * element count, vsi_nn_KernelValidator, two NULL entries, then
 * vsi_nn_KernelInitializer and vsi_nn_KernelDeinitializer.
 * NOTE(review): the slot meanings come from vx_kernel_description_t, which is
 * declared outside this file - confirm the order against that declaration.
 */
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_batch_norm_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -181,7 +166,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _batch_norm_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -231,4 +220,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( batchnorm_single, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -225,8 +224,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
int32_t* int32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
int32_t* int32_out_buffer[_OUTPUT_NUM] = {0};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
#include "utils/vsi_nn_dtype_util_prv.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
|
@ -72,8 +71,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -92,7 +91,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i]->asymm.zero_point = 0;
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -217,4 +215,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( cast, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -77,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -93,7 +92,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -212,10 +210,8 @@ static vsi_nn_kernel_node_t _setup
|
|||
}
|
||||
|
||||
return node;
|
||||
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( clip, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -206,20 +205,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
|
||||
#define INPUT_FUNC_OP (3)
|
||||
|
||||
/*
 * Static kernel description for the CPU comparisons kernel.
 * The initializer is positional: an id placeholder, _KERNEL_NAME, the
 * executor _comparisons_exec, the parameter table kernel_param_def and its
 * element count, vsi_nn_KernelValidator, two NULL entries, then
 * vsi_nn_KernelInitializer and vsi_nn_KernelDeinitializer.
 * NOTE(review): the slot meanings come from vx_kernel_description_t, which is
 * declared outside this file - confirm the order against that declaration.
 */
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_comparisons_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -227,7 +212,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _comparisons_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -182,7 +181,6 @@ final:
|
|||
}
|
||||
|
||||
return status;
|
||||
|
||||
} /* _compute() */
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -150,20 +149,6 @@ static vx_param_description_t _depth2space_crd_kernel_param_def[] =
|
|||
};
|
||||
#define _DEPTH2SPACE_CRD_PARAM_NUM _cnt_of_array( _depth2space_crd_kernel_param_def )
|
||||
|
||||
/*
 * Static kernel description for the CPU depth2space (CRD) kernel.
 * The initializer is positional: an id placeholder, _KERNEL_NAME, the
 * executor _depth2space_crd_exec, the parameter table
 * _depth2space_crd_kernel_param_def and its element count,
 * vsi_nn_KernelValidator, two NULL entries, then vsi_nn_KernelInitializer and
 * vsi_nn_KernelDeinitializer.
 * NOTE(review): the slot meanings come from vx_kernel_description_t, which is
 * declared outside this file - confirm the order against that declaration.
 */
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_depth2space_crd_exec,
|
||||
_depth2space_crd_kernel_param_def,
|
||||
_cnt_of_array( _depth2space_crd_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -171,7 +156,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _depth2space_crd_exec;
|
||||
kernel->info.parameters = _depth2space_crd_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _depth2space_crd_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -220,4 +209,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( depth2space_internal, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
#include "cpu_backend/npuref_interface.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
|
@ -272,4 +271,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( depthwise_conv1d, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -82,8 +81,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -102,7 +101,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for ( i = 0; i < _OUTPUT_NUM; i++ )
|
||||
{
|
||||
|
|
@ -252,4 +250,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( detect_post_box, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@
|
|||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
|
@ -35,7 +34,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -199,8 +197,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -222,7 +220,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for ( i = 0; i < _OUTPUT_NUM; i++ )
|
||||
{
|
||||
|
|
@ -524,4 +521,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( detect_post_nms, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -43,13 +42,14 @@ typedef enum
|
|||
UNARY_COS,
|
||||
UNARY_EXP,
|
||||
UNARY_LOG,
|
||||
UNARY_ELU,
|
||||
UNARY_NEG,
|
||||
UNARY_HSIGMOID,
|
||||
UNARY_MISH,
|
||||
UNARY_ROUND,
|
||||
UNARY_GELU,
|
||||
UNARY_HGELU,
|
||||
UNARY_SELU,
|
||||
UNARY_CELU,
|
||||
} unary_type_e;
|
||||
|
||||
|
||||
|
|
@ -80,11 +80,6 @@ static float log_eval(float data)
|
|||
return logf(data);
|
||||
}
|
||||
|
||||
static float elu_eval(float data, float alpha)
|
||||
{
|
||||
return data >=0 ? data : expf(data) * alpha - alpha;
|
||||
}
|
||||
|
||||
static float neg_eval(float data)
|
||||
{
|
||||
return data * -1.0f;
|
||||
|
|
@ -117,45 +112,9 @@ static float round_eval(float data)
|
|||
return data;
|
||||
}
|
||||
|
||||
/*
 * Error function approximated by summing its power series
 *     erf(x) = 2/sqrt(pi) * sum_k (-1)^k * x^(2k+1) / (k! * (2k+1)).
 * Inputs at or beyond +/-3 are clamped to +/-1; otherwise terms are added
 * until the next term's magnitude is no larger than 1e-5.
 */
static float erf_eval(float x)
{
    float   sum     = 0;
    float   term    = x;   /* series term about to be added */
    float   fact    = 1;   /* running k! */
    float   odd_pow = x;   /* running x^(2k+1) */
    int32_t sign    = 1;
    int32_t k       = 0;

    if (x <= -3)
    {
        return -1;
    }
    if (x >= 3)
    {
        return 1;
    }

    for (k = 1; vsi_abs(term) > 1e-5; k++)
    {
        sum += term;

        fact *= k;
        sign *= -1;
        odd_pow *= x * x;
        term = sign / fact * odd_pow / ( 2 * k + 1);
    }
#define VSI_MUL2_RSQRTPI    (1.1283791670955126f)

    sum *= VSI_MUL2_RSQRTPI;

    return sum;
}
|
||||
|
||||
/*
 * GELU activation for a single value:
 *     0.5 * x * (1 + erf(x / sqrt(2))).
 * The transform is applied exactly once. The block previously carried two
 * consecutive assignments (one via erf_eval, one via vsi_nn_erf_impl), which
 * fed the already-activated value through GELU a second time.
 */
static float gelu_eval(float data)
{
    data = (float)(0.5f * data * (1 + vsi_nn_erf_impl(data / (float)sqrt(2.0f))));

    return data;
}
|
||||
|
|
@ -169,6 +128,23 @@ static float hgelu_eval(float data)
|
|||
return data * cdf;
|
||||
}
|
||||
|
||||
static float selu_eval(float data, float alpha, float gamma)
|
||||
{
|
||||
float y0 = alpha * gamma * expf(data) - alpha * gamma;
|
||||
float y1 = gamma * data;
|
||||
float y = data <= 0 ? y0 : y1;
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
/*
 * CELU activation for a single value:
 *     max(0, x) + min(alpha * (expf(x / alpha) - 1), 0).
 */
static float celu_eval(float x, float alpha)
{
    float neg_part = vsi_nn_min(alpha * (expf(x / alpha) - 1), 0);
    float pos_part = vsi_nn_max(0, x);

    return pos_part + neg_part;
}
|
||||
|
||||
DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
|
|
@ -227,9 +203,6 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
|
|||
case UNARY_LOG:
|
||||
data = log_eval(data);
|
||||
break;
|
||||
case UNARY_ELU:
|
||||
data = elu_eval(data, alpha);
|
||||
break;
|
||||
case UNARY_NEG:
|
||||
data = neg_eval(data);
|
||||
break;
|
||||
|
|
@ -248,6 +221,12 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
|
|||
case UNARY_HGELU:
|
||||
data = hgelu_eval(data);
|
||||
break;
|
||||
case UNARY_SELU:
|
||||
data = selu_eval(data, alpha, beta);
|
||||
break;
|
||||
case UNARY_CELU:
|
||||
data = celu_eval(data, alpha);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -287,20 +266,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
#define INPUT_SCALAR_ALPHA (3)
|
||||
#define INPUT_SCALAR_BETA (4)
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_eltwise_unary_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -308,7 +273,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _eltwise_unary_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -384,10 +353,11 @@ REGISTER_ELTWISE_UNARY_BACKEND_CPU( sin, UNARY_SIN )
|
|||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( cos, UNARY_COS )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( exp, UNARY_EXP )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( log, UNARY_LOG )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( elu, UNARY_ELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( neg, UNARY_NEG )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( hard_sigmoid, UNARY_HSIGMOID )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( mish, UNARY_MISH )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( round, UNARY_ROUND )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( gelu, UNARY_GELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( hard_gelu, UNARY_HGELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( selu, UNARY_SELU )
|
||||
REGISTER_ELTWISE_UNARY_BACKEND_CPU( celu, UNARY_CELU )
|
||||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -58,7 +57,6 @@ static vx_param_description_t _erf_kernel_param_def[] =
|
|||
};
|
||||
#define _ERF_PARAM_NUM _cnt_of_array( _erf_kernel_param_def )
|
||||
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
|
|
@ -74,8 +72,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -101,34 +99,10 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
|
||||
memset( f32_out_buffer[i], 0, out_bytes[i] );
|
||||
}
|
||||
#define VSI_ERF_PI 3.141592653589793
|
||||
for (i = 0; i < out_elements[0]; i ++)
|
||||
{
|
||||
/* 2 / sqrt(pi) * (sum[(-1)^n! * x ^ (2n + 1)] + x) */
|
||||
float x = vsi_clamp(f32_in_buffer[0][i], -2, 2);
|
||||
float res = 0;
|
||||
float tmp = x;
|
||||
float factorial = 1; /*n!*/
|
||||
float x_pow = x;
|
||||
int32_t one = 1;
|
||||
int32_t n = 1;
|
||||
|
||||
while (vsi_abs(tmp) > 1e-5)
|
||||
{
|
||||
res += tmp;
|
||||
|
||||
factorial *= n;
|
||||
one *= -1;
|
||||
x_pow *= x * x;
|
||||
tmp = one / factorial * x_pow / ( 2 * n + 1);
|
||||
|
||||
n ++;
|
||||
}
|
||||
|
||||
|
||||
res *= 2.0f / (float)sqrt(VSI_ERF_PI);
|
||||
|
||||
f32_out_buffer[0][i] = res;
|
||||
float x = vsi_nn_erf_impl(f32_in_buffer[0][i]);
|
||||
f32_out_buffer[0][i] = x;
|
||||
}
|
||||
|
||||
/* save data */
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -94,8 +93,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -110,7 +109,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -236,4 +234,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( floordiv, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -181,20 +180,6 @@ static vx_param_description_t _gather_kernel_param_def[] =
|
|||
};
|
||||
#define _GATHER_PARAM_NUM _cnt_of_array( _gather_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_gather_exec,
|
||||
_gather_kernel_param_def,
|
||||
_cnt_of_array( _gather_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -202,7 +187,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _gather_exec;
|
||||
kernel->info.parameters = _gather_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _gather_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -260,4 +249,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( gather, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,228 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
#define _ARG_NUM (1)
|
||||
#define _INPUT_NUM (2)
|
||||
#define _OUTPUT_NUM (1)
|
||||
#define _CPU_IO_NUM (_INPUT_NUM + _OUTPUT_NUM)
|
||||
#define _CPU_PARAM_NUM (_ARG_NUM + _CPU_IO_NUM)
|
||||
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.gather_elements")
|
||||
|
||||
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _gather_elements_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
// Add kernel parameters here
|
||||
};
|
||||
#define _GATHER_ELEMENTS_PARAM_NUM _cnt_of_array( _gather_elements_kernel_param_def )
|
||||
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
DEF_KERNEL_EXECUTOR(_compute)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
||||
float * buffer[2] = { NULL };
|
||||
int32_t* buffer_idx = NULL;
|
||||
size_t out_elements = 0;
|
||||
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
|
||||
vsi_size_t a = 0;
|
||||
vsi_size_t o = 0;
|
||||
vsi_size_t i = 0;
|
||||
vsi_size_t outer_size[2] = {1, 1};
|
||||
vsi_size_t inner_size[2] = {1, 1};
|
||||
vsi_size_t axis_size[2] = {1, 1};
|
||||
int32_t axis = 0;
|
||||
|
||||
tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
|
||||
tensors[1] = (vsi_nn_kernel_tensor_t)param[1];
|
||||
tensors[2] = (vsi_nn_kernel_tensor_t)param[2];
|
||||
|
||||
attr[0] = vsi_nn_kernel_tensor_attr_create( tensors[0] );
|
||||
CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
|
||||
attr[1] = vsi_nn_kernel_tensor_attr_create( tensors[1] );
|
||||
CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
|
||||
attr[2] = vsi_nn_kernel_tensor_attr_create( tensors[2] );
|
||||
CHECK_PTR_FAIL_GOTO( attr[2], "Create tensor attr buffer fail.", final );
|
||||
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[2] );
|
||||
|
||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &axis);
|
||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||
|
||||
buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[0], "Create input0 buffer fail.", final );
|
||||
|
||||
buffer_idx = (int32_t*)vsi_nn_kernel_tensor_create_buffer( tensors[1], attr[1], FALSE );
|
||||
CHECK_PTR_FAIL_GOTO( buffer_idx, "Create input1 buffer fail.", final );
|
||||
|
||||
buffer[1] = (float *)malloc( out_elements * sizeof(float) );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[1], "Create output buffer fail.", final );
|
||||
memset( buffer[1], 0, out_elements * sizeof(float) );
|
||||
|
||||
axis_size[0] = attr[0]->shape->data[axis];
|
||||
axis_size[1] = attr[2]->shape->data[axis];
|
||||
for (i = 0; i < (vsi_size_t)axis; ++i)
|
||||
{
|
||||
inner_size[0] *= attr[0]->shape->data[i];
|
||||
inner_size[1] *= attr[2]->shape->data[i];
|
||||
}
|
||||
|
||||
for (i = axis + 1; i < attr[2]->shape->size; ++i)
|
||||
{
|
||||
outer_size[0] *= attr[0]->shape->data[i];
|
||||
outer_size[1] *= attr[2]->shape->data[i];
|
||||
}
|
||||
|
||||
for (o = 0; o < outer_size[1]; o++)
|
||||
{
|
||||
for (a = 0; a < axis_size[1]; a++)
|
||||
{
|
||||
for (i = 0; i < inner_size[1]; i++)
|
||||
{
|
||||
vsi_ssize_t index = 0;
|
||||
vsi_size_t index0 = (o * axis_size[1] + a) * inner_size[1] + i;
|
||||
vsi_size_t index1 = 1;
|
||||
|
||||
index = (vsi_ssize_t)buffer_idx[index0];
|
||||
index = index < 0 ? index + (vsi_ssize_t)axis_size[0] : index;
|
||||
index1 = (o * axis_size[0] + index) * inner_size[0] + i;
|
||||
|
||||
buffer[1][index0] = buffer[0][index1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status = vsi_nn_kernel_tensor_write_from_float( tensors[2], attr[2],
|
||||
buffer[1], out_elements );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
final:
|
||||
if ( buffer_idx )
|
||||
{
|
||||
free( buffer_idx );
|
||||
}
|
||||
for ( i = 0; i < 2; i ++ )
|
||||
{
|
||||
if ( buffer[i] )
|
||||
{
|
||||
free( buffer[i] );
|
||||
}
|
||||
}
|
||||
for ( i = 0; i < _CPU_IO_NUM; i ++ )
|
||||
{
|
||||
if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_SUCCESS;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _gather_elements_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _gather_elements_kernel_param_def );
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_GATHER_ELEMENTS_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _GATHER_ELEMENTS_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
/* Pass parameters to node. */
|
||||
node_params[3] = vsi_nn_kernel_scalar_create( graph, I32, &axis );
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _GATHER_ELEMENTS_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[3] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( gather_elements, _setup )
|
||||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -166,20 +165,6 @@ static vx_param_description_t _gather_nd_kernel_param_def[] =
|
|||
};
|
||||
#define _GATHER_ND_PARAM_NUM _cnt_of_array( _gather_nd_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_gather_nd_exec,
|
||||
_gather_nd_kernel_param_def,
|
||||
_cnt_of_array( _gather_nd_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -187,7 +172,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _gather_nd_exec;
|
||||
kernel->info.parameters = _gather_nd_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _gather_nd_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -238,4 +227,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( gather_nd, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -195,8 +194,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -215,7 +214,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -504,4 +502,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( generate_proposals, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
|
@ -187,20 +186,6 @@ static vx_param_description_t _group_normalization_kernel_param_def[] =
|
|||
};
|
||||
#define _GROUP_NORMALIZATION_PARAM_NUM _cnt_of_array( _group_normalization_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_group_norm_exec,
|
||||
_group_normalization_kernel_param_def,
|
||||
_cnt_of_array( _group_normalization_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -208,7 +193,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _group_norm_exec;
|
||||
kernel->info.parameters = _group_normalization_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _group_normalization_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -312,4 +301,3 @@ final:
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( group_norm, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -474,7 +473,9 @@ static vsi_nn_kernel_node_t _setup
|
|||
if( node )
|
||||
{
|
||||
_inputs = (vsi_nn_tensor_t**)malloc(input_count * sizeof(vsi_nn_tensor_t**));
|
||||
CHECK_PTR_FAIL_GOTO( _inputs, "Create buffer fail.", final );
|
||||
node_params = (vsi_nn_kernel_node_param_t *)malloc(sizeof(vsi_nn_kernel_node_param_t) * param_count);
|
||||
CHECK_PTR_FAIL_GOTO( node_params, "Create buffer fail.", final );
|
||||
for(i = 0; i < input_count; i++)
|
||||
{
|
||||
_inputs[i] = inputs[i];
|
||||
|
|
@ -504,6 +505,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
}
|
||||
}
|
||||
|
||||
final:
|
||||
vsi_nn_safe_free(_inputs);
|
||||
vsi_nn_safe_free(node_params);
|
||||
return node;
|
||||
|
|
@ -512,4 +514,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( grucell_activation, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -179,4 +178,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( grucell_activation_sma, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -124,8 +123,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -187,20 +186,6 @@ static vx_param_description_t _instance_normalization_kernel_param_def[] =
|
|||
};
|
||||
#define _INSTANCE_NORMALIZATION_PARAM_NUM _cnt_of_array( _instance_normalization_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_instance_norm_exec,
|
||||
_instance_normalization_kernel_param_def,
|
||||
_cnt_of_array( _instance_normalization_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -208,7 +193,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _instance_norm_exec;
|
||||
kernel->info.parameters = _instance_normalization_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _instance_normalization_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -77,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -99,7 +98,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -246,4 +244,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( l2normalizescale, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -184,20 +183,6 @@ static vx_param_description_t _layer_normalization_kernel_param_def[] =
|
|||
};
|
||||
#define _LAYER_NORMALIZATION_PARAM_NUM _cnt_of_array( _layer_normalization_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_layer_norm_exec,
|
||||
_layer_normalization_kernel_param_def,
|
||||
_LAYER_NORMALIZATION_PARAM_NUM,
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -205,7 +190,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _layer_norm_exec;
|
||||
kernel->info.parameters = _layer_normalization_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _layer_normalization_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -252,4 +241,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( layer_norm, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@
|
|||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
__BEGIN_DECLS
|
||||
|
||||
#define _CPU_ARG_NUM (2)
|
||||
|
|
@ -153,20 +152,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_log_softmax_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -174,7 +159,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _log_softmax_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -232,4 +221,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( log_softmax, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -72,8 +71,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -85,7 +84,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -197,4 +195,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( logical_not, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -96,8 +95,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -113,7 +112,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_attr_get_stride( in_attr[i], in_stride_size[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -264,4 +262,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( logical_ops, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -156,7 +155,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for( i = 0; i < _OUTPUT_NUM; i++ )
|
||||
|
|
@ -308,7 +306,6 @@ final:
|
|||
}
|
||||
|
||||
return status;
|
||||
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
|
|
@ -331,7 +328,6 @@ static vsi_status _query_kernel
|
|||
status = VSI_SUCCESS;
|
||||
|
||||
return status;
|
||||
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
|
|
@ -397,10 +393,8 @@ static vsi_nn_kernel_node_t _setup
|
|||
}
|
||||
|
||||
return node;
|
||||
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( lstmunit_activation, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -190,20 +189,6 @@ static vx_param_description_t _matrixmul_kernel_param_def[] =
|
|||
};
|
||||
#define _MATIRXMUL_PARAM_NUM _cnt_of_array( _matrixmul_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_matrixmul_exec,
|
||||
_matrixmul_kernel_param_def,
|
||||
_cnt_of_array( _matrixmul_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -211,7 +196,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _matrixmul_exec;
|
||||
kernel->info.parameters = _matrixmul_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _matrixmul_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -261,4 +250,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( matrixmul, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@
|
|||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -147,21 +146,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_maximum_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -169,7 +153,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _maximum_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -211,4 +199,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( maximum, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -143,21 +142,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_minimum_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -165,7 +149,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _minimum_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -207,4 +195,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( minimum, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -218,20 +217,6 @@ static vx_param_description_t _moments_kernel_param_def[] =
|
|||
};
|
||||
#define _MOMENTS_PARAM_NUM _cnt_of_array( _moments_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_moments_exec,
|
||||
_moments_kernel_param_def,
|
||||
_cnt_of_array( _moments_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -239,7 +224,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _moments_exec;
|
||||
kernel->info.parameters = _moments_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _moments_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -315,4 +304,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( moments, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -86,8 +85,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -112,7 +111,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -335,4 +333,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( poolwithargmax, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -146,21 +145,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pow_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -168,7 +152,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pow_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -210,4 +198,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pow, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -283,21 +282,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pre_process_bgra_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -305,7 +289,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pre_process_bgra_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -381,4 +369,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_bgra, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -194,21 +193,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pre_process_gray_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -216,7 +200,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pre_process_gray_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -280,4 +268,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_gray, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -256,21 +255,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pre_process_nv12_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -278,7 +262,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pre_process_nv12_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -354,4 +342,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_nv12, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,297 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "vsi_nn_graph.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/*
|
||||
* Define kernel meta.
|
||||
*/
|
||||
/* Tensor slots come first in the parameter list, scalar arguments follow. */
#define _CPU_INPUT_NUM      (3)
#define _CPU_OUTPUT_NUM     (3)
#define _CPU_IO_NUM         (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)

/* Number of scalar arguments appended after the tensors. */
#define _CPU_ARG_NUM        (8)
#define _CPU_PARAM_NUM      (_CPU_ARG_NUM + _CPU_IO_NUM)

/* Name under which this CPU kernel is registered. */
#define _KERNEL_NAME        CVIVANTE_NAMESPACE("cpu.pre_process_rgb888_planar")

/*
 * Round a fixed-point value carrying 20 fractional bits to the nearest
 * integer: add one half (1 << 19), then drop the fractional bits.
 */
#define DESCALE(x)          (((x) + (1<<19)) >> 20)
|
||||
/*
|
||||
* Kernel params
|
||||
*/
|
||||
static vx_param_description_t _pre_process_rgb888_planar_kernel_param_def[] =
|
||||
{
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
#define _PRE_PROCESS_RGB888_PLANAR_PARAM_NUM _cnt_of_array( _pre_process_rgb888_planar_kernel_param_def )
|
||||
|
||||
|
||||
/*
|
||||
* Kernel function
|
||||
*/
|
||||
DEF_KERNEL_EXECUTOR(_compute)
|
||||
(
|
||||
vsi_nn_kernel_node_t node,
|
||||
const vsi_nn_kernel_node_param_t * param,
|
||||
size_t param_size
|
||||
)
|
||||
{
|
||||
vsi_status status = VX_FAILURE;
|
||||
vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
|
||||
float * buffer[_CPU_IO_NUM] = { NULL };
|
||||
size_t out_elements = 0;
|
||||
vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
|
||||
uint32_t i = 0;
|
||||
int32_t xRatio = 0, yRatio = 0, xOffset = 0, yOffset = 0;
|
||||
float mean[3] = {0}, scale = 1;
|
||||
|
||||
for (i = 0; i < _CPU_IO_NUM; i++)
|
||||
{
|
||||
tensors[i] = (vsi_nn_kernel_tensor_t)param[i];
|
||||
attr[i] = vsi_nn_kernel_tensor_attr_create( tensors[i] );
|
||||
CHECK_PTR_FAIL_GOTO( attr[i], "Create tensor attr buffer fail.", final );
|
||||
}
|
||||
|
||||
out_elements = vsi_nn_kernel_tensor_attr_get_size( attr[3] );
|
||||
|
||||
i = 6;
|
||||
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xRatio);
|
||||
status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yRatio);
|
||||
status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &xOffset);
|
||||
status |= vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[i++], &yOffset);
|
||||
status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean[0]);
|
||||
status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean[1]);
|
||||
status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &mean[2]);
|
||||
status |= vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[i++], &scale);
|
||||
CHECK_STATUS_FAIL_GOTO(status, final );
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[i], attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
buffer[i + 3] = (float *)malloc( out_elements * sizeof(float) );
|
||||
CHECK_PTR_FAIL_GOTO( buffer[i + 3], "Create output buffer fail.", final );
|
||||
memset( buffer[i + 3], 0, out_elements * sizeof(float) );
|
||||
}
|
||||
|
||||
{
|
||||
int32_t line1[2], line2[2];
|
||||
int32_t dx = 0, dy = 0, idx = 0;
|
||||
int32_t src_width = (int32_t)attr[0]->shape->data[0];
|
||||
int32_t dst_width = (int32_t)attr[3]->shape->data[0];
|
||||
int32_t dst_height = (int32_t)attr[3]->shape->data[1];
|
||||
uint8_t result = 0;
|
||||
|
||||
for ( idx = 0; idx < 3; idx ++)
|
||||
{
|
||||
for ( dy = 0; dy < (int32_t)dst_height; dy ++)
|
||||
{
|
||||
for ( dx = 0; dx < (int32_t)dst_width; dx ++)
|
||||
{
|
||||
int32_t source_index = 0;
|
||||
int32_t output_index = dx + dy * dst_width;
|
||||
float finalVal = 0.0f;
|
||||
|
||||
if(xRatio != (1 << 15) || yRatio != (1 << 15))
|
||||
{
|
||||
int32_t fx = (dx * xRatio + (xRatio >> 1)) - (1 << 14);
|
||||
int32_t sx = fx & 0xffff8000; // Floor
|
||||
int32_t fy = 0, sy = 0;
|
||||
int32_t temp1 = 0;
|
||||
int32_t temp2 = 0;
|
||||
|
||||
fx -= sx;
|
||||
sx = sx >> 15;
|
||||
|
||||
sx = sx < 0 ? 0 : sx;
|
||||
sx = sx > src_width ? src_width - 1: sx;
|
||||
|
||||
fx = (fx +(1 << 4)) >> 5;
|
||||
|
||||
// for y
|
||||
fy = (dy * yRatio + (yRatio >> 1)) - (1<< 14);
|
||||
sy = fy & 0xffff8000; // Floor
|
||||
fy -= sy;
|
||||
sy = sy >> 15;
|
||||
|
||||
sy = sy < 0 ? 0 : sy;
|
||||
fy = fy < 0 ? 0 : fy;
|
||||
|
||||
fy = (fy + (1<< 4)) >> 5;
|
||||
|
||||
sx += xOffset;
|
||||
sy += yOffset;
|
||||
source_index = (sx + sy * src_width);
|
||||
|
||||
line1[0] = (int32_t)buffer[idx][source_index];
|
||||
line1[1] = (int32_t)buffer[idx][source_index + 1];
|
||||
line2[0] = (int32_t)buffer[idx][source_index + src_width];
|
||||
line2[1] = (int32_t)buffer[idx][source_index + src_width + 1];
|
||||
|
||||
temp1 = fx * (line1[1] - line1[0]) + (line1[0] << 10);
|
||||
temp2 = fx * (line2[1] - line2[0]) + (line2[0] << 10);
|
||||
temp1 = fy * (temp2 - temp1) + (temp1 << 10);
|
||||
result = (uint8_t)(DESCALE(temp1));
|
||||
finalVal = (result - mean[idx]) * scale;
|
||||
buffer[idx + 3][output_index] = finalVal;
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t offset = xOffset + yOffset * src_width;
|
||||
source_index = dx + dy * src_width + offset;
|
||||
finalVal = (buffer[0][source_index] - mean[idx]) * scale;
|
||||
buffer[1][output_index] = finalVal;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 3; i < _CPU_IO_NUM; i++)
|
||||
{
|
||||
status = vsi_nn_kernel_tensor_write_from_float( tensors[i], attr[i],
|
||||
buffer[i], out_elements );
|
||||
CHECK_STATUS_FAIL_GOTO( status, final );
|
||||
}
|
||||
|
||||
final:
|
||||
for ( i = 0; i < _CPU_IO_NUM; i ++ )
|
||||
{
|
||||
if ( buffer[i] )
|
||||
{
|
||||
free( buffer[i] );
|
||||
}
|
||||
if (attr[i]) { vsi_nn_kernel_tensor_attr_release( &attr[i] ); }
|
||||
}
|
||||
|
||||
return status;
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
/*
|
||||
* Query kernel
|
||||
*/
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_kernel_t * kernel,
|
||||
vsi_nn_tensor_t * const * const inputs,
|
||||
vsi_nn_tensor_t * const * const outputs
|
||||
/* Add extra params */
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_SUCCESS;
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _compute;
|
||||
kernel->info.parameters = _pre_process_rgb888_planar_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _pre_process_rgb888_planar_kernel_param_def );
|
||||
|
||||
return status;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
static vsi_nn_kernel_node_t _setup
|
||||
(
|
||||
vsi_nn_graph_t * graph,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
size_t input_num,
|
||||
vsi_nn_tensor_t ** outputs,
|
||||
size_t output_num,
|
||||
const vsi_nn_kernel_param_t * params,
|
||||
vsi_nn_kernel_t * kernel
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
vsi_nn_kernel_node_param_t node_params[_PRE_PROCESS_RGB888_PLANAR_PARAM_NUM];
|
||||
vsi_nn_kernel_node_t node = NULL;
|
||||
|
||||
status = _query_kernel( kernel, inputs, outputs /* Add extra params */ );
|
||||
if ( VSI_SUCCESS == status)
|
||||
{
|
||||
uint32_t index = 6;
|
||||
int32_t scale_x = vsi_nn_kernel_param_get_int32( params, "scale_x" );
|
||||
int32_t scale_y = vsi_nn_kernel_param_get_int32( params, "scale_y" );
|
||||
int32_t left = vsi_nn_kernel_param_get_int32( params, "left" );
|
||||
int32_t top = vsi_nn_kernel_param_get_int32( params, "top" );
|
||||
float r_mean = vsi_nn_kernel_param_get_float32( params, "r_mean" );
|
||||
float g_mean = vsi_nn_kernel_param_get_float32( params, "g_mean" );
|
||||
float b_mean = vsi_nn_kernel_param_get_float32( params, "b_mean" );
|
||||
float scale = vsi_nn_kernel_param_get_float32( params, "scale" );
|
||||
|
||||
node = vsi_nn_kernel_create_node( graph, kernel );
|
||||
if ( node )
|
||||
{
|
||||
/* Set inputs and outputs */
|
||||
vsi_nn_kernel_node_pack_io( node_params, _PRE_PROCESS_RGB888_PLANAR_PARAM_NUM,
|
||||
inputs, input_num, outputs, output_num );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_x );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &scale_y );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &left );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, I32, &top );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &r_mean );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &g_mean );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &b_mean );
|
||||
node_params[index++] = vsi_nn_kernel_scalar_create( graph, F32, &scale );
|
||||
/* Pass parameters to node. */
|
||||
status = vsi_nn_kernel_node_pass_param( node, node_params, _PRE_PROCESS_RGB888_PLANAR_PARAM_NUM );
|
||||
vsi_nn_kernel_scalar_release( &node_params[6] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[7] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[8] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[9] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[10] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[11] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[12] );
|
||||
vsi_nn_kernel_scalar_release( &node_params[13] );
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_rgb888_planar, _setup )
|
||||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -282,21 +281,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pre_process_rgb_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -304,7 +288,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pre_process_rgb_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -380,4 +368,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_rgb, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -331,21 +330,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pre_process_yuv420_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -353,7 +337,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pre_process_yuv420_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -429,4 +417,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_yuv420, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -325,21 +324,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_pre_process_yuv444_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -347,7 +331,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _pre_process_yuv444_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -423,4 +411,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( pre_process_yuv444, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@
|
|||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_error.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -144,21 +143,6 @@ static vx_param_description_t kernel_param_def[] =
|
|||
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}
|
||||
};
|
||||
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_prelu_exec,
|
||||
kernel_param_def,
|
||||
_cnt_of_array( kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -166,7 +150,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _prelu_exec;
|
||||
kernel->info.parameters = kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -216,4 +204,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( prelu, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@
|
|||
#include "utils/vsi_nn_dtype_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "kernel/vsi_nn_kernel_eltwise.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -259,4 +258,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( random_multinomial, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -75,8 +74,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -95,7 +94,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -236,4 +234,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( reduceall_internal, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -75,8 +74,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -95,7 +94,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -236,4 +234,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( reduceany_internal, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -75,8 +74,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -95,7 +94,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -236,4 +234,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( reducemax_internal, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -75,8 +74,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -95,7 +94,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -237,4 +235,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( reducemin_internal, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -74,8 +73,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -94,7 +93,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -235,4 +233,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( reduceprod_internal, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -79,8 +78,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -96,7 +95,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -226,4 +224,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( relu_keras, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_error.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vsi_nn_vxkernel.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -215,20 +214,6 @@ static vx_param_description_t _repeat_kernel_param_def[] =
|
|||
};
|
||||
#define _REPEAT_PARAM_NUM _cnt_of_array( _repeat_kernel_param_def )
|
||||
|
||||
static const vx_kernel_description_t _kernel_info =
|
||||
{
|
||||
KERNEL_ID_PLACEHOLDER,
|
||||
_KERNEL_NAME,
|
||||
_repeat_exec,
|
||||
_repeat_kernel_param_def,
|
||||
_cnt_of_array( _repeat_kernel_param_def ),
|
||||
vsi_nn_KernelValidator,
|
||||
NULL,
|
||||
NULL,
|
||||
vsi_nn_KernelInitializer,
|
||||
vsi_nn_KernelDeinitializer
|
||||
};
|
||||
|
||||
static vsi_status _query_kernel
|
||||
(
|
||||
vsi_nn_tensor_t* const* const inputs,
|
||||
|
|
@ -236,7 +221,11 @@ static vsi_status _query_kernel
|
|||
vsi_nn_kernel_t* kernel
|
||||
)
|
||||
{
|
||||
memmove( &kernel->info, &_kernel_info, sizeof(vx_kernel_description_t) );
|
||||
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
|
||||
kernel->info.function = _repeat_exec;
|
||||
kernel->info.parameters = _repeat_kernel_param_def;
|
||||
kernel->info.numParams = _cnt_of_array( _repeat_kernel_param_def );
|
||||
|
||||
return VSI_SUCCESS;
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
|
@ -283,4 +272,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( repeat, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -77,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -100,7 +99,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -268,4 +266,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( resize_1d_bilinear, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -77,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -97,7 +96,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -268,4 +266,3 @@ static vsi_nn_kernel_node_t _setup
|
|||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( resize_1d_nearest, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -77,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
|
|||
|
|
@ -76,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -77,8 +76,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -102,7 +101,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for(i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -246,7 +244,6 @@ final:
|
|||
}
|
||||
|
||||
return status;
|
||||
|
||||
} /* _compute() */
|
||||
|
||||
|
||||
|
|
@ -269,7 +266,6 @@ static vsi_status _query_kernel
|
|||
status = VSI_SUCCESS;
|
||||
|
||||
return status;
|
||||
|
||||
} /* _query_kernel() */
|
||||
|
||||
|
||||
|
|
@ -310,10 +306,8 @@ static vsi_nn_kernel_node_t _setup
|
|||
}
|
||||
|
||||
return node;
|
||||
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( resize_nearest, _setup )
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@
|
|||
#include "vsi_nn_tensor_util.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "libnnext/vx_lib_nnext.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
|
|
@ -150,8 +149,8 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
|
||||
float *f32_in_buffer[_INPUT_NUM] = {NULL};
|
||||
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
|
||||
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
|
||||
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
|
||||
vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
|
||||
vsi_size_t out_elements[_OUTPUT_NUM] = {0};
|
||||
vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
|
||||
|
|
@ -179,7 +178,6 @@ DEF_KERNEL_EXECUTOR(_compute)
|
|||
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
|
||||
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
|
||||
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
|
||||
|
||||
}
|
||||
for (i = 0; i < _OUTPUT_NUM; i ++)
|
||||
{
|
||||
|
|
@ -369,10 +367,8 @@ static vsi_nn_kernel_node_t _setup
|
|||
}
|
||||
|
||||
return node;
|
||||
|
||||
} /* _setup() */
|
||||
|
||||
__END_DECLS
|
||||
|
||||
REGISTER_BACKEND_CPU( roi_align, _setup )
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue