Update internal to 1.1.34

SHA: 67f1e
Signed-off-by: Kainan Cha <kainan.zha@verisilicon.com>

parent d841b85859
commit 81cc868b6c
@@ -157,4 +157,9 @@ DEF_OP(ONE_HOT)
 DEF_OP(NMS)
 DEF_OP(GROUPED_CONV1D)
 DEF_OP(SCATTER_ND_UPDATE)
 DEF_OP(GELU)
+DEF_OP(CONV2D_LSTM)
+DEF_OP(CONV2D_LSTM_CELL)
+DEF_OP(GRU)
+DEF_OP(GRUCELL)
+DEF_OP(GRUCELL_ACTIVATION)
@@ -145,7 +145,7 @@ typedef struct
 typedef struct
 {
     vsi_nn_kernel_dtype_e dtype;
-    vsi_int_array_t * shape;
+    vsi_size_array_t * shape;
     vsi_nn_kernel_quant_type_e quant;
     union
     {
@@ -395,8 +395,8 @@ void vsi_nn_kernel_tensor_release
 vsi_nn_kernel_tensor_t vsi_nn_kernel_tensor_reshape
     (
     vsi_nn_kernel_tensor_t tensor,
-    int32_t * shape,
-    uint32_t rank
+    vsi_size_t * shape,
+    vsi_size_t rank
     );

 vsi_status vsi_nn_kernel_node_pass_param
@@ -670,7 +670,7 @@ vsi_status vsi_nn_kernel_register
     );

 vsi_bool vsi_nn_kernel_gpu_check_shape
-    ( const int32_t * shape, size_t rank );
+    ( const vsi_size_t * shape, vsi_size_t rank );

 vsi_status vsi_nn_kernel_gpu_add_param
     (
@@ -738,38 +738,38 @@ vsi_status vsi_nn_kernel_tensor_write
     size_t size
     );

-static inline size_t vsi_nn_kernel_tensor_attr_get_size
+static inline vsi_size_t vsi_nn_kernel_tensor_attr_get_size
     ( const vsi_nn_kernel_tensor_attr_t * attr )
 {
     if( !attr )
     {
         return 0;
     }
-    return vsi_nn_shape_get_size( attr->shape->data, attr->shape->size );
+    return vsi_nn_shape_get_size( attr->shape->data, (vsi_size_t)attr->shape->size );
 } /* vsi_nn_kernel_tensor_attr_get_size() */

-static inline size_t vsi_nn_kernel_tensor_attr_get_bytes
+static inline vsi_size_t vsi_nn_kernel_tensor_attr_get_bytes
     ( const vsi_nn_kernel_tensor_attr_t * attr )
 {
-    size_t size;
-    size_t type_bytes;
+    vsi_size_t size;
+    vsi_size_t type_bytes;
     if( !attr )
     {
         return 0;
     }
     size = vsi_nn_kernel_tensor_attr_get_size( attr );
-    type_bytes = vsi_nn_kernel_dtype_get_bytes( attr->dtype );
+    type_bytes = (vsi_size_t)vsi_nn_kernel_dtype_get_bytes( attr->dtype );
     return size * type_bytes;
 } /* vsi_nn_kernel_tensor_attr_get_bytes() */

 static inline void vsi_nn_kernel_tensor_attr_get_stride
-    ( const vsi_nn_kernel_tensor_attr_t * attr, size_t * out_stride)
+    ( const vsi_nn_kernel_tensor_attr_t * attr, vsi_size_t * out_stride)
 {
     if( !attr || !out_stride )
     {
         return;
     }
-    vsi_nn_shape_get_stride( attr->shape->data, attr->shape->size, out_stride );
+    vsi_nn_shape_get_stride( attr->shape->data, (vsi_size_t)attr->shape->size, out_stride );
 } /* vsi_nn_kernel_tensor_attr_get_size() */

 static inline vsi_bool vsi_nn_kernel_tensor_attr_is_quantized
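The two inline helpers above chain together: the element count is the product of the dimensions, and the byte size multiplies that by the dtype width. A minimal standalone sketch of the same arithmetic in plain C (the `vsi_size_t` stub and the helper name here are illustrative, not the library's own):

```c
#include <stdio.h>
#include <stdint.h>

/* Stub for illustration: matches the 32-bit build (no VSI_40BIT_VA_SUPPORT). */
typedef uint32_t vsi_size_t;

/* Element count = product of all dimensions, as vsi_nn_shape_get_size does. */
static vsi_size_t shape_get_size(const vsi_size_t *shape, vsi_size_t rank)
{
    vsi_size_t i, size = 1;
    for (i = 0; i < rank; i++)
    {
        size *= shape[i];
    }
    return size;
}

int main(void)
{
    vsi_size_t shape[3] = {224, 224, 3};
    vsi_size_t elements = shape_get_size(shape, 3);
    /* Total bytes = elements * per-element size, e.g. 2 bytes for fp16. */
    printf("%u elements, %u bytes as fp16\n", elements, elements * 2u);
    return 0;
}
```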
@@ -819,7 +819,7 @@ vsi_bool vsi_nn_dtype_convert_float_to_quantize_symm_perchannel
     (
     const float * buffer, size_t size,
     vsi_nn_kernel_dtype_e dtype,
-    const int32_t * shape, size_t rank,
+    const vsi_size_t * shape, size_t rank,
     const float * scale, size_t scale_size,
     const int32_t * zero_point, size_t zero_point_size,
     int32_t channel_dim,
@@ -862,7 +862,7 @@ vsi_bool vsi_nn_dtype_convert_quantize_symm_perchannel_to_float
     (
     const void * buffer, size_t size,
     vsi_nn_kernel_dtype_e dtype,
-    const int32_t * shape, size_t rank,
+    const vsi_size_t * shape, size_t rank,
     const float * scale, size_t scale_size,
     const int32_t * zero_point, size_t zero_point_size,
     int32_t channel_dim,
@@ -873,9 +873,9 @@ vsi_nn_tensor_t* vsi_nn_pad_tensor
     (
     vsi_nn_graph_t * graph,
     vsi_nn_tensor_t * input,
-    int32_t * pad_front,
-    int32_t * pad_end,
-    size_t pad_size,
+    vsi_size_t * pad_front,
+    vsi_size_t * pad_end,
+    vsi_size_t pad_size,
     vsi_nn_pad_mode_e mode,
     float pad_value
     );
@@ -30,20 +30,20 @@

 vsi_bool vsi_nn_kernel_optimize_eltwise_shape
     (
-    const int32_t* shape_x, const size_t rank_x,
-    const int32_t* shape_y, const size_t rank_y,
-    const int32_t* shape_output, const size_t rank_output,
-    int32_t* out_shape_x, int32_t* out_shape_y,
-    int32_t* out_shape_output, uint32_t* out_rank_output
+    const vsi_size_t* shape_x, const vsi_size_t rank_x,
+    const vsi_size_t* shape_y, const vsi_size_t rank_y,
+    const vsi_size_t* shape_output, const vsi_size_t rank_output,
+    vsi_size_t* out_shape_x, vsi_size_t* out_shape_y,
+    vsi_size_t* out_shape_output, vsi_size_t* out_rank_output
     );

 vsi_bool vsi_nn_kernel_optimize_broadcast_shape
     (
-    const int32_t** shape_in, const size_t* rank_in,
+    const vsi_size_t** shape_in, const vsi_size_t* rank_in,
     const int32_t input_num,
-    const int32_t* shape_output, const size_t rank_output,
-    int32_t** out_shape_in,
-    int32_t* out_shape_output, uint32_t* out_rank_output
+    const vsi_size_t* shape_output, const vsi_size_t rank_output,
+    vsi_size_t** out_shape_in,
+    vsi_size_t* out_shape_output, uint32_t* out_rank_output
     );

 #endif
@@ -30,53 +30,53 @@

 vsi_bool vsi_nn_kernel_optimize_reduce_shape
     (
-    const int32_t* shape_x, const size_t rank_x,
-    const int32_t *axis, const size_t axis_size,
-    const int32_t* shape_output, const size_t rank_output,
-    int32_t* out_shape_x, uint32_t* out_rank_x,
-    int32_t* out_shape_output, uint32_t* out_rank_output,
+    const vsi_size_t* shape_x, const vsi_size_t rank_x,
+    const int32_t *axis, const vsi_size_t axis_size,
+    const vsi_size_t* shape_output, const vsi_size_t rank_output,
+    vsi_size_t* out_shape_x, uint32_t* out_rank_x,
+    vsi_size_t* out_shape_output, uint32_t* out_rank_output,
     int32_t* out_axis, uint32_t* out_axis_size
     );

 vsi_bool vsi_nn_kernel_optimize_tensor_shape
     (
-    const int32_t* shape_x, const size_t rank_x,
-    const int32_t *axis, const size_t axis_size,
-    int32_t* out_shape_x, uint32_t* out_rank_x,
+    const vsi_size_t* shape_x, const vsi_size_t rank_x,
+    const int32_t *axis, const vsi_size_t axis_size,
+    vsi_size_t* out_shape_x, uint32_t* out_rank_x,
     int32_t* out_axis, uint32_t* out_axis_size
     );

 vsi_bool vsi_nn_kernel_optimize_element_shape
     (
-    const int32_t* shape_x, const size_t rank_x,
-    int32_t* out_shape_x, int32_t* out_rank_x
+    const vsi_size_t* shape_x, const vsi_size_t rank_x,
+    vsi_size_t* out_shape_x, vsi_size_t* out_rank_x
     );

 vsi_bool vsi_nn_kernel_optimize_softmax_shape
     (
-    const int32_t* shape_x, const size_t rank_x, const int32_t axis,
-    int32_t* out_shape_x, uint32_t* out_rank_x,int32_t* out_axis
+    const vsi_size_t* shape_x, const vsi_size_t rank_x, const int32_t axis,
+    vsi_size_t* out_shape_x, uint32_t* out_rank_x,int32_t* out_axis
     );

 vsi_bool vsi_nn_kernel_optimize_tile_shape
     (
-    const int32_t* shape_x, const size_t rank_x,
-    const int32_t* multiples, const size_t rank,
-    const int32_t* shape_output, const size_t rank_output,
-    int32_t* out_shape_x, int32_t* out_shape_y,
-    int32_t* out_shape_output, uint32_t* out_rank_output
+    const vsi_size_t* shape_x, const vsi_size_t rank_x,
+    const vsi_size_t* multiples, const vsi_size_t rank,
+    const vsi_size_t* shape_output, const vsi_size_t rank_output,
+    vsi_size_t* out_shape_x, vsi_size_t* out_shape_y,
+    vsi_size_t* out_shape_output, vsi_size_t* out_rank_output
     );

 vsi_bool vsi_nn_kernel_optimize_1d_tensor_shape
     (
-    const int32_t* shape, const uint32_t rank,
-    int32_t* out_shape, uint32_t* out_rank
+    const vsi_size_t* shape, const uint32_t rank,
+    vsi_size_t* out_shape, uint32_t* out_rank
     );

 vsi_bool vsi_nn_kernel_optimize_nchw2xhw_shape
     (
-    const int32_t* shape, const uint32_t rank,
-    int32_t* out_shape, uint32_t* out_rank
+    const vsi_size_t* shape, const uint32_t rank,
+    vsi_size_t* out_shape, uint32_t* out_rank
     );

 #endif
@@ -0,0 +1,76 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_OP_CONV2D_LSTM_H
+#define _VSI_NN_OP_CONV2D_LSTM_H
+
+#include "vsi_nn_types.h"
+
+enum
+{
+    CONV2D_LSTM_IN_INPUT = 0,
+    CONV2D_LSTM_IN_H_STATE = 1,
+    CONV2D_LSTM_IN_C_STATE = 2,
+
+    CONV2D_LSTM_IN_KERNEL_I2I = 3,
+    CONV2D_LSTM_IN_KERNEL_I2F = 4,
+    CONV2D_LSTM_IN_KERNEL_I2C = 5,
+    CONV2D_LSTM_IN_KERNEL_I2O = 6,
+
+    CONV2D_LSTM_IN_KERNEL_R2I = 7,
+    CONV2D_LSTM_IN_KERNEL_R2F = 8,
+    CONV2D_LSTM_IN_KERNEL_R2C = 9,
+    CONV2D_LSTM_IN_KERNEL_R2O = 10,
+
+    CONV2D_LSTM_IN_BIAS_I = 11,
+    CONV2D_LSTM_IN_BIAS_F = 12,
+    CONV2D_LSTM_IN_BIAS_C = 13,
+    CONV2D_LSTM_IN_BIAS_O = 14,
+
+    CONV2D_LSTM_IN_CNT,
+
+    CONV2D_LSTM_OUT_OUTPUT = 0,
+    CONV2D_LSTM_OUT_H_STATE = 1,
+    CONV2D_LSTM_OUT_C_STATE = 2,
+
+    CONV2D_LSTM_OUT_CNT
+};
+
+typedef struct _vsi_nn_conv2d_lstm_local
+{
+    void * ptr;
+} vsi_nn_conv2d_lstm_local;
+
+typedef struct _vsi_nn_conv2d_lstm_param
+{
+    vsi_nn_conv2d_lstm_local * local;
+
+    vsi_nn_activation_e activation;
+    vsi_nn_activation_e recurrent_activation;
+    vsi_nn_con2d_lstm_dataformat data_format;
+    vsi_bool return_sequences;
+    uint32_t filters;
+    vsi_nn_conv2d_param conv2d;
+} vsi_nn_conv2d_lstm_param;
+
+#endif
@@ -0,0 +1,76 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_OP_CONV2D_LSTM_CELL_H
+#define _VSI_NN_OP_CONV2D_LSTM_CELL_H
+
+#include "vsi_nn_types.h"
+
+#define CONV2D_LSTM_CELL_GATE_NUM 4 // i,f,c,o
+
+enum
+{
+    CONV2D_LSTM_CELL_IN_INPUT = 0,
+    CONV2D_LSTM_CELL_IN_H_STATE = 1,
+    CONV2D_LSTM_CELL_IN_C_STATE = 2,
+
+    CONV2D_LSTM_CELL_IN_KERNEL_I2I = 3,
+    CONV2D_LSTM_CELL_IN_KERNEL_I2F = 4,
+    CONV2D_LSTM_CELL_IN_KERNEL_I2C = 5,
+    CONV2D_LSTM_CELL_IN_KERNEL_I2O = 6,
+
+    CONV2D_LSTM_CELL_IN_KERNEL_R2I = 7,
+    CONV2D_LSTM_CELL_IN_KERNEL_R2F = 8,
+    CONV2D_LSTM_CELL_IN_KERNEL_R2C = 9,
+    CONV2D_LSTM_CELL_IN_KERNEL_R2O = 10,
+
+    CONV2D_LSTM_CELL_IN_BIAS_I = 11,
+    CONV2D_LSTM_CELL_IN_BIAS_F = 12,
+    CONV2D_LSTM_CELL_IN_BIAS_C = 13,
+    CONV2D_LSTM_CELL_IN_BIAS_O = 14,
+
+    CONV2D_LSTM_CELL_IN_CNT,
+
+    CONV2D_LSTM_CELL_OUT_OUTPUT = 0,
+    CONV2D_LSTM_CELL_OUT_H_STATE = 1,
+    CONV2D_LSTM_CELL_OUT_C_STATE = 2,
+
+    CONV2D_LSTM_CELL_OUT_CNT
+};
+
+typedef struct _vsi_nn_conv2d_lstm_cell_local
+{
+    void * ptr;
+} vsi_nn_conv2d_lstm_cell_local;
+
+typedef struct _vsi_nn_conv2d_lstm_cell_param
+{
+    vsi_nn_conv2d_lstm_cell_local * local;
+
+    vsi_nn_activation_e activation;
+    vsi_nn_activation_e recurrent_activation;
+    uint32_t filters;
+    vsi_nn_conv2d_param conv2d;
+} vsi_nn_conv2d_lstm_cell_param;
+
+#endif
@@ -0,0 +1,78 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_OP_GRU_H
+#define _VSI_NN_OP_GRU_H
+
+#include "vsi_nn_types.h"
+
+/* Define the inputs and outputs for GRU Layer */
+enum
+{
+    GRU_IN_INPUT = 0,
+    GRU_IN_H_STATE = 1,
+
+    /* input kernel */
+    GRU_IN_KERNEL_I2Z = 2,
+    GRU_IN_KERNEL_I2R = 3,
+    GRU_IN_KERNEL_I2H = 4,
+
+    /* recurrent kernel */
+    GRU_IN_KERNEL_R2Z = 5,
+    GRU_IN_KERNEL_R2R = 6,
+    GRU_IN_KERNEL_R2H = 7,
+
+    /* input bias */
+    GRU_IN_BIAS_I2Z = 8,
+    GRU_IN_BIAS_I2R = 9,
+    GRU_IN_BIAS_I2H = 10,
+
+    /* recurrent bias */
+    GRU_IN_BIAS_R2Z = 11,
+    GRU_IN_BIAS_R2R = 12,
+    GRU_IN_BIAS_R2H = 13,
+
+    GRU_IN_CNT,
+
+    GRU_OUT_OUTPUT = 0,
+    GRU_OUT_H_STATE = 1,
+
+    GRU_OUT_CNT
+};
+
+typedef struct _vsi_nn_gru_param
+{
+    struct _vsi_nn_gru_local * local;
+
+    uint32_t num_units;
+    vsi_nn_activation_e activation;
+    vsi_nn_activation_e recurrent_activation;
+    vsi_bool reset_after;
+    vsi_bool return_sequences;
+    vsi_bool time_major;
+} vsi_nn_gru_param;
+_compiler_assert(offsetof(vsi_nn_gru_param, local) == 0, \
+    vsi_nn_gru_h );
+
+#endif
@@ -0,0 +1,84 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_OP_GRUCELL_H
+#define _VSI_NN_OP_GRUCELL_H
+
+#include "vsi_nn_types.h"
+
+enum
+{
+    GRUCELL_GATES_Z = 0,
+    GRUCELL_GATES_R = 1,
+    GRUCELL_GATES_H = 2,
+
+    GRUCELL_GATE_CNT
+};
+
+/* Define the inputs and outputs for GRUCell */
+enum
+{
+    GRUCELL_IN_INPUT = 0,
+    GRUCELL_IN_H_STATE = 1,
+
+    /* input kernel */
+    GRUCELL_IN_KERNEL_I2Z = 2,
+    GRUCELL_IN_KERNEL_I2R = 3,
+    GRUCELL_IN_KERNEL_I2H = 4,
+
+    /* recurrent kernel */
+    GRUCELL_IN_KERNEL_R2Z = 5,
+    GRUCELL_IN_KERNEL_R2R = 6,
+    GRUCELL_IN_KERNEL_R2H = 7,
+
+    /* input bias */
+    GRUCELL_IN_BIAS_I2Z = 8,
+    GRUCELL_IN_BIAS_I2R = 9,
+    GRUCELL_IN_BIAS_I2H = 10,
+
+    /* recurrent bias */
+    GRUCELL_IN_BIAS_R2Z = 11,
+    GRUCELL_IN_BIAS_R2R = 12,
+    GRUCELL_IN_BIAS_R2H = 13,
+
+    GRUCELL_IN_CNT,
+
+    GRUCELL_OUT_OUTPUT = 0,
+    GRUCELL_OUT_H_STATE = 1,
+
+    GRUCELL_OUT_CNT
+};
+
+typedef struct _vsi_nn_grucell_param
+{
+    struct _vsi_nn_grucell_local * local;
+
+    uint32_t num_units;
+    vsi_nn_activation_e activation;
+    vsi_nn_activation_e recurrent_activation;
+    vsi_bool reset_after;
+} vsi_nn_grucell_param;
+_compiler_assert(offsetof(vsi_nn_grucell_param, local) == 0, \
+    vsi_nn_conv1d_h );
+
+#endif
@@ -0,0 +1,52 @@
+/****************************************************************************
+*
+*    Copyright (c) 2020 Vivante Corporation
+*
+*    Permission is hereby granted, free of charge, to any person obtaining a
+*    copy of this software and associated documentation files (the "Software"),
+*    to deal in the Software without restriction, including without limitation
+*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*    and/or sell copies of the Software, and to permit persons to whom the
+*    Software is furnished to do so, subject to the following conditions:
+*
+*    The above copyright notice and this permission notice shall be included in
+*    all copies or substantial portions of the Software.
+*
+*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+*    DEALINGS IN THE SOFTWARE.
+*
+*****************************************************************************/
+#ifndef _VSI_NN_OP_GRUCELL_ACTIVATION_H
+#define _VSI_NN_OP_GRUCELL_ACTIVATION_H
+
+#include "vsi_nn_types.h"
+
+enum {
+    GRUCELL_ACT_IN_H_STATE = 0,
+    GRUCELL_ACT_IN_INPUT_FC_H = 1,
+    GRUCELL_ACT_IN_H_T = 2,
+    GRUCELL_ACT_IN_Z_T = 3,
+
+    GRUCELL_ACT_IN_CNT,
+
+    GRUCELL_ACT_OUT_OUTPUT = 0,
+    GRUCELL_ACT_OUT_H_STATE = 1,
+
+    GRUCELL_ACT_OUT_CNT
+};
+
+typedef struct _vsi_nn_grucell_activation_param
+{
+    struct _vsi_nn_grucell_activation_local * local;
+
+    vsi_nn_activation_e activation;
+} vsi_nn_grucell_activation_param;
+_compiler_assert(offsetof(vsi_nn_grucell_activation_param, local) == 0, \
+    vsi_nn_grucell_activation_h );
+
+#endif
@@ -61,7 +61,7 @@ typedef struct _vsi_nn_pre_process_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -47,7 +47,7 @@ typedef struct _vsi_nn_pre_process_bgra_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -56,7 +56,7 @@ typedef struct _vsi_nn_pre_process_gray_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -53,7 +53,7 @@ typedef struct _vsi_nn_pre_process_nv12_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -59,7 +59,7 @@ typedef struct _vsi_nn_pre_process_rgb_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -53,7 +53,7 @@ typedef struct _vsi_nn_pre_process_yuv420_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -53,7 +53,7 @@ typedef struct _vsi_nn_pre_process_yuv444_param

     struct
     {
-        uint32_t *size;
+        vsi_size_t *size;
         uint32_t dim_num;
     } output_attr;

@@ -37,7 +37,7 @@ typedef struct _vsi_nn_reshape_lcl_data

 typedef struct _vsi_nn_reshape_param
 {
-    const uint32_t * size;
+    const vsi_size_t * size;
     uint32_t dim_num;

     /* reshape layer local data structure */
@@ -174,31 +174,31 @@ OVXLIB_API vsi_status vsi_nn_Float32ToDtype
     const vsi_nn_dtype_t * dst_dtype
     );

-OVXLIB_API int32_t vsi_nn_DtypeConvertRawData
+OVXLIB_API vsi_size_t vsi_nn_DtypeConvertRawData
     (
     uint8_t * src,
-    int32_t src_bytes,
+    vsi_size_t src_bytes,
     const vsi_nn_dtype_t * src_dtype,
     uint8_t * dst,
-    int32_t dst_bytes,
+    vsi_size_t dst_bytes,
     const vsi_nn_dtype_t * dst_dtype
     );

-OVXLIB_API int32_t vsi_nn_DtypeConvertRawDataToFloat32
+OVXLIB_API vsi_size_t vsi_nn_DtypeConvertRawDataToFloat32
     (
     uint8_t * src,
-    int32_t src_bytes,
+    vsi_size_t src_bytes,
     const vsi_nn_dtype_t * src_dtype,
     float * dst,
-    int32_t dst_size
+    vsi_size_t dst_size
     );

-OVXLIB_API int32_t vsi_nn_DtypeConvertFloat32ToRawData
+OVXLIB_API vsi_size_t vsi_nn_DtypeConvertFloat32ToRawData
     (
     float * src,
-    int32_t src_size,
+    vsi_size_t src_size,
     uint8_t * dst,
-    int32_t dst_bytes,
+    vsi_size_t dst_bytes,
     const vsi_nn_dtype_t * dst_dtype
     );

@@ -221,7 +221,7 @@ OVXLIB_API vsi_status vsi_nn_vxConvertTensorToFloat32Data
     vx_tensor tensor,
     vsi_nn_tensor_attr_t *attr,
     float *f32_data,
-    uint32_t f32_data_sz
+    vsi_size_t f32_data_sz
     );

 OVXLIB_API vsi_status vsi_nn_vxConvertFloat32DataToTensor
@@ -230,7 +230,7 @@ OVXLIB_API vsi_status vsi_nn_vxConvertFloat32DataToTensor
     vx_tensor tensor,
     vsi_nn_tensor_attr_t *attr,
     float *f32_data,
-    uint32_t f32_data_sz
+    vsi_size_t f32_data_sz
     );

 #ifdef __cplusplus
@@ -507,7 +507,7 @@ vsi_bool vsi_nn_dtype_convert_float_to_quantize_asymm8
 vsi_bool vsi_nn_dtype_convert_float_to_quantize_symm8_perchannel
     (
     const float * buffer, size_t size,
-    const int32_t * shape, size_t rank,
+    const vsi_size_t * shape, size_t rank,
     const float * scale, size_t scale_size,
     const int32_t * zero_point, size_t zero_point_size,
     int32_t channel_dim,
@@ -552,7 +552,7 @@ vsi_bool vsi_nn_dtype_convert_quantize_asymm8_to_float
 vsi_bool vsi_nn_dtype_convert_quantize_symm8_perchannel_to_float
     (
     const int8_t * buffer, size_t size,
-    const int32_t * shape, size_t rank,
+    const vsi_size_t * shape, size_t rank,
     const float * scale, size_t scale_size,
     const int32_t * zero_point, size_t zero_point_size,
     int32_t channel_dim,
@@ -68,6 +68,7 @@ extern "C" {
 }
 DEFINE_ARRAY_TYPE( int, int32_t )
 DEFINE_ARRAY_TYPE( float, float )
+DEFINE_ARRAY_TYPE( size, vsi_size_t )

 #undef DEFINE_ARRAY_TYPE

@@ -75,9 +76,9 @@ OVXLIB_API void vsi_nn_Transpose
     (
     uint8_t * dst,
     uint8_t * data,
-    uint32_t * shape,
-    uint32_t dim_num,
-    uint32_t * perm,
+    vsi_size_t * shape,
+    vsi_size_t dim_num,
+    vsi_size_t * perm,
     vsi_nn_type_e type
     );

@@ -85,38 +86,38 @@ OVXLIB_API void vsi_nn_Permute
     (
     uint8_t * dst,
     uint8_t * data,
-    uint32_t * shape,
-    uint32_t dim_num,
-    uint32_t * perm,
+    vsi_size_t * shape,
+    vsi_size_t dim_num,
+    vsi_size_t * perm,
     vsi_nn_type_e type
     );

 OVXLIB_API void vsi_nn_SqueezeShape
     (
-    uint32_t * shape,
-    uint32_t * dim_num
+    vsi_size_t * shape,
+    vsi_size_t * dim_num
     );

-OVXLIB_API uint32_t vsi_nn_ShapeProduct
+OVXLIB_API vsi_size_t vsi_nn_ShapeProduct
     (
-    uint32_t * shape,
-    uint32_t dim_num
+    vsi_size_t * shape,
+    vsi_size_t dim_num
     );

 //shape: row first <--> column first
 OVXLIB_API void vsi_nn_InvertShape
     (
-    uint32_t * in,
-    uint32_t dim_num,
-    uint32_t * out
+    vsi_size_t * in,
+    vsi_size_t dim_num,
+    vsi_size_t * out
     );

 //Permute shape: row first <--> column first
 OVXLIB_API void vsi_nn_InvertPermuteShape
     (
-    uint32_t * in,
-    uint32_t dim_num,
-    uint32_t * out
+    vsi_size_t * in,
+    vsi_size_t dim_num,
+    vsi_size_t * out
     );

 OVXLIB_API double vsi_nn_Rint
@@ -131,7 +132,7 @@ OVXLIB_API double vsi_nn_Rint
  * @param[in] the low uint32_t of the seed.
  * @param[in] the high uint32_t of the seed.
  */
-OVXLIB_API void vsi_nn_random_init_for_philox_4x32_10
+void vsi_nn_random_init_for_philox_4x32_10
     (
     uint32_t low,
     uint32_t high
@@ -144,7 +145,7 @@ OVXLIB_API void vsi_nn_random_init_for_philox_4x32_10
  * @param[out] the buffer for RNG output.
  * @param[in] the number of generated random numbers.
  */
-OVXLIB_API void vsi_nn_random_generate_by_philox_4x32_10
+void vsi_nn_random_generate_by_philox_4x32_10
     (
     uint32_t *random_buf,
     uint32_t len
@@ -158,7 +159,7 @@ OVXLIB_API void vsi_nn_random_generate_by_philox_4x32_10
  * @param[out] the buffer for uniform float in [0, 1).
  * @param[in] the number of random numbers.
  */
-OVXLIB_API void vsi_nn_random_uniform_transform
+void vsi_nn_random_uniform_transform
     (
     uint32_t *random_buf,
     float *uniform_buf,
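The three Philox RNG entry points above (now dropped from the OVXLIB_API export set) compose as seed, generate, transform. A hedged usage sketch, assuming the declarations above are in scope and that the truncated trailing parameter of `vsi_nn_random_uniform_transform` is the element count, as its doc comment suggests:

```c
uint32_t raw[256];
float uniform[256];

/* Seed the Philox-4x32-10 counter with the two 32-bit seed words. */
vsi_nn_random_init_for_philox_4x32_10(0x12345678u, 0x9abcdef0u);
/* Draw 256 raw 32-bit words... */
vsi_nn_random_generate_by_philox_4x32_10(raw, 256);
/* ...and map them to uniform floats in [0, 1). */
vsi_nn_random_uniform_transform(raw, uniform, 256);
```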
@@ -26,18 +26,19 @@

 #include <stdio.h>
 #include <stdint.h>
+#include "vsi_nn_types.h"

 void vsi_nn_shape_get_stride
     (
-    const int32_t * shape,
-    size_t rank,
-    size_t * out_stride
+    const vsi_size_t * shape,
+    vsi_size_t rank,
+    vsi_size_t * out_stride
     );

-size_t vsi_nn_shape_get_size
+vsi_size_t vsi_nn_shape_get_size
     (
-    const int32_t * shape,
-    size_t rank
+    const vsi_size_t * shape,
+    vsi_size_t rank
     );

 #endif
@@ -72,27 +72,27 @@ extern "C" {
 OVXLIB_API uint8_t * vsi_nn_LoadBinaryData
     (
     const char * filename,
-    uint32_t * sz
+    vsi_size_t * sz
     );

-OVXLIB_API uint32_t vsi_nn_GetStrideSize
+OVXLIB_API vsi_size_t vsi_nn_GetStrideSize
     (
     vsi_nn_tensor_attr_t * attr,
-    uint32_t * stride
+    vsi_size_t * stride
     );

-OVXLIB_API uint32_t vsi_nn_GetStrideSizeBySize
+OVXLIB_API vsi_size_t vsi_nn_GetStrideSizeBySize
     (
-    uint32_t * size,
-    uint32_t dim_num,
+    vsi_size_t * size,
+    vsi_size_t dim_num,
     vsi_nn_type_e type,
-    uint32_t * stride
+    vsi_size_t * stride
     );

-OVXLIB_API uint32_t vsi_nn_GetTotalBytesBySize
+OVXLIB_API vsi_size_t vsi_nn_GetTotalBytesBySize
     (
-    uint32_t * size,
-    uint32_t dim_num,
+    vsi_size_t * size,
+    vsi_size_t dim_num,
     vsi_nn_type_e type
     );

@@ -116,10 +116,10 @@ OVXLIB_API void vsi_nn_UpdateTensorDims
     vsi_nn_tensor_attr_t * attr
     );

-OVXLIB_API uint32_t vsi_nn_ComputeFilterSize
+OVXLIB_API vsi_size_t vsi_nn_ComputeFilterSize
     (
-    uint32_t i_size,
-    uint32_t ksize,
+    vsi_size_t i_size,
+    vsi_size_t ksize,
     uint32_t * pad,
     uint32_t stride,
     uint32_t dilation,
@@ -134,24 +134,24 @@ OVXLIB_API void vsi_nn_InitTensorsId

 OVXLIB_API void vsi_nn_ComputePadWithPadType
     (
-    uint32_t * in_shape,
+    vsi_size_t * in_shape,
     uint32_t in_dim_num,
-    uint32_t * ksize,
+    vsi_size_t * ksize,
     uint32_t * stride,
     vsi_nn_pad_e pad_type,
     vsi_nn_round_type_e rounding,
-    uint32_t * out_pad
+    vsi_size_t * out_pad
     );

 OVXLIB_API void vsi_nn_ComputePadWithPadTypeForConv1D
     (
-    uint32_t * in_shape,
+    vsi_size_t * in_shape,
     uint32_t in_dim_num,
-    uint32_t * ksize,
+    vsi_size_t * ksize,
     uint32_t * stride,
     vsi_nn_pad_e pad_type,
     vsi_nn_round_type_e rounding,
-    uint32_t * out_pad
+    vsi_size_t * out_pad
     );

 OVXLIB_API void vsi_nn_GetPadForOvx
@@ -171,8 +171,8 @@ OVXLIB_API vsi_bool vsi_nn_CreateTensorGroup

 OVXLIB_API uint32_t vsi_nn_ShapeToString
     (
-    uint32_t * shape,
-    uint32_t dim_num,
+    vsi_size_t * shape,
+    vsi_size_t dim_num,
     char * buf,
     uint32_t buf_sz,
     vsi_bool for_print
@@ -207,9 +207,9 @@ OVXLIB_API vsi_bool vsi_nn_CheckFilePath
  */
 OVXLIB_API uint8_t * vsi_nn_MallocAlignedBuffer
     (
-    uint32_t mem_size,
-    uint32_t align_start_size,
-    uint32_t align_block_size
+    vsi_size_t mem_size,
+    vsi_size_t align_start_size,
+    vsi_size_t align_block_size
     );

 /**
@@ -227,14 +227,14 @@ OVXLIB_API void vsi_nn_FreeAlignedBuffer
 OVXLIB_API vsi_bool vsi_nn_IsBufferAligned
     (
     uint8_t * buf,
-    uint32_t align_start_size
+    vsi_size_t align_start_size
     );

 OVXLIB_API void vsi_nn_FormatToString
     (
     vsi_nn_tensor_t *tensor,
     char *buf,
-    uint32_t buf_sz
+    vsi_size_t buf_sz
     );

 OVXLIB_API const char* vsi_nn_DescribeStatus
@@ -242,51 +242,33 @@ OVXLIB_API const char* vsi_nn_DescribeStatus
     vsi_status status
     );

-uint32_t vsi_nn_compute_filter_shape
+vsi_size_t vsi_nn_compute_filter_shape
     (
     vsi_nn_pad_e padding_type,
-    uint32_t image_size,
-    uint32_t ksize,
+    vsi_size_t image_size,
+    vsi_size_t ksize,
     uint32_t stride,
     uint32_t dilation_rate
     );

 void vsi_nn_compute_padding
     (
-    uint32_t * in_shape,
-    uint32_t * ksize,
+    vsi_size_t * in_shape,
+    vsi_size_t * ksize,
     uint32_t * stride,
     uint32_t * dilation,
     vsi_nn_pad_e pad_type,
-    uint32_t * out_pad
+    vsi_size_t * out_pad
     );

 void vsi_nn_compute_padding_conv1d
     (
-    uint32_t * in_shape,
-    uint32_t * ksize,
+    vsi_size_t * in_shape,
+    vsi_size_t * ksize,
     uint32_t * stride,
     uint32_t * dilation,
     vsi_nn_pad_e pad_type,
-    uint32_t * out_pad
+    vsi_size_t * out_pad
     );

-void vsi_nn_OptimizedEltOPShape
-    (
-    vsi_nn_tensor_t * input,
-    uint32_t sizes[VSI_NN_MAX_DIM_NUM],
-    uint32_t * num_of_dims
-    );
-
-vsi_bool vsi_nn_OptimizedEltWiseOPShape
-    (
-    vsi_nn_tensor_t * input0,
-    vsi_nn_tensor_t * input1,
-    vsi_nn_tensor_t * output,
-    uint32_t sizes0[VSI_NN_MAX_DIM_NUM],
-    uint32_t sizes1[VSI_NN_MAX_DIM_NUM],
-    uint32_t sizes2[VSI_NN_MAX_DIM_NUM],
-    uint32_t * dim_num
-    );
-
 vsi_bool vsi_nn_IsEVISFeatureAvaiable
@@ -317,7 +299,7 @@ typedef uint32_t(*comp_func)(void* data, int32_t left, int32_t right);
  * @param[in] recursively execute vsi_nn_partition.
  * @param[out] the sorted index of data.
  */
-OVXLIB_API int32_t vsi_nn_partition
+int32_t vsi_nn_partition
     (
     void* data,
     int32_t left,
@@ -350,7 +332,7 @@ static inline void vsi_nn_reorder_tensor
     }
 }

-void vsi_nn_print_int_array( int32_t* array, size_t size );
+void vsi_nn_print_size_array( vsi_size_t* array, size_t size );

 float vsi_nn_activation
     (
@@ -88,6 +88,7 @@ vsi_nn_internal_tensor_t* vsi_nn_internal_create_zero_bias_tensor
     vsi_nn_node_t* node,
     vsi_nn_tensor_attr_t* input_attr,
     vsi_nn_tensor_attr_t* weight_attr,
+    vsi_nn_op_t op,
     vsi_bool use_virtual_tensor
     );

@@ -132,8 +133,8 @@ vsi_nn_internal_node_t* vsi_nn_internal_new_node
     (
     vsi_nn_node_t* node,
     vsi_nn_op_t op,
-    uint32_t input_num,
-    uint32_t output_num
+    vsi_size_t input_num,
+    vsi_size_t output_num
     );

 void* vsi_nn_internal_new_node_param
@@ -117,8 +117,8 @@ OVXLIB_API vsi_nn_node_t * vsi_nn_NewNode
     (
     vsi_nn_graph_t * graph,
     vsi_nn_op_t op,
-    uint32_t input_num,
-    uint32_t output_num
+    vsi_size_t input_num,
+    vsi_size_t output_num
     );

 /**
@@ -172,6 +172,11 @@
 #include "ops/vsi_nn_op_grouped_conv1d.h"
 #include "ops/vsi_nn_op_scatter_nd_update.h"
 #include "ops/vsi_nn_op_gelu.h"
+#include "ops/vsi_nn_op_conv2d_lstm.h"
+#include "ops/vsi_nn_op_conv2d_lstm_cell.h"
+#include "ops/vsi_nn_op_gru.h"
+#include "ops/vsi_nn_op_grucell.h"
+#include "ops/vsi_nn_op_grucell_activation.h"
 /* custom node head define define */
 #include "custom/vsi_nn_custom_node_type.h"

@@ -330,6 +335,11 @@ typedef union _vsi_nn_nn_param
     vsi_nn_grouped_conv1d_param grouped_conv1d;
     vsi_nn_scatter_nd_update_param scatter_nd_update;
     vsi_nn_gelu_param gelu;
+    vsi_nn_conv2d_lstm_param conv2d_lstm;
+    vsi_nn_conv2d_lstm_cell_param conv2d_lstm_cell;
+    vsi_nn_gru_param gru;
+    vsi_nn_grucell_param grucell;
+    vsi_nn_grucell_activation_param grucell_activation;
     uint8_t client_param[128];

     /* custom node data struct define */
@@ -280,8 +280,8 @@ void vsi_nn_OpGetIoNum
     (
     vsi_nn_op_t op,
     vsi_nn_node_t * node,
-    uint32_t * input_num,
-    uint32_t * output_num
+    vsi_size_t * input_num,
+    vsi_size_t * output_num
     );

 OVXLIB_API vsi_bool vsi_nn_OpGenerateTensor
@@ -181,6 +181,7 @@ vsi_status vsi_nn_add_single_preproc_node
     (
     vsi_nn_graph_t* graph,
+    uint32_t input_idx,
     vsi_nn_tensor_id_t input,
     vsi_nn_node_t** first_node,
     uint32_t nodes_count,
     vsi_nn_preprocess_base_t* preprocess,
@@ -234,7 +235,6 @@ OVXLIB_API vsi_status vsi_nn_AddGraphPostProcess
     uint32_t count
     );

-
 #ifdef __cplusplus
 }
 #endif
@@ -223,8 +223,8 @@ vsi_nn_internal_tensor_t* vsi_nn_rnn_create_reshape
     vsi_nn_node_t* self,
     vsi_nn_tensor_t* input_tensor,
     vsi_nn_tensor_t* output_tensor,
-    uint32_t* size,
-    uint32_t dim_num,
+    vsi_size_t* size,
+    vsi_size_t dim_num,
     vsi_bool use_virtual_tensor
     );

@@ -233,8 +233,8 @@ vsi_nn_internal_tensor_t* vsi_nn_rnn_create_permute
     vsi_nn_node_t* self,
     vsi_nn_tensor_t* input_tensor,
     vsi_nn_tensor_t* output_tensor,
-    uint32_t* perm,
-    uint32_t dim_num,
+    vsi_size_t* perm,
+    vsi_size_t dim_num,
     vsi_bool use_virtual_tensor
     );

@@ -132,7 +132,7 @@ typedef struct vsi_nn_dtype
 typedef struct vsi_nn_tensor_attr
 {
     /** Tensor shape */
-    uint32_t size[VSI_NN_MAX_DIM_NUM];
+    vsi_size_t size[VSI_NN_MAX_DIM_NUM];
     /** Dimension number */
     uint32_t dim_num;
     /** If it's virtual tensor*/
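Most of the caller churn in this commit follows from this one field changing type. A minimal sketch of filling the attribute under the new typing (the helper name is illustrative; `vsi_nn_tensor_attr_t` and `vsi_size_t` come from the headers in this diff):

```c
/* With size[] now vsi_size_t, literal dimensions still assign cleanly, but
 * any caller that aliased the array through a uint32_t* must be updated. */
void fill_attr_2d(vsi_nn_tensor_attr_t *attr, vsi_size_t w, vsi_size_t h)
{
    attr->size[0] = w;
    attr->size[1] = h;
    attr->dim_num = 2;
}
```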
@@ -183,10 +183,10 @@ OVXLIB_API uint8_t * vsi_nn_ConvertRawTensorToData
     (
     vx_context context,
     vx_tensor tensor,
-    uint32_t * dim,
+    vsi_size_t * dim,
     vx_enum * data_format,
-    uint32_t * size,
-    uint32_t * stride_size,
+    vsi_size_t * size,
+    vsi_size_t * stride_size,
     vx_tensor_addressing * addr,
     vx_enum accessor
     );
@@ -211,7 +211,7 @@ OVXLIB_API uint8_t * vsi_nn_ConvertRawTensorToData2
     vx_context context,
     vx_tensor tensor,
     vsi_nn_tensor_attr_t * attr,
-    uint32_t * stride_size,
+    vsi_size_t * stride_size,
     vx_tensor_addressing * addr,
     vx_enum accessor
     );
@@ -266,7 +266,7 @@ OVXLIB_API void vsi_nn_SaveDataToText
     (
     const char * filename,
     uint8_t * data,
-    uint32_t data_size,
+    vsi_size_t data_size,
     vsi_nn_type_e data_format,
     char * seperator
     );
@@ -356,7 +356,7 @@ OVXLIB_API vsi_status vsi_nn_CopyRawDataToTensor
     vsi_nn_tensor_t* tensor
     );

-OVXLIB_API uint32_t vsi_nn_CopyTensorToBuffer
+OVXLIB_API vsi_size_t vsi_nn_CopyTensorToBuffer
     (
     vsi_nn_graph_t * graph,
     vsi_nn_tensor_t * tensor,
@@ -394,25 +394,25 @@ OVXLIB_API void vsi_nn_TransposeTensor
     (
     vsi_nn_graph_t * graph,
     vsi_nn_tensor_t * tensor,
-    uint32_t * perm,
-    uint32_t dim_num,
-    uint32_t * as_shape
+    vsi_size_t * perm,
+    vsi_size_t dim_num,
+    vsi_size_t * as_shape
     );

 OVXLIB_API void vsi_nn_PermuteTensor
     (
     vsi_nn_graph_t * graph,
     vsi_nn_tensor_t * tensor,
-    uint32_t * perm,
-    uint32_t dim_num
+    vsi_size_t * perm,
+    vsi_size_t dim_num
     );

 OVXLIB_API vsi_bool vsi_nn_CalcReshapeTensor
     (
     vsi_nn_tensor_t * input,
     vsi_nn_tensor_t * output,
-    uint32_t * shape,
-    uint32_t dim_num
+    vsi_size_t * shape,
+    vsi_size_t dim_num
     );

 OVXLIB_API vsi_bool vsi_nn_ReshapeTensor
@@ -420,8 +420,8 @@ OVXLIB_API vsi_bool vsi_nn_ReshapeTensor
     vsi_nn_graph_t * graph,
     vsi_nn_tensor_t * input,
     vsi_nn_tensor_t * output,
-    const uint32_t * shape,
-    uint32_t dim_num
+    const vsi_size_t * shape,
+    vsi_size_t dim_num
     );

 /**
@@ -430,7 +430,7 @@ OVXLIB_API vsi_bool vsi_nn_ReshapeTensor
  * @param[in] tensor Tensor handle.
  * @return Element number of the tensor.
  */
-OVXLIB_API uint32_t vsi_nn_GetElementNum
+OVXLIB_API vsi_size_t vsi_nn_GetElementNum
     (
     const vsi_nn_tensor_t * tensor
     );
@@ -446,10 +446,10 @@ OVXLIB_API uint32_t vsi_nn_GetElementNum
  *
  * @return Size of the tensor.
  */
-OVXLIB_API uint32_t vsi_nn_GetTensorSize
+OVXLIB_API vsi_size_t vsi_nn_GetTensorSize
     (
-    const uint32_t * shape,
-    uint32_t dim_num,
+    const vsi_size_t * shape,
+    vsi_size_t dim_num,
     vsi_nn_type_e dtype
     );

@@ -507,8 +507,8 @@ OVXLIB_API void vsi_nn_Free
 OVXLIB_API vx_tensor vsi_nn_CreateViewTensor
     (
     vsi_nn_graph_t *graph,
-    uint32_t *start,
-    uint32_t *end,
+    vsi_size_t *start,
+    vsi_size_t *end,
     vsi_nn_tensor_t *tensor
     );

@@ -536,7 +536,7 @@ OVXLIB_API vsi_status vsi_nn_SwapTensorHandle
     vsi_nn_tensor_t * tensor1
     );

-OVXLIB_API uint32_t vsi_nn_vxGetTensorElementNum
+OVXLIB_API vsi_size_t vsi_nn_vxGetTensorElementNum
     (
     vsi_nn_tensor_attr_t *attr
     );
@@ -571,7 +571,7 @@ OVXLIB_API vsi_status vsi_nn_vxCopyDataToTensor
  *
  * @return the offset from the beginning of the tensor(offset unit: element)
  */
-OVXLIB_API uint32_t vsi_nn_GetOffsetByCoords
+OVXLIB_API vsi_size_t vsi_nn_GetOffsetByCoords
     (
     vsi_nn_tensor_attr_t *attr,
     uint32_t *coords
@@ -621,8 +621,8 @@ vsi_nn_tensor_t *vsi_nn_reshape_tensor
     (
     vsi_nn_graph_t * graph,
     vsi_nn_tensor_t * input,
-    uint32_t * shape,
-    uint32_t dim_num
+    vsi_size_t * shape,
+    vsi_size_t dim_num
     );

 /**
@@ -646,9 +646,9 @@ vsi_status vsi_nn_copy_tensor_veiw_patch
     vx_tensor tensor,
     vsi_nn_tensor_attr_t *attr,
     void *user_ptr,
-    uint32_t *start,
-    uint32_t *end,
-    uint32_t *stride,
+    vsi_size_t *start,
+    vsi_size_t *end,
+    vsi_size_t *stride,
     vsi_enum usage,
     vsi_enum user_memory_type
     );
@@ -37,6 +37,39 @@ extern "C"{
 #define inline __inline
 #endif

+#if VX_VA40_EXT_SUPPORT
+#define VSI_40BIT_VA_SUPPORT
+#endif
+
+#if (defined(_MSC_VER) || defined(__MINGW32))
+#define SIZE_T_SPECIFIER "Iu"
+#define SSIZE_T_SPECIFIER "Id"
+#ifdef VSI_40BIT_VA_SUPPORT
+#define VSI_SIZE_T_SPECIFIER "Iu"
+#define VSI_SSIZE_T_SPECIFIER "Id"
+#else
+#define VSI_SIZE_T_SPECIFIER "u"
+#define VSI_SSIZE_T_SPECIFIER "d"
+#endif
+#else
+#define SIZE_T_SPECIFIER "zu"
+#define SSIZE_T_SPECIFIER "zd"
+#ifdef VSI_40BIT_VA_SUPPORT
+#define VSI_SIZE_T_SPECIFIER "zu"
+#define VSI_SSIZE_T_SPECIFIER "zd"
+#else
+#define VSI_SIZE_T_SPECIFIER "u"
+#define VSI_SSIZE_T_SPECIFIER "d"
+#endif
+#endif
+
+#if defined(_MSC_VER)
+#include <BaseTsd.h>
+typedef SSIZE_T ssize_t;
+#else
+#include <sys/types.h>
+#endif
+
 /** Enumuration type */
 typedef int32_t vsi_enum;
 /** Status type */
@@ -47,6 +80,16 @@ typedef int32_t vsi_bool;
 typedef uint16_t vsi_float16;
 /** Truncate float16 */
 typedef uint16_t vsi_bfloat16;
+/** Tensor size */
+#ifdef VSI_40BIT_VA_SUPPORT
+typedef size_t vsi_size_t;
+typedef ssize_t vsi_ssize_t;
+#else
+typedef uint32_t vsi_size_t;
+typedef int32_t vsi_ssize_t;
+#endif
+
+#define VSI_SIZE_T

 #ifndef TRUE
 #define TRUE 1
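Since `vsi_size_t` is `uint32_t` without `VSI_40BIT_VA_SUPPORT` and `size_t`-wide with it, printf-style code should go through the new specifier macros rather than hard-coding `%u`. A minimal sketch, assuming the typedefs and macros above are visible (the helper name is illustrative):

```c
#include <stdio.h>

void print_shape_info(vsi_size_t rank, vsi_size_t total)
{
    /* VSI_SIZE_T_SPECIFIER expands to "u", "zu", or "Iu" depending on
     * VSI_40BIT_VA_SUPPORT and the toolchain, keeping this portable. */
    printf("rank=%" VSI_SIZE_T_SPECIFIER " elements=%" VSI_SIZE_T_SPECIFIER "\n",
           rank, total);
}
```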
@@ -180,6 +223,12 @@ typedef enum _vsi_nn_node_attr_preload_type_e
     VSI_NN_NODE_PRELOAD_AXISRAM
 } vsi_nn_node_attr_preload_type_e;

+typedef enum _vsi_nn_con2d_lstm_dataformat
+{
+    CONV2D_LSTM_CHANNELS_LAST,
+    CONV2D_LSTM_CHANNELS_FIRST
+} vsi_nn_con2d_lstm_dataformat;
+
 /** Deprecated */
 typedef uint32_t vsi_nn_size_t;

@@ -33,7 +33,7 @@ extern "C"{

 #define VSI_NN_VERSION_MAJOR 1
 #define VSI_NN_VERSION_MINOR 1
-#define VSI_NN_VERSION_PATCH 33
+#define VSI_NN_VERSION_PATCH 34
 #define VSI_NN_VERSION \
     (VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)

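For reference, the packed value this bump produces is 1 * 10000 + 1 * 100 + 34 = 10134. A one-line compile-time check (the header name is assumed from the macro's location in this diff):

```c
#include "vsi_nn_version.h"

/* 1.1.34 packs to 10134 under the MAJOR*10000 + MINOR*100 + PATCH scheme. */
typedef char version_is_10134[(VSI_NN_VERSION == 10134) ? 1 : -1];
```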
@@ -80,7 +80,7 @@ DEF_KERNEL_INITIALIZER(_softmax_initializer)
         return status;
     }

-    sf_size = attr->shape->data[0];
+    sf_size = (int)attr->shape->data[0];

     gpu_param.global_offset[0] = 0;
     gpu_param.global_offset[1] = 0;
@@ -76,7 +76,7 @@ static vsi_bool op_setup
     {
         outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
         memmove(outputs[0]->attr.size, inputs[0]->attr.size,
-            inputs[0]->attr.dim_num * sizeof(uint32_t));
+            inputs[0]->attr.dim_num * sizeof(vsi_size_t));
     }
     return TRUE;
 } /* op_setup() */
@@ -111,7 +111,7 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
     };
     vx_tensor input0 = (vx_tensor)param[0];
     vsi_nn_kernel_tensor_attr_t *input0_attr = NULL;
-    vsi_int_array_t *input_shape = NULL;
+    vsi_size_array_t *input_shape = NULL;

     input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
     CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
@@ -141,7 +141,7 @@ DEF_KERNEL_INITIALIZER(_argmax_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@@ -238,13 +238,13 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis = 0;
-    int32_t axis_size = 0;
+    vsi_size_t axis_size = 0;

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
                 inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
                 outputs[0]->attr.dim_num )
         || axis > 2)
     {
@@ -141,7 +141,7 @@ DEF_KERNEL_INITIALIZER(_argmin_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@@ -233,13 +233,13 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis = 0;
-    int32_t axis_size = 0;
+    size_t axis_size = 0;

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
                 inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
                 outputs[0]->attr.dim_num )
         || axis > 2)
     {
@@ -55,11 +55,6 @@ __BEGIN_DECLS
           HASH_BATCH_NORM_SH_KERNEL_NAME( SRC_TYPE, OUT_TYPE), \
           VSI_NN_GEN_BATCH_NORM_KERNEL_SOURCE_NAME },

-#define TENSOR_BATCH_NORM_FLOAT( SRC_TYPE, OUT_TYPE) \
-        { HASH_BATCH_NORM_KEY( SRC_TYPE, OUT_TYPE, 0), \
-          HASH_BATCH_NORM_SH_KERNEL_NAME( F32, F32), \
-          VSI_NN_GEN_BATCH_NORM_KERNEL_SOURCE_NAME },
-
 #define HASH_BATCH_NORM_SH_KERNEL_2D_NAME( SRC_TYPE, DST_TYPE) \
     CVIVANTE_NAMESPACE("batch_norm_"#SRC_TYPE"to"#DST_TYPE"_2D")

@@ -68,40 +63,29 @@ __BEGIN_DECLS
           HASH_BATCH_NORM_SH_KERNEL_2D_NAME( SRC_TYPE, OUT_TYPE), \
           VSI_NN_GEN_BATCH_NORM_KERNEL_SOURCE_NAME },

-#define TENSOR_BATCH_NORM_FLOAT_2D( SRC_TYPE, OUT_TYPE) \
-        { HASH_BATCH_NORM_KEY( SRC_TYPE, OUT_TYPE, 1), \
-          HASH_BATCH_NORM_SH_KERNEL_2D_NAME( F32, F32), \
-          VSI_NN_GEN_BATCH_NORM_KERNEL_SOURCE_NAME },
-
 static const struct {
         uint32_t key;
         char* function_name;
         const char* source_name;
     } kernel_map[] =
 {
-    TENSOR_BATCH_NORM_FLOAT(F32, F32)
-    TENSOR_BATCH_NORM_FLOAT(F16, F16)
+    TENSOR_BATCH_NORM_KERNELS(F32, F32)
+    TENSOR_BATCH_NORM_KERNELS(F32, U8)
+    TENSOR_BATCH_NORM_KERNELS(F32, I32)

-    TENSOR_BATCH_NORM_FLOAT_2D(F32, F32)
-    TENSOR_BATCH_NORM_FLOAT_2D(F16, F16)
+    TENSOR_BATCH_NORM_KERNELS_2D(F32, F32)
+    TENSOR_BATCH_NORM_KERNELS_2D(F32, U8)
+    TENSOR_BATCH_NORM_KERNELS_2D(F32, I32)

     TENSOR_BATCH_NORM_KERNELS(U8, U8)
     TENSOR_BATCH_NORM_KERNELS(U8, F32)
     TENSOR_BATCH_NORM_KERNELS(I32, I32)
     TENSOR_BATCH_NORM_KERNELS(I32, F32)

     TENSOR_BATCH_NORM_KERNELS_2D(U8, U8)
     TENSOR_BATCH_NORM_KERNELS_2D(U8, F32)
     TENSOR_BATCH_NORM_KERNELS_2D(I32, I32)
     TENSOR_BATCH_NORM_KERNELS_2D(I32, F32)
 };

 /*
@@ -149,7 +133,7 @@ DEF_KERNEL_INITIALIZER(_log_softmax_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-    vsi_int_array_t * in_shape = NULL;
+    vsi_size_array_t * in_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );
@@ -190,6 +174,24 @@ static vsi_status _query_kernel

     input_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
     output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
+    if (input_dtype == I8 || input_dtype == I16)
+    {
+        input_dtype = I32;
+    }
+    else if (input_dtype == F16)
+    {
+        input_dtype = F32;
+    }
+
+    if (output_dtype == I8 || output_dtype == I16)
+    {
+        output_dtype = I32;
+    }
+    else if (output_dtype == F16)
+    {
+        output_dtype = F32;
+    }
+
     key = HASH_BATCH_NORM_KEY( input_dtype, output_dtype, image_2d );

     for( i = 0; i < _cnt_of_array(kernel_map); i ++ )
@@ -239,14 +241,36 @@ static vsi_nn_kernel_node_t _setup
     if (inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC )
     {
         input_scale = inputs[0]->attr.dtype.scale;
-        input_tail = 0 - (float)inputs[0]->attr.dtype.zero_point * inputs[0]->attr.dtype.scale;
+        input_tail = (float)inputs[0]->attr.dtype.zero_point * inputs[0]->attr.dtype.scale;
     }
+    else if (inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP )
+    {
+        if (inputs[0]->attr.dtype.fl > 0)
+        {
+            input_scale = (1.0f / ((float) ((int64_t)1 << inputs[0]->attr.dtype.fl)));
+        }
+        else
+        {
+            input_scale = ((float) ((int64_t)1 << -inputs[0]->attr.dtype.fl));
+        }
+    }

     if (outputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC )
     {
-        input_scale = 1.0f / outputs[0]->attr.dtype.scale;
+        output_scale = 1.0f / outputs[0]->attr.dtype.scale;
         output_zp = (float)outputs[0]->attr.dtype.zero_point + 0.5f;
     }
+    else if (outputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP )
+    {
+        if (outputs[0]->attr.dtype.fl > 0)
+        {
+            output_scale = (float) ((int64_t)1 << outputs[0]->attr.dtype.fl);
+        }
+        else
+        {
+            output_scale = ((float) 1.0f / ((int64_t)1 << -outputs[0]->attr.dtype.fl));
+        }
+    }

     if ( (inputs[1]->attr.is_const && inputs[2]->attr.is_const)
         || ( inputs[1]->attr.dtype.vx_type != VSI_NN_TYPE_FLOAT16
@@ -262,7 +286,7 @@ static vsi_nn_kernel_node_t _setup
         return NULL;
     }

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
                 inputs[0]->attr.dim_num )
         )
     {
@@ -126,7 +126,7 @@ DEF_KERNEL_INITIALIZER(_cast_initializer)
         {0, 0, 0}
         };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
@@ -251,7 +251,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
                 inputs[0]->attr.dim_num ) )
     {
         return NULL;
@@ -120,7 +120,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
         {0, 0, 0}
         };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
@ -251,7 +251,7 @@ static vsi_nn_kernel_node_t _setup
|
|||
outputScale = 1.0f / outputScale;
|
||||
inputTail = -(inputTail * inputScale);
|
||||
|
||||
if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
|
||||
if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
|
||||
inputs[0]->attr.dim_num ) )
|
||||
{
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@@ -226,7 +226,7 @@ DEF_KERNEL_INITIALIZER(_comparisons_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -342,7 +342,7 @@ static vsi_nn_kernel_node_t _setup
     float input1Scale = inputs[1]->attr.dtype.scale;
     float input1Tail = (float)inputs[1]->attr.dtype.zero_point * input1Scale;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -124,7 +124,7 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * input_attr = NULL;
-    vsi_int_array_t * in_shape = NULL;
+    vsi_size_array_t * in_shape = NULL;

     input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );

@@ -225,7 +225,7 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -314,8 +314,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;
     vsi_nn_tensor_t* rs_tensors[2] = { NULL };
-    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
-    int32_t new_rank = 0;
+    vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
+    vsi_size_t new_rank = 0;
     vsi_bool ret;

     float inputScale = inputs[0]->attr.dtype.scale;

@@ -325,17 +325,17 @@ static vsi_nn_kernel_node_t _setup
     float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

     ret = vsi_nn_kernel_optimize_element_shape(
-            (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
+            inputs[0]->attr.size, inputs[0]->attr.dim_num,
             shape, &new_rank );
     if( ret )
     {
         rs_tensors[0] = vsi_nn_reshape_tensor( graph,
-                inputs[0], (uint32_t*)shape, new_rank );
+                inputs[0], shape, new_rank );
         rs_tensors[1] = vsi_nn_reshape_tensor( graph,
-                outputs[0], (uint32_t*)shape, new_rank );
+                outputs[0], shape, new_rank );
     }

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)rs_tensors[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
         rs_tensors[0]->attr.dim_num ) )
     {
         return NULL;

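vsi_nn_kernel_optimize_element_shape now takes the vsi_size_t shape directly.
Its exact behavior is internal to the library; the core idea, sketched under
that assumption, is that an element-wise kernel only cares about the total
element count, so the N-D shape can be collapsed before dispatch:

    #include <stddef.h>

    typedef size_t vsi_size_t;  /* stand-in so the sketch compiles without the library headers */

    /* Rough sketch only: fold an N-D shape into { total, 1 }. The real
     * helper also splits the count so each dimension stays within the
     * GPU's image-size limits. */
    static void flatten_element_shape( const vsi_size_t * shape, vsi_size_t rank,
                                       vsi_size_t out_shape[2], vsi_size_t * out_rank )
    {
        vsi_size_t i, total = 1;
        for ( i = 0; i < rank; i++ )
        {
            total *= shape[i];
        }
        out_shape[0] = total;
        out_shape[1] = 1;
        *out_rank = 2;
    }
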
@@ -133,7 +133,7 @@ DEF_KERNEL_INITIALIZER(_erf_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -233,8 +233,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t node_params[_ERF_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
     vsi_nn_tensor_t* rs_tensors[2] = { NULL };
-    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
-    int32_t new_rank = 0;
+    vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
+    vsi_size_t new_rank = 0;
     vsi_bool ret = FALSE;
     vsi_bool image_2d = FALSE;

@@ -244,17 +244,17 @@ static vsi_nn_kernel_node_t _setup
     float outputZP = (float)outputs[0]->attr.dtype.zero_point + 0.5f;

     ret = vsi_nn_kernel_optimize_element_shape(
-            (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
+            inputs[0]->attr.size, inputs[0]->attr.dim_num,
             shape, &new_rank );
     if ( ret )
     {
         rs_tensors[0] = vsi_nn_reshape_tensor( graph,
-                inputs[0], (uint32_t*)shape, new_rank );
+                inputs[0], shape, new_rank );
         rs_tensors[1] = vsi_nn_reshape_tensor( graph,
-                outputs[0], (uint32_t*)shape, new_rank );
+                outputs[0], shape, new_rank );
     }

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)rs_tensors[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( rs_tensors[0]->attr.size,
         rs_tensors[0]->attr.dim_num ) )
     {
         return NULL;

@@ -126,7 +126,7 @@ DEF_KERNEL_INITIALIZER(_floordiv_initializer)
     vx_status status = VX_FAILURE;
     vx_tensor output = (vx_tensor)param[2];
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t *output_shape = NULL;
+    vsi_size_array_t *output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
     CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

@@ -266,7 +266,7 @@ static vsi_nn_kernel_node_t _setup
     input0Tail = -(input0Tail * input0Scale);
     input1Tail = -(input1Tail * input1Scale);

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -95,16 +95,16 @@ static vx_param_description_t _gather_kernel_param_def[] =
 static vsi_status cal_gather_tensor_reshape_size
     (
     vsi_nn_tensor_t ** inputs,
-    int32_t sizes[VSI_NN_MAX_DIM_NUM],
+    vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
     uint32_t block_size,
     uint32_t idxFlg
     )
 {
     vsi_status status = VSI_FAILURE;
     uint32_t dims_num = inputs[0]->attr.dim_num;
-    uint32_t *input_size = inputs[0]->attr.size;
+    vsi_size_t *input_size = inputs[0]->attr.size;
     uint32_t i = 0;
-    uint32_t elementCnt = 1;
+    vsi_size_t elementCnt = 1;
 #define VSI_NN_MAX_IMAGE_WIDTH (65536)

     for(i = 0; i < dims_num; ++i)

@@ -157,12 +157,12 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * input1_shape = NULL;
+    vsi_size_array_t * input1_shape = NULL;
     int32_t block_size = 0;
     int32_t block_num = 0;
-    int32_t indices_num = 1;
-    uint32_t input_dims1 = 0;
-    vx_uint32 i = 0;
+    vsi_ssize_t indices_num = 1;
+    size_t input_dims1 = 0;
+    size_t i = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -175,7 +175,7 @@ DEF_KERNEL_INITIALIZER(_gather_initializer)
     CHECK_STATUS_FAIL_GOTO(status, final );

     input1_shape = attr[1]->shape;
-    input_dims1 = (uint32_t)input1_shape->size;
+    input_dims1 = input1_shape->size;
     for (i = 0; i < input_dims1; i++)
     {
         indices_num *= input1_shape->data[i];

@@ -270,7 +270,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_node_param_t node_params[_GATHER_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
-    int32_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
+    vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
     int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
     int32_t block_num = vsi_nn_kernel_param_get_int32( params, "block_num" );
     int32_t axis_num = vsi_nn_kernel_param_get_int32( params, "axis_num" );

@@ -284,7 +284,7 @@ static vsi_nn_kernel_node_t _setup
         return NULL;
     }

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

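Both gather reshape helpers above flatten the tensor and then guard against
VSI_NN_MAX_IMAGE_WIDTH. A hedged sketch of the 2-D split such helpers perform
(an illustration only; the real code may pick its factors differently):

    #include <stddef.h>

    typedef size_t vsi_size_t;  /* stand-in so the sketch compiles without the library headers */

    #define VSI_NN_MAX_IMAGE_WIDTH (65536)

    /* Rough sketch: keep a flattened element count legal for a GPU image by
     * folding it into { w, count / w } with both factors under the width cap.
     * Returns 0 if no exact factorization under the cap exists. */
    static int split_under_width_cap( vsi_size_t count, vsi_size_t sizes[2] )
    {
        vsi_size_t w;
        for ( w = VSI_NN_MAX_IMAGE_WIDTH; w >= 1; w-- )
        {
            if ( count % w == 0 && count / w <= VSI_NN_MAX_IMAGE_WIDTH )
            {
                sizes[0] = w;
                sizes[1] = count / w;
                return 1;
            }
        }
        return 0;
    }
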
@@ -99,7 +99,7 @@ static vx_param_description_t _gather_nd_kernel_param_def[] =
 static vsi_status cal_gather_nd_tensor_reshape_size
     (
     vsi_nn_tensor_t ** inputs,
-    int32_t sizes[VSI_NN_MAX_DIM_NUM],
+    vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
     uint32_t block_size,
     uint32_t coordDim,
     int32_t* newDim

@@ -107,9 +107,9 @@ static vsi_status cal_gather_nd_tensor_reshape_size
 {
     vsi_status status = VSI_FAILURE;
     uint32_t dims_num = inputs[0]->attr.dim_num;
-    uint32_t *input_size = inputs[0]->attr.size;
+    vsi_size_t *input_size = inputs[0]->attr.size;
     uint32_t i = 0;
-    uint32_t elementCnt = 1;
+    vsi_size_t elementCnt = 1;
 #define VSI_NN_MAX_IMAGE_WIDTH (65536)

     newDim[0] = 0;

@@ -181,7 +181,7 @@ DEF_KERNEL_INITIALIZER(_gather_nd_initializer)

     vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
     int32_t block_size = 0;
-    int32_t indices_num = 1;
+    vsi_ssize_t indices_num = 1;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -283,7 +283,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_node_param_t node_params[_GATHER_ND_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
-    int32_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
+    vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
     int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
     int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
     int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;

@@ -296,7 +296,7 @@ static vsi_nn_kernel_node_t _setup
         return NULL;
     }

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -217,9 +217,9 @@ DEF_KERNEL_INITIALIZER(_groupnorm_sum_sqr_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
-    int32_t width = 0;
-    int32_t chn = 0;
+    vsi_size_array_t * input_shape = NULL;
+    vsi_ssize_t width = 0;
+    vsi_ssize_t chn = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -274,7 +274,7 @@ DEF_KERNEL_INITIALIZER(_groupnorm_mean_vari_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-    int32_t chn = 0;
+    vsi_ssize_t chn = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -320,10 +320,10 @@ DEF_KERNEL_INITIALIZER(_groupnorm_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
-    int32_t width = 0;
-    int32_t height = 0;
-    int32_t chn = 0;
+    vsi_size_array_t * input_shape = NULL;
+    vsi_ssize_t width = 0;
+    vsi_ssize_t height = 0;
+    vsi_ssize_t chn = 0;
     int32_t is2D = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );

@@ -448,15 +448,15 @@ static vsi_status _query_kernel
 static int32_t _optimize_gn_shape_cl
     (
     vsi_nn_tensor_t ** inputs,
-    int32_t group_size,
+    vsi_size_t group_size,
     int32_t group_num,
-    int32_t* opt_shape,
+    vsi_size_t* opt_shape,
     int32_t* is2D_flg
     )
 {
     vsi_status status = VSI_SUCCESS;
-    int32_t group_shape[VSI_NN_MAX_DIM_NUM] = {0};
-    int32_t new_rank = 0;
+    vsi_size_t group_shape[VSI_NN_MAX_DIM_NUM] = {0};
+    vsi_size_t new_rank = 0;
     group_shape[0] = inputs[0]->attr.size[0];
     group_shape[1] = inputs[0]->attr.size[1];
     group_shape[2] = group_size;

@@ -510,17 +510,17 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_t * ikernels[INTERNAL_KERNEL_SIZE] = { NULL };
     vsi_nn_tensor_t * tensors[INTERNAL_KERNEL_SIZE] = { NULL };
     vsi_nn_kernel_tensor_t rs_input = NULL, rs_output = NULL;
-    int32_t new_shape[VSI_NN_MAX_DIM_NUM] = { 1, 1, 1, 1 };
+    vsi_size_t new_shape[VSI_NN_MAX_DIM_NUM] = { 1, 1, 1, 1 };
     int32_t is2D_flg = 0;
     uint32_t hashkeys[INTERNAL_KERNEL_SIZE] = { 0 };
     uint32_t hashkey = 0;
     int32_t i = 0;
     float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
     int32_t group_num = vsi_nn_kernel_param_get_int32( params, "group_num" );
-    int32_t group_size = inputs[0]->attr.size[2] / group_num;
+    vsi_size_t group_size = inputs[0]->attr.size[2] / group_num;

-    int32_t width = inputs[0]->attr.size[0];
-    int32_t height = inputs[0]->attr.size[1];
+    vsi_size_t width = inputs[0]->attr.size[0];
+    vsi_size_t height = inputs[0]->attr.size[1];
     int32_t group_stride = 1;
     float input_zp = 0;
     float input_scale = 1.0f;

@@ -531,7 +531,7 @@ static vsi_nn_kernel_node_t _setup
     float rSpaceOrg = 1.0f / (width * height);
     float group_ratio = 1.0f / (inputs[0]->attr.size[0] * inputs[0]->attr.size[1] * group_size);

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -547,7 +547,7 @@ static vsi_nn_kernel_node_t _setup

     width = new_shape[0];
     height = is2D_flg > 0 ? 1 : new_shape[1];
-    group_stride = ((width + 15) / 16) * 4;
+    group_stride = (int32_t)(((width + 15) / 16) * 4);

     if (inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC)
     {

@@ -693,7 +693,7 @@ static vsi_nn_kernel_node_t _setup
         int32_t pStride = 0;
         if (!is2D_flg)
         {
-            pStride = inputs[1]->attr.size[0] / new_shape[1];
+            pStride = (int32_t)(inputs[1]->attr.size[0] / new_shape[1]);
             rSpaceOrg = 1.0f / (new_shape[0] / pStride);
         }
         node_params[index++] = rs_input;

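group_stride above is now computed in the widened type and narrowed with an
explicit cast. The expression itself is a ceiling division into 16-element
groups, times 4; as a standalone sketch (hypothetical helper name):

    #include <stdint.h>
    #include <stddef.h>

    typedef size_t vsi_size_t;  /* stand-in so the sketch compiles without the library headers */

    /* ceil(width / 16) * 4, done in the wide type first so the arithmetic
     * cannot truncate before the final, explicit narrowing. */
    static int32_t compute_group_stride( vsi_size_t width )
    {
        return (int32_t)( ( ( width + 15 ) / 16 ) * 4 );
    }
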
@@ -196,11 +196,11 @@ DEF_KERNEL_INITIALIZER(_instancenorm_mean_vari_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
+    vsi_size_array_t * input_shape = NULL;
     int32_t rsFlg = 0;
-    int32_t width = 0;
-    int32_t height = 0;
-    int32_t chn = 0;
+    vsi_ssize_t width = 0;
+    vsi_ssize_t height = 0;
+    vsi_ssize_t chn = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -263,11 +263,11 @@ DEF_KERNEL_INITIALIZER(_instancenorm_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
+    vsi_size_array_t * input_shape = NULL;
     int32_t rsFlg = 0;
-    int32_t width = 0;
-    int32_t height = 0;
-    int32_t chn = 0;
+    vsi_ssize_t width = 0;
+    vsi_ssize_t height = 0;
+    vsi_ssize_t chn = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -410,9 +410,9 @@ static vsi_nn_kernel_node_t _setup
     float eps = vsi_nn_kernel_param_get_float32( params, "eps" );
     int32_t reshape_flg = vsi_nn_kernel_param_get_int32( params, "reshape_flg" );

-    int32_t width = inputs[0]->attr.size[0];
-    int32_t height = inputs[0]->attr.size[1];
-    int32_t group_num = (width + 15) / 16;
+    size_t width = inputs[0]->attr.size[0];
+    size_t height = inputs[0]->attr.size[1];
+    int32_t group_num = (int32_t)(width + 15) / 16;
     int32_t input_zp = 0;
     float input_scale = 1.0f;
     int32_t input_fl = 0;

@@ -460,7 +460,7 @@ static vsi_nn_kernel_node_t _setup
         output_zp = 0;
     }

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -504,7 +504,7 @@ static vsi_nn_kernel_node_t _setup

     if (reshape_flg)
     {
-        int32_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+        vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
         shape[0] = inputs[0]->attr.size[0];
         shape[1] = inputs[0]->attr.size[1] * inputs[0]->attr.size[2];
         shape[2] = 1;

@@ -519,7 +519,7 @@ static vsi_nn_kernel_node_t _setup
     }
     if (inputs[1]->attr.dim_num < 2)
     {
-        int32_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+        vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
         shape[0] = inputs[1]->attr.size[0];
         shape[1] = 1;
         shape[2] = 1;

@@ -528,7 +528,7 @@ static vsi_nn_kernel_node_t _setup
     }
     if (inputs[2]->attr.dim_num < 2)
     {
-        int32_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+        vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
         shape[0] = inputs[2]->attr.size[0];
         shape[1] = 1;
         shape[2] = 1;

@@ -114,7 +114,7 @@ DEF_KERNEL_INITIALIZER(_l2normalizescale_initializer)
     };
     int32_t axis = 0;
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t * output_shape = NULL;
+    vsi_size_array_t * output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -258,7 +258,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     vsi_bool image_2d = FALSE;
     int32_t axis = 0;
-    int32_t axis_size = 0;
+    vsi_size_t axis_size = 0;
     float outputScale = outputs[0]->attr.dtype.scale == 0.0f ? 1.0f : outputs[0]->attr.dtype.scale;
     float outputTail = (float)outputs[0]->attr.dtype.zero_point;
     float inputScale = inputs[0]->attr.dtype.scale == 0.0f ? 1.0f : inputs[0]->attr.dtype.scale;

@@ -272,9 +272,9 @@ static vsi_nn_kernel_node_t _setup

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -118,10 +118,10 @@ DEF_KERNEL_INITIALIZER(_layernorm_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
+    vsi_size_array_t * input_shape = NULL;
     //int32_t width = 0;
-    int32_t height = 0;
-    int32_t chn = 0;
+    vsi_ssize_t height = 0;
+    vsi_ssize_t chn = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -231,8 +231,8 @@ static vsi_nn_kernel_node_t _setup

     float eps = vsi_nn_kernel_param_get_float32( params, "eps" );

-    int32_t width = inputs[0]->attr.size[0];
-    int32_t height = inputs[0]->attr.size[1];
+    vsi_size_t width = inputs[0]->attr.size[0];
+    vsi_size_t height = inputs[0]->attr.size[1];
     int32_t input_fl = 0;
     float input_zp = 0.0f;
     float input_scale = 1.0f;

@@ -288,7 +288,7 @@ static vsi_nn_kernel_node_t _setup
     zp2ScaleE2 = input_zp * 2 * e2InScale;
     sumZpScaleE2 = width * input_zp * input_zp * e2InScale;

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -302,7 +302,7 @@ static vsi_nn_kernel_node_t _setup

     if (inputs[1]->attr.dim_num < 2)
     {
-        int32_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+        vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
         shape[0] = inputs[1]->attr.size[0];
         shape[1] = 1;
         shape[2] = 1;

@@ -311,7 +311,7 @@ static vsi_nn_kernel_node_t _setup
     }
     if (inputs[2]->attr.dim_num < 2)
     {
-        int32_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+        vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
         shape[0] = inputs[2]->attr.size[0];
         shape[1] = 1;
         shape[2] = 1;

@@ -145,7 +145,7 @@ DEF_KERNEL_INITIALIZER(_log_softmax_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;
     int32_t axis = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );

@@ -251,7 +251,7 @@ static vsi_nn_kernel_node_t _setup
     scaleValue = scaleValue * beta * inputScale;
     beta = beta * inputScale;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -108,7 +108,7 @@ DEF_KERNEL_INITIALIZER(_logical_not_initializer)
     vx_status status = VX_FAILURE;
     vx_tensor output = (vx_tensor)param[1];
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t *output_shape = NULL;
+    vsi_size_array_t *output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
     CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

@@ -213,7 +213,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -114,7 +114,7 @@ DEF_KERNEL_INITIALIZER(_logical_ops_initializer)
     vx_status status = VX_FAILURE;
     vx_tensor output = (vx_tensor)param[2];
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t *output_shape = NULL;
+    vsi_size_array_t *output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
     CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

@@ -230,7 +230,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     uint32_t ops_type = vsi_nn_kernel_param_get_int32( params, "ops_type" );

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -140,9 +140,9 @@ DEF_KERNEL_INITIALIZER(_matrixmul_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-    int32_t width = 0;
-    int32_t height = 0;
-    int32_t chn = 0;
+    vsi_ssize_t width = 0;
+    vsi_ssize_t height = 0;
+    vsi_ssize_t chn = 0;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -178,7 +178,7 @@ static vsi_status _query_kernel
     vsi_nn_kernel_t * kernel,
     vsi_nn_tensor_t * const * const inputs,
     vsi_nn_tensor_t * const * const outputs,
-    int32_t depth,
+    vsi_size_t depth,
     int32_t transa
     )
 {

@@ -247,10 +247,10 @@ static vsi_nn_kernel_node_t _setup
     int32_t transposeA = vsi_nn_kernel_param_get_int32( params, "transposeA" );
     int32_t transposeB = vsi_nn_kernel_param_get_int32( params, "transposeB" );
     int32_t transFlg = 0;
-    uint32_t M = inputs[0]->attr.size[1];
-    uint32_t K = inputs[0]->attr.size[0];
-    uint32_t N = inputs[1]->attr.size[0];
-    uint32_t depth = outputs[0]->attr.dim_num > 2 ? outputs[0]->attr.size[2] : 1;
+    vsi_size_t M = inputs[0]->attr.size[1];
+    vsi_size_t K = inputs[0]->attr.size[0];
+    vsi_size_t N = inputs[1]->attr.size[0];
+    vsi_size_t depth = outputs[0]->attr.dim_num > 2 ? outputs[0]->attr.size[2] : 1;
     uint32_t ac2zero = 0;
     uint32_t bc2zero = 0;
     float scale_a = 1.0f;

@@ -260,7 +260,7 @@ static vsi_nn_kernel_node_t _setup
     float scale_out = 1.0f;
     float zp_out = 0;

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

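M, K, N and the batch depth now use vsi_size_t. For orientation (an inference
from the reads above, not something the diff states): sizes are stored
innermost-first, so inputs[0] is an M x K matrix with size[0] = K, inputs[1]
is K x N with size[0] = N, and the per-batch result is M x N. A sketch:

    #include <stddef.h>

    typedef size_t vsi_size_t;  /* stand-in so the sketch compiles without the library headers */

    /* Output extents for one batched matmul, innermost dimension first. */
    static void matmul_out_shape( vsi_size_t M, vsi_size_t N, vsi_size_t depth,
                                  vsi_size_t out[3] )
    {
        out[0] = N;      /* columns (innermost)      */
        out[1] = M;      /* rows                     */
        out[2] = depth;  /* batch; 1 when rank <= 2  */
    }
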
@@ -137,7 +137,7 @@ DEF_KERNEL_INITIALIZER(_maximum_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -248,7 +248,7 @@ static vsi_nn_kernel_node_t _setup

     outputScale = vsi_abs(outputScale) < 1e-5 ? 0.0f : 1.0f / outputScale;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -136,7 +136,7 @@ DEF_KERNEL_INITIALIZER(_minimum_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -247,7 +247,7 @@ static vsi_nn_kernel_node_t _setup

     outputScale = vsi_abs(outputScale) < 1e-5 ? 0.0f : 1.0f / outputScale;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -98,24 +98,19 @@ typedef struct
 static const _kernel_map_type moments_map[] =
 {
     // Register kernel here
-    TENSOR_MOMENTS_KERNELS(U8, F16, 0, KERNEL_SOURCE_1)
-    TENSOR_MOMENTS_KERNELS(F16, F16, 0, KERNEL_SOURCE_1)
     TENSOR_MOMENTS_KERNELS(U8, F32, 0, KERNEL_SOURCE_1)
     TENSOR_MOMENTS_KERNELS(F32, F32, 0, KERNEL_SOURCE_1)
+    TENSOR_MOMENTS_KERNELS(I32, F32, 0, KERNEL_SOURCE_1)
-    TENSOR_MOMENTS_KERNELS(U8, F16, 1, KERNEL_SOURCE_2)
-    TENSOR_MOMENTS_KERNELS(F16, F16, 1, KERNEL_SOURCE_2)
     TENSOR_MOMENTS_KERNELS(U8, F32, 1, KERNEL_SOURCE_2)
     TENSOR_MOMENTS_KERNELS(F32, F32, 1, KERNEL_SOURCE_2)
+    TENSOR_MOMENTS_KERNELS(I32, F32, 1, KERNEL_SOURCE_2)
-    TENSOR_MOMENTS_KERNELS(U8, F16, 2, KERNEL_SOURCE_3)
-    TENSOR_MOMENTS_KERNELS(F16, F16, 2, KERNEL_SOURCE_3)
     TENSOR_MOMENTS_KERNELS(U8, F32, 2, KERNEL_SOURCE_3)
     TENSOR_MOMENTS_KERNELS(F32, F32, 2, KERNEL_SOURCE_3)
+    TENSOR_MOMENTS_KERNELS(I32, F32, 2, KERNEL_SOURCE_3)
-    TENSOR_MOMENTS_TWO_AXIS_KERNELS(U8, F16, 0, 1, KERNEL_SOURCE_4)
-    TENSOR_MOMENTS_TWO_AXIS_KERNELS(F16, F16, 0, 1, KERNEL_SOURCE_4)
     TENSOR_MOMENTS_TWO_AXIS_KERNELS(U8, F32, 0, 1, KERNEL_SOURCE_4)
     TENSOR_MOMENTS_TWO_AXIS_KERNELS(F32, F32, 0, 1, KERNEL_SOURCE_4)
+    TENSOR_MOMENTS_TWO_AXIS_KERNELS(I32, F32, 0, 1, KERNEL_SOURCE_4)
-    TENSOR_MOMENTS_THREE_AXIS_KERNELS(U8, F16, 0, 1, 2, KERNEL_SOURCE_5)
-    TENSOR_MOMENTS_THREE_AXIS_KERNELS(F16, F16, 0, 1, 2, KERNEL_SOURCE_5)
     TENSOR_MOMENTS_THREE_AXIS_KERNELS(U8, F32, 0, 1, 2, KERNEL_SOURCE_5)
     TENSOR_MOMENTS_THREE_AXIS_KERNELS(F32, F32, 0, 1, 2, KERNEL_SOURCE_5)
+    TENSOR_MOMENTS_THREE_AXIS_KERNELS(I32, F32, 0, 1, 2, KERNEL_SOURCE_5)
 };

@@ -160,13 +155,13 @@ static int32_t set_constant_border
 static int32_t get_moments_output_reshape_size
     (
     vsi_nn_tensor_t ** outputs,
-    int32_t sizes[VSI_NN_MAX_DIM_NUM],
+    vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
     int32_t* axis,
     int32_t axis_num
     )
 {
     uint32_t out_dims_num = outputs[0]->attr.dim_num;
-    uint32_t *output_size = outputs[0]->attr.size;
+    vsi_size_t *output_size = outputs[0]->attr.size;
     uint32_t i = 0;
     int32_t out_rs_flg = 0;

@@ -217,10 +212,10 @@ DEF_KERNEL_INITIALIZER(_moments_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
-    int32_t width = 0;
-    int32_t height = 0;
-    int32_t chn = 0;
+    vsi_size_array_t * input_shape = NULL;
+    vsi_ssize_t width = 0;
+    vsi_ssize_t height = 0;
+    vsi_ssize_t chn = 0;
     int32_t axis = 0;
     int32_t axis_num = 1;

@@ -311,6 +306,15 @@ static vsi_status _query_kernel
     input0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
     output_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

+    if (input0_dtype == I8 || input0_dtype == I16)
+    {
+        input0_dtype = I32;
+    }
+    else if (input0_dtype == F16)
+    {
+        input0_dtype = F32;
+    }
+    output_dtype = output_dtype == F16 ? F32 : output_dtype;
     key = HASH_MOMENTS_KEY( input0_dtype, output_dtype, axis_num, axis[0], axis[1], axis[2], rs_flg );

     for( i = 0; i < _cnt_of_array(moments_map); i ++ )

@@ -353,8 +357,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_node_param_t node_params[_MOMENTS_PARAM_NUM] = { NULL };
     vsi_nn_kernel_node_t node = NULL;
-    int32_t out_shape[VSI_NN_MAX_DIM_NUM] = {0};
-    int32_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+    vsi_size_t out_shape[VSI_NN_MAX_DIM_NUM] = {0};
+    vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
     int32_t out_rs_flg = 0;
     int32_t axis_num = 0;
     size_t axis_num_temp = 0;

@@ -365,9 +369,9 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_scalar_t scalar_list[INTERNAL_MOMENTS_SCALAR_NUM] = {NULL};
     vsi_nn_kernel_tensor_t reshape_tensors[3] = { NULL };

-    int32_t width = inputs[0]->attr.size[0];
-    int32_t height = inputs[0]->attr.size[1];
-    int32_t chn = inputs[0]->attr.size[2];
+    vsi_size_t width = inputs[0]->attr.size[0];
+    vsi_size_t height = inputs[0]->attr.size[1];
+    vsi_size_t chn = inputs[0]->attr.size[2];
     int32_t input_zp = inputs[0]->attr.dtype.zero_point;
     float input_scale = inputs[0]->attr.dtype.scale;
     float dim_ratio = (float)1.0 / (float)(width * height);

@@ -408,7 +412,7 @@ static vsi_nn_kernel_node_t _setup
         dim_ratio = (float)1.0 / (float)(width * height * chn);
     }

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -449,6 +453,7 @@ static vsi_nn_kernel_node_t _setup
     if ( node )
     {
         uint32_t index = 0;
+        int32_t constant_value = 0;
         /* Pass parameters to node. */
         if (reshape_tensors[0])
         {

@@ -488,7 +493,12 @@ static vsi_nn_kernel_node_t _setup
             vsi_nn_kernel_tensor_release( &node_params[1] );
             vsi_nn_kernel_tensor_release( &node_params[2] );
         }
-        status = set_constant_border(node, inputs[0]->attr.dtype.zero_point);
+
+        if (inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_AFFINE_ASYMMETRIC)
+        {
+            constant_value = inputs[0]->attr.dtype.zero_point;
+        }
+        status = set_constant_border(node, constant_value);
         CHECK_STATUS(status);
     }
 }

@@ -521,4 +531,3 @@ static vsi_nn_kernel_node_t _setup
 __END_DECLS

 REGISTER_BACKEND_CL( moments, _setup )
-

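The replacement of the unconditional set_constant_border(node, zero_point)
call encodes a simple rule: only affine-asymmetric quantized inputs pad with
their zero point; everything else pads with zero. As a standalone sketch
(hypothetical helper name):

    #include <stdint.h>
    #include <stdbool.h>

    /* Border value for the moments kernel: the input's zero point only when
     * it is affine-asymmetric quantized, 0 for all other dtypes. */
    static int32_t moments_border_value( bool is_affine_asymmetric,
                                         int32_t zero_point )
    {
        return is_affine_asymmetric ? zero_point : 0;
    }
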
@@ -119,7 +119,7 @@ DEF_KERNEL_INITIALIZER(_one_hot_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-    vsi_int_array_t * in_shape = NULL;
+    vsi_size_array_t * in_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -228,11 +228,11 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t node_params[_ONE_HOT_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
     vsi_nn_tensor_t* rs_tensors[2] = { NULL };
-    int32_t shape[2][VSI_NN_MAX_DIM_NUM] = {{ 0 }};
+    vsi_size_t shape[2][VSI_NN_MAX_DIM_NUM] = {{ 0 }};
     int32_t i = 0;
-    int32_t num_elements = vsi_nn_vxGetTensorElementNum(&inputs[0]->attr);
-    int32_t prefix_dim_size = 1;
-    int32_t suffix_dim_size = 0;
+    vsi_size_t num_elements = vsi_nn_vxGetTensorElementNum(&inputs[0]->attr);
+    vsi_size_t prefix_dim_size = 1;
+    vsi_size_t suffix_dim_size = 0;
     int32_t depth = vsi_nn_kernel_param_get_int32( params, "depth" );
     vsi_nn_kernel_dtype_e out_dtype;
     uint32_t data[2] = {0};

@@ -270,11 +270,11 @@ static vsi_nn_kernel_node_t _setup
     shape[1][2] = prefix_dim_size;

     rs_tensors[0] = vsi_nn_reshape_tensor( graph,
-        inputs[0], (uint32_t*)shape[0], 2 );
+        inputs[0], shape[0], 2 );
     rs_tensors[1] = vsi_nn_reshape_tensor( graph,
-        outputs[0], (uint32_t*)shape[1], 3 );
+        outputs[0], shape[1], 3 );

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)rs_tensors[1]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( rs_tensors[1]->attr.size,
         rs_tensors[1]->attr.dim_num ) )
     {
         return NULL;

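The one-hot setup splits the flattened input into a prefix and a suffix around
the insertion axis so the kernel can run on a low-rank view. A sketch of that
split (the axis convention here is an assumption; the diff only shows the
resulting prefix_dim_size / suffix_dim_size uses):

    #include <stddef.h>

    typedef size_t vsi_size_t;  /* stand-in so the sketch compiles without the library headers */

    /* Product of the dims on each side of `axis`; the one-hot dimension of
     * size `depth` is then inserted between the two. */
    static void one_hot_split( const vsi_size_t * shape, vsi_size_t rank,
                               vsi_size_t axis,
                               vsi_size_t * prefix, vsi_size_t * suffix )
    {
        vsi_size_t i;
        *prefix = 1;
        *suffix = 1;
        for ( i = 0; i < axis; i++ )
        {
            *suffix *= shape[i];
        }
        for ( i = axis; i < rank; i++ )
        {
            *prefix *= shape[i];
        }
    }
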
@@ -114,7 +114,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
     vx_status status = VX_FAILURE;
     vx_tensor output = (vx_tensor)param[1];
     vsi_nn_kernel_tensor_attr_t * attr_out = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;
     vsi_bool image_2d = FALSE;

     attr_out = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );

@@ -262,11 +262,11 @@ static vsi_nn_kernel_node_t _setup
         return NULL;
     }

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[1]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[1]->attr.size,
         outputs[1]->attr.dim_num ))
     {
         return NULL;

@@ -118,7 +118,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -220,7 +220,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -134,7 +134,7 @@ DEF_KERNEL_INITIALIZER(_prelu_initializer)

     vsi_status status = VSI_FAILURE;
     vsi_nn_kernel_tensor_attr_t * attr[3] = { NULL };
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -236,8 +236,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_nn_kernel_node_t node = NULL;
     vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
-    int32_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
-    uint32_t new_rank = 0;
+    vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
+    vsi_size_t new_rank = 0;
     vsi_bool ret;

     float input0Scale = inputs[0]->attr.dtype.scale;

@@ -258,26 +258,26 @@ static vsi_nn_kernel_node_t _setup
     outputScale = vsi_abs(outputScale) < 1e-5 ? 0.0f : 1.0f / outputScale;

     ret = vsi_nn_kernel_optimize_eltwise_shape(
-            (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
-            (int32_t *)inputs[1]->attr.size, inputs[1]->attr.dim_num,
-            (int32_t *)outputs[0]->attr.size, outputs[0]->attr.dim_num,
+            inputs[0]->attr.size, inputs[0]->attr.dim_num,
+            inputs[1]->attr.size, inputs[1]->attr.dim_num,
+            outputs[0]->attr.size, outputs[0]->attr.dim_num,
             shapes[0], shapes[1], shapes[2], &new_rank );

     if (ret)
     {
         reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
-            inputs[0], (uint32_t*)shapes[0], new_rank );
+            inputs[0], shapes[0], (uint32_t)new_rank );
         reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
-            inputs[1], (uint32_t*)shapes[1], new_rank );
+            inputs[1], shapes[1], (uint32_t)new_rank );
         reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
-            outputs[0], (uint32_t*)shapes[2], new_rank );
+            outputs[0], shapes[2], (uint32_t)new_rank );
     }
     else
     {
         return NULL;
     }

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)reshape_tensors[2]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[2]->attr.size,
         reshape_tensors[2]->attr.dim_num ) )
     {
         goto final;

@@ -151,7 +151,7 @@ DEF_KERNEL_INITIALIZER(_multinomial_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * attr = NULL;
-    vsi_int_array_t * in_shape = NULL;
+    vsi_size_array_t * in_shape = NULL;

     attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr, "Create tensor attr buffer fail.", final );

@@ -193,8 +193,8 @@ DEF_KERNEL_INITIALIZER(_cdf_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * attr = NULL;
-    vsi_int_array_t * in_shape = NULL;
-    uint32_t batch = 0;
+    vsi_size_array_t * in_shape = NULL;
+    vsi_size_t batch = 0;

     attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
     CHECK_PTR_FAIL_GOTO( attr, "Create tensor attr buffer fail.", final );

@@ -347,13 +347,13 @@ static vsi_nn_kernel_node_t _setup
     uint32_t hashkeys[INTERNAL_KERNEL_SIZE] = { 0 };
     uint32_t hashkey = 0;
     int32_t i;
-    uint32_t iteration = (outputs[0]->attr.size[0] + 3) / 4;
+    uint32_t iteration = (uint32_t)((outputs[0]->attr.size[0] + 3) / 4);
     float rand_max = (float)(pow(2.0,32));
     float re_rand_max = 1 / rand_max;

     // Check if gpu can support the size
     if( !vsi_nn_kernel_gpu_check_shape(
-        (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num ) )
+        outputs[0]->attr.size, outputs[0]->attr.dim_num ) )
     {
         return NULL;
     }

@@ -380,7 +380,7 @@ static vsi_nn_kernel_node_t _setup
     attr.size[1] = 1;
     attr.dim_num = 2;
     tensors[SEEDS_INDEX] = vsi_nn_reshape_tensor( graph,
-        inputs[1], (uint32_t*)attr.size, attr.dim_num );
+        inputs[1], attr.size, attr.dim_num );

     in0_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
     in1_dtype = vsi_nn_kernel_map_dtype( inputs[1]->attr.dtype.vx_type );

@@ -103,7 +103,7 @@ DEF_KERNEL_INITIALIZER(_reduceall_internal_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t * output_shape = NULL;
+    vsi_size_array_t * output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -214,9 +214,9 @@ static vsi_nn_kernel_node_t _setup

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -103,7 +103,7 @@ DEF_KERNEL_INITIALIZER(_reduceany_internal_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t * output_shape = NULL;
+    vsi_size_array_t * output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -214,9 +214,9 @@ static vsi_nn_kernel_node_t _setup

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -119,7 +119,7 @@ DEF_KERNEL_INITIALIZER(_reducemax_internal_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t * output_shape = NULL;
+    vsi_size_array_t * output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -246,9 +246,9 @@ static vsi_nn_kernel_node_t _setup

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -117,7 +117,7 @@ DEF_KERNEL_INITIALIZER(_reducemin_internal_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t * output_shape = NULL;
+    vsi_size_array_t * output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -235,9 +235,9 @@ static vsi_nn_kernel_node_t _setup

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -127,7 +127,7 @@ DEF_KERNEL_INITIALIZER(_reduceprod_internal_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-    vsi_int_array_t * output_shape = NULL;
+    vsi_size_array_t * output_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -258,9 +258,9 @@ static vsi_nn_kernel_node_t _setup

     axis = vsi_nn_kernel_param_get_int32(params, "axis");

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num )
-        || !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+        || !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num )
         || axis > 2)
     {

@@ -124,7 +124,7 @@ DEF_KERNEL_INITIALIZER(_relu_keras_initializer)
        {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -254,7 +254,7 @@ static vsi_nn_kernel_node_t _setup
     float threshold = vsi_nn_kernel_param_get_float32( params, "threshold" );
     float offset = -alpha * threshold;

-    if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+    if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
         inputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -124,8 +124,8 @@ DEF_KERNEL_INITIALIZER(_repeat_initializer)
     };

     vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-    vsi_int_array_t * input_shape = NULL;
-    int32_t height = 0, width = 0, chn = 0;
+    vsi_size_array_t * input_shape = NULL;
+    vsi_ssize_t height = 0, width = 0, chn = 0;
     int32_t is1d = 0;
     int32_t axis = 0;

@@ -237,9 +237,9 @@ static int32_t _optimize_repeat_shape
     vsi_nn_tensor_t ** inputs,
     vsi_nn_tensor_t ** outputs,
     int32_t* axis,
-    int32_t* opt_shape_in,
-    int32_t* opt_shape_out,
-    int32_t* new_rank
+    vsi_size_t* opt_shape_in,
+    vsi_size_t* opt_shape_out,
+    vsi_size_t* new_rank
     )
 {
     vsi_status status = VSI_SUCCESS;

@@ -255,7 +255,7 @@ static int32_t _optimize_repeat_shape
     }
     else if (axis[0] == 3)
     {
-        vsi_nn_kernel_optimize_element_shape( (int32_t*)inputs[0]->attr.size, 3, opt_shape_in, new_rank );
+        vsi_nn_kernel_optimize_element_shape( inputs[0]->attr.size, 3, opt_shape_in, new_rank );
         if (opt_shape_in[1] == 1)
         {
             opt_shape_in[1] = inputs[0]->attr.size[3];

@@ -300,15 +300,15 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t node_params[_REPEAT_PARAM_NUM] = { NULL };
     vsi_nn_kernel_node_t node = NULL;
     vsi_nn_kernel_tensor_t rs_input = NULL, rs_input1 = NULL, rs_output = NULL;
-    int32_t new_shape[2][VSI_NN_MAX_DIM_NUM] = {{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }};
-    int32_t new_rank[2] = {0, 0};
+    vsi_size_t new_shape[2][VSI_NN_MAX_DIM_NUM] = {{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }};
+    vsi_size_t new_rank[2] = {0, 0};
     int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );

-    int32_t width = inputs[0]->attr.size[0];
-    int32_t height = inputs[0]->attr.dim_num > 1 ? inputs[0]->attr.size[1] : 1;
-    int32_t channel = inputs[0]->attr.dim_num > 2 ? inputs[0]->attr.size[2] : 1;
+    vsi_size_t width = inputs[0]->attr.size[0];
+    vsi_size_t height = inputs[0]->attr.dim_num > 1 ? inputs[0]->attr.size[1] : 1;
+    vsi_size_t channel = inputs[0]->attr.dim_num > 2 ? inputs[0]->attr.size[2] : 1;

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
         outputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -114,7 +114,7 @@ DEF_KERNEL_INITIALIZER(_resize_1d_bilinear_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -233,8 +233,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
     int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-    int32_t in_width = inputs[0]->attr.size[0];
-    int32_t out_width = outputs[0]->attr.size[0];
+    vsi_size_t in_width = inputs[0]->attr.size[0];
+    vsi_size_t out_width = outputs[0]->attr.size[0];
     float input_zp = (float)inputs[0]->attr.dtype.zero_point;
     float input_scale = inputs[0]->attr.dtype.scale;
     float input_tail = -(input_zp * input_scale);

@@ -115,7 +115,7 @@ DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -233,8 +233,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
     int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-    int32_t in_width = inputs[0]->attr.size[0];
-    int32_t out_width = outputs[0]->attr.size[0];
+    vsi_size_t in_width = inputs[0]->attr.size[0];
+    vsi_size_t out_width = outputs[0]->attr.size[0];
     float input_zp = (float)inputs[0]->attr.dtype.zero_point;
     float input_scale = inputs[0]->attr.dtype.scale;
     float output_scale = (0 == outputs[0]->attr.dtype.scale) ? \

@@ -113,7 +113,7 @@ DEF_KERNEL_INITIALIZER(_resize_bilinear_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -233,10 +233,10 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
     int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-    int32_t in_width = inputs[0]->attr.size[0];
-    int32_t in_height = inputs[0]->attr.size[1];
-    int32_t out_width = outputs[0]->attr.size[0];
-    int32_t out_height = outputs[0]->attr.size[1];
+    vsi_size_t in_width = inputs[0]->attr.size[0];
+    vsi_size_t in_height = inputs[0]->attr.size[1];
+    vsi_size_t out_width = outputs[0]->attr.size[0];
+    vsi_size_t out_height = outputs[0]->attr.size[1];
     float input_zp = (float)inputs[0]->attr.dtype.zero_point;
     float input_scale = inputs[0]->attr.dtype.scale;
     float input_tail = -(input_zp * input_scale);

@@ -119,7 +119,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
         {0, 0, 0}
     };
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -237,10 +237,10 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
     int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
-    int32_t in_width = inputs[0]->attr.size[0];
-    int32_t in_height = inputs[0]->attr.size[1];
-    int32_t out_width = outputs[0]->attr.size[0];
-    int32_t out_height = outputs[0]->attr.size[1];
+    vsi_size_t in_width = inputs[0]->attr.size[0];
+    vsi_size_t in_height = inputs[0]->attr.size[1];
+    vsi_size_t out_width = outputs[0]->attr.size[0];
+    vsi_size_t out_height = outputs[0]->attr.size[1];
     float input_zp = (float)inputs[0]->attr.dtype.zero_point;
     float input_scale = inputs[0]->attr.dtype.scale;
     float output_scale = (0 == outputs[0]->attr.dtype.scale) ? \

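All four resize kernels now read their widths and heights as vsi_size_t. The
ratio those extents usually feed into follows the common TF-style convention
(an assumption here; the kernels' exact formula is not shown in these hunks):

    #include <stddef.h>

    typedef size_t vsi_size_t;  /* stand-in so the sketch compiles without the library headers */

    /* align_corners maps the two end points exactly; otherwise the plain
     * extent ratio is used. half_pixel_centers then only shifts sampling
     * by 0.5; it does not change this ratio. */
    static float resize_scale( vsi_size_t in_len, vsi_size_t out_len,
                               int align_corners )
    {
        if ( align_corners && out_len > 1 )
        {
            return (float)( in_len - 1 ) / (float)( out_len - 1 );
        }
        return (float)in_len / (float)out_len;
    }
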
@@ -119,8 +119,8 @@ DEF_KERNEL_INITIALIZER(_roi_align_initializer)
     };
     vsi_nn_kernel_tensor_attr_t * rois_attr = NULL;
     vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-    vsi_int_array_t * rois_shape = NULL;
-    vsi_int_array_t * out_shape = NULL;
+    vsi_size_array_t * rois_shape = NULL;
+    vsi_size_array_t * out_shape = NULL;

     rois_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( rois_attr, "Create tensor attr buffer fail.", final );

@@ -235,7 +235,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_t node = NULL;
     vsi_bool image_2d = FALSE;
     uint32_t rank[_IO_NUM] = {0};
-    int32_t shapes[_IO_NUM][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
+    vsi_size_t shapes[_IO_NUM][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
     vsi_nn_tensor_t* reshape_tensors[_IO_NUM] = { NULL };
     int32_t i = 0;
     float width_ratio = vsi_nn_kernel_param_get_float32( params, "width_ratio" );

@@ -250,26 +250,26 @@ static vsi_nn_kernel_node_t _setup
     float rcp_of_out_height = 1.0f / (float)(outputs[0]->attr.size[1]);
     float sampling_x_ratio = width_sample_num > 0 ? (float)width_sample_num : 0;
     float sampling_y_ratio = height_sample_num > 0 ? (float)height_sample_num : 0;
-    int depth = inputs[0]->attr.size[2];
+    vsi_size_t depth = inputs[0]->attr.size[2];

-    vsi_nn_kernel_optimize_nchw2xhw_shape( (const int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num,
+    vsi_nn_kernel_optimize_nchw2xhw_shape( (const vsi_size_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num,
         shapes[0], &rank[0]);
-    vsi_nn_kernel_optimize_1d_tensor_shape( (const int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num,
+    vsi_nn_kernel_optimize_1d_tensor_shape( (const vsi_size_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num,
         shapes[1], &rank[1]);
-    vsi_nn_kernel_optimize_1d_tensor_shape( (const int32_t*)inputs[2]->attr.size, inputs[2]->attr.dim_num,
+    vsi_nn_kernel_optimize_1d_tensor_shape( (const vsi_size_t*)inputs[2]->attr.size, inputs[2]->attr.dim_num,
         shapes[2], &rank[2]);
-    vsi_nn_kernel_optimize_nchw2xhw_shape( (const int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num,
+    vsi_nn_kernel_optimize_nchw2xhw_shape( (const vsi_size_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num,
         shapes[3], &rank[3]);

     for (i = 0; i < _INPUT_NUM; i++)
     {
         reshape_tensors[i] = vsi_nn_reshape_tensor( graph,
-            inputs[i], (uint32_t*)shapes[i], rank[i] );
+            inputs[i], shapes[i], rank[i] );
     }
     reshape_tensors[_INPUT_NUM] = vsi_nn_reshape_tensor( graph,
-        outputs[0], (uint32_t*)shapes[_INPUT_NUM], rank[_INPUT_NUM] );
+        outputs[0], shapes[_INPUT_NUM], rank[_INPUT_NUM] );

-    if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)reshape_tensors[0]->attr.size,
+    if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[0]->attr.size,
         inputs[0]->attr.dim_num ) )
     {
         return NULL;

@@ -96,19 +96,19 @@ static vx_param_description_t _scatter_nd_kernel_param_def[] =
 static vsi_status cal_scatter_nd_tensor_reshape_size
 (
 vsi_nn_tensor_t ** inputs,
-int32_t sizes[VSI_NN_MAX_DIM_NUM],
+vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
 uint32_t block_size,
 uint32_t coordDim,
-uint32_t* width,
-uint32_t* area,
+vsi_size_t* width,
+vsi_size_t* area,
 int32_t* newDim
 )
 {
 vsi_status status = VSI_FAILURE;
 uint32_t dims_num = inputs[0]->attr.dim_num;
-uint32_t *input_size = inputs[0]->attr.size;
+vsi_size_t *input_size = inputs[0]->attr.size;
 uint32_t i = 0;
-uint32_t elementCnt = 1;
+vsi_size_t elementCnt = 1;

 if(coordDim != 0 && (width == NULL || area == NULL))
 {

@@ -180,8 +180,8 @@ DEF_KERNEL_INITIALIZER(_scatter_nd_initializer)
 };

 vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-int32_t block_size = 0;
-int32_t height = 0;
+vsi_ssize_t block_size = 0;
+vsi_ssize_t height = 0;

 attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
 CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -280,12 +280,12 @@ static vsi_nn_kernel_node_t _setup
 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_node_param_t node_params[_SCATTER_ND_PARAM_NUM] = {NULL};
 vsi_nn_kernel_node_t node = NULL;
-int32_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
+vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
 int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
 int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
 int32_t idx_num = vsi_nn_kernel_param_get_int32( params, "idx_num" );
 int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
-uint32_t width = 0, area = 0;
+vsi_size_t width = 0, area = 0;

 status = cal_scatter_nd_tensor_reshape_size(&inputs[0], shapes[0], coord_dim, 0, NULL, NULL, &rs_in_dim);
 status |= cal_scatter_nd_tensor_reshape_size(&inputs[1], shapes[1], block_size, 0, NULL, NULL, &rs_idx_dim);

@@ -296,7 +296,7 @@ static vsi_nn_kernel_node_t _setup
 return NULL;
 }

-if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
 outputs[0]->attr.dim_num ) )
 {
 return NULL;
@@ -88,20 +88,20 @@ static vx_param_description_t _scatter_nd_update_kernel_param_def[] =
 static vsi_status cal_scatter_nd_update_tensor_reshape_size
 (
 vsi_nn_tensor_t ** inputs,
-int32_t sizes[VSI_NN_MAX_DIM_NUM],
+vsi_size_t sizes[VSI_NN_MAX_DIM_NUM],
 uint32_t block_size,
 uint32_t coordDim,
-uint32_t* width,
-uint32_t* area,
-uint32_t* vol,
+vsi_size_t* width,
+vsi_size_t* area,
+vsi_size_t* vol,
 int32_t* newDim
 )
 {
 vsi_status status = VSI_FAILURE;
 uint32_t dims_num = inputs[0]->attr.dim_num;
-uint32_t *input_size = inputs[0]->attr.size;
+vsi_size_t *input_size = inputs[0]->attr.size;
 uint32_t i = 0;
-uint32_t elementCnt = 1;
+vsi_size_t elementCnt = 1;

 if (coordDim != 0 && (width == NULL || area == NULL))
 {

@@ -185,8 +185,8 @@ DEF_KERNEL_INITIALIZER(_scatter_nd_update_initializer)
 };

 vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-int32_t block_size = 0;
-int32_t height = 0;
+vsi_ssize_t block_size = 0;
+vsi_ssize_t height = 0;

 attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[3] );
 CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -274,12 +274,12 @@ static vsi_nn_kernel_node_t _setup
 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_node_param_t node_params[_SCATTER_ND_UPDATE_PARAM_NUM] = {NULL};
 vsi_nn_kernel_node_t node = NULL;
-int32_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
+vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = {{0}};
 int32_t block_size = vsi_nn_kernel_param_get_int32( params, "block_size" );
 int32_t coord_dim = vsi_nn_kernel_param_get_int32( params, "coord_dim" );
 int32_t idx_num = vsi_nn_kernel_param_get_int32( params, "idx_num" );
 int32_t rs_in_dim = 0, rs_idx_dim = 0, rs_out_dim = 0;
-uint32_t width = 0, area = 0, vol = 0;
+vsi_size_t width = 0, area = 0, vol = 0;
 int32_t offsetX = 0, offsetY = 0, offsetZ = 0, offsetW = 0, offset_idx = 0;

 status = cal_scatter_nd_update_tensor_reshape_size(&inputs[1], shapes[0],

@@ -293,7 +293,7 @@ static vsi_nn_kernel_node_t _setup
 return NULL;
 }

-if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
 outputs[0]->attr.dim_num ) )
 {
 return NULL;

@@ -305,21 +305,21 @@ static vsi_nn_kernel_node_t _setup
 }
 if (coord_dim == 4 || coord_dim == 5)
 {
-offsetX = vol;
-offsetY = area;
-offsetZ = width;
+offsetX = (int32_t)vol;
+offsetY = (int32_t)area;
+offsetZ = (int32_t)width;
 offsetW = 1;
 }
 else if (coord_dim == 3)
 {
-offsetX = area;
-offsetY = width;
+offsetX = (int32_t)area;
+offsetY = (int32_t)width;
 offsetZ = 1;
 offsetW = 0;
 }
 else if (coord_dim == 2)
 {
-offsetX = width;
+offsetX = (int32_t)width;
 offsetY = 1;
 offsetZ = 0;
 offsetW = 0;
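Conversely, where a widened vsi_size_t value still has to feed a parameter that remains int32_t, as with the offsetX/offsetY/offsetZ assignments above, the commit narrows it with an explicit (int32_t) cast. A hedged sketch of a checked variant of that narrowing; the helper name and the assert are illustrative additions, not part of the change:

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t vsi_size_t; /* assumed stand-in for the library typedef */

    /* Hypothetical helper: narrow a vsi_size_t offset into the int32_t a
     * GPU-side parameter expects, trapping silent truncation in debug builds. */
    static int32_t narrow_offset(vsi_size_t v)
    {
        assert(v <= (vsi_size_t)INT32_MAX);
        return (int32_t)v;
    }

    int main(void)
    {
        vsi_size_t width = 640;
        int32_t offsetX = narrow_offset(width);
        return offsetX == 640 ? 0 : 1;
    }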
@@ -123,7 +123,7 @@ DEF_KERNEL_INITIALIZER(_select_initializer)

 vx_tensor output = (vx_tensor)param[3];
 vsi_nn_kernel_tensor_attr_t *output_attr = NULL;
-vsi_int_array_t *output_shape = NULL;
+vsi_size_array_t *output_shape = NULL;

 output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output);
 CHECK_PTR_FAIL_GOTO( output_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

@@ -252,7 +252,7 @@ static vsi_nn_kernel_node_t _setup
 input0Tail = outputZP - input0Tail * input0Scale;
 input1Tail = outputZP - input1Tail * input1Scale;

-if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+if( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
 outputs[0]->attr.dim_num ) )
 {
 return NULL;
@@ -114,7 +114,7 @@ DEF_KERNEL_INITIALIZER(_sequence_mask_initializer)

 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-vsi_int_array_t * out_shape = NULL;
+vsi_size_array_t * out_shape = NULL;

 attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
 CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -194,14 +194,14 @@ static int32_t _optimize_mask_shape
 vsi_nn_tensor_t ** inputs,
 vsi_nn_tensor_t ** outputs,
 int32_t max_len,
-int32_t* opt_shape_in,
-int32_t* opt_shape_out,
+vsi_size_t* opt_shape_in,
+vsi_size_t* opt_shape_out,
 int32_t* is2Dflg
 )
 {
 vsi_status status = VSI_SUCCESS;
-int32_t in_shape[VSI_NN_MAX_DIM_NUM] = {0};
-int32_t new_rank = 0;
+vsi_size_t in_shape[VSI_NN_MAX_DIM_NUM] = {0};
+vsi_size_t new_rank = 0;
 uint32_t i = 0;

 for(i = 0; i < inputs[0]->attr.dim_num; i++)

@@ -242,7 +242,7 @@ static vsi_nn_kernel_node_t _setup
 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_node_param_t node_params[_CL_PARAM_NUM] = {NULL};
 vsi_nn_kernel_tensor_t rs_input = NULL, rs_output = NULL;
-int32_t new_shape[2][VSI_NN_MAX_DIM_NUM] = {{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }};
+vsi_size_t new_shape[2][VSI_NN_MAX_DIM_NUM] = {{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }};
 int32_t max_len = vsi_nn_kernel_param_get_int32( params, "max_len" );
 vsi_nn_kernel_node_t node = NULL;
 int32_t is2Dflg = 0;

@@ -255,7 +255,7 @@ static vsi_nn_kernel_node_t _setup
 int32_t input_fl = 0;
 int32_t output_fl = 0;

-if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
 outputs[0]->attr.dim_num ) )
 {
 return NULL;
@@ -93,7 +93,7 @@ DEF_KERNEL_INITIALIZER(_signal_frame_initializer)

 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-vsi_int_array_t * out_shape = NULL;
+vsi_size_array_t * out_shape = NULL;

 attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
 CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -190,14 +190,14 @@ static vsi_nn_kernel_node_t _setup
 int32_t axis = vsi_nn_kernel_param_get_int32( params, "axis" );
 int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" );
 float pad_value = vsi_nn_kernel_param_get_float32( params, "pad_val" );
-int32_t num_frames = outputs[0]->attr.size[axis + 1];
+vsi_size_t num_frames = outputs[0]->attr.size[axis + 1];
 int32_t rank = inputs[0]->attr.dim_num;
-int32_t inner = 1;
-int32_t outer = 1;
-int32_t length_samples = inputs[0]->attr.size[axis];
+vsi_size_t inner = 1;
+vsi_size_t outer = 1;
+vsi_size_t length_samples = inputs[0]->attr.size[axis];
 int32_t i = 0;
 vsi_nn_tensor_t* rs_tensors[2] = { NULL };
-int32_t shape[2][VSI_NN_MAX_DIM_NUM] = {{ 0 }};
+vsi_size_t shape[2][VSI_NN_MAX_DIM_NUM] = {{ 0 }};

 for (i = 0; i < axis; i++)
 {

@@ -220,11 +220,11 @@ static vsi_nn_kernel_node_t _setup
 shape[1][3] = outer;

 rs_tensors[0] = vsi_nn_reshape_tensor( graph,
-inputs[0], (uint32_t*)shape[0], 4 );
+inputs[0], shape[0], 4 );
 rs_tensors[1] = vsi_nn_reshape_tensor( graph,
-outputs[0], (uint32_t*)shape[1], 4 );
+outputs[0], shape[1], 4 );

-if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)rs_tensors[1]->attr.size,
+if ( !vsi_nn_kernel_gpu_check_shape( rs_tensors[1]->attr.size,
 rs_tensors[1]->attr.dim_num ) )
 {
 return NULL;
@@ -126,7 +126,7 @@ DEF_KERNEL_INITIALIZER(_slice_initializer)
 {0, 0, 0}
 };
 vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
-vsi_int_array_t * out_shape = NULL;
+vsi_size_array_t * out_shape = NULL;

 output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[2] );
 CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

@@ -235,11 +235,11 @@ static vsi_nn_kernel_node_t _setup
 vsi_nn_kernel_node_t node = NULL;
 vsi_bool image_2d = FALSE;
 uint32_t rank[_IO_NUM] = {0};
-int32_t shapes[_IO_NUM][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
+vsi_size_t shapes[_IO_NUM][VSI_NN_MAX_DIM_NUM] = {{ 1 }};
 vsi_nn_tensor_t* reshape_tensors[_IO_NUM] = { NULL };
 int32_t i = 0;
-int32_t input_batch = inputs[0]->attr.dim_num > 3 ? inputs[0]->attr.size[3] : 1;
-int32_t output_batch = outputs[0]->attr.dim_num > 3 ? outputs[0]->attr.size[3] : 1;
+vsi_size_t input_batch = inputs[0]->attr.dim_num > 3 ? inputs[0]->attr.size[3] : 1;
+vsi_size_t output_batch = outputs[0]->attr.dim_num > 3 ? outputs[0]->attr.size[3] : 1;
 float inputScale = inputs[0]->attr.dtype.scale;
 float inputTail = (float)inputs[0]->attr.dtype.zero_point * inputScale;
 float outputScale = outputs[0]->attr.dtype.scale;

@@ -247,22 +247,22 @@ static vsi_nn_kernel_node_t _setup

 outputScale = vsi_abs(outputScale) < 1e-5 ? 0.0f : 1.0f / outputScale;

-vsi_nn_kernel_optimize_1d_tensor_shape( (const int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num,
+vsi_nn_kernel_optimize_1d_tensor_shape( (const vsi_size_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num,
 shapes[0], &rank[0]);
-vsi_nn_kernel_optimize_1d_tensor_shape( (const int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num,
+vsi_nn_kernel_optimize_1d_tensor_shape( (const vsi_size_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num,
 shapes[1], &rank[1]);
-vsi_nn_kernel_optimize_1d_tensor_shape( (const int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num,
+vsi_nn_kernel_optimize_1d_tensor_shape( (const vsi_size_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num,
 shapes[2], &rank[2]);

 for (i = 0; i < _INPUT_NUM; i++)
 {
 reshape_tensors[i] = vsi_nn_reshape_tensor( graph,
-inputs[i], (uint32_t*)shapes[i], rank[i] );
+inputs[i], shapes[i], rank[i] );
 }
 reshape_tensors[_INPUT_NUM] = vsi_nn_reshape_tensor( graph,
-outputs[0], (uint32_t*)shapes[_INPUT_NUM], rank[_INPUT_NUM] );
+outputs[0], shapes[_INPUT_NUM], rank[_INPUT_NUM] );

-if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)reshape_tensors[0]->attr.size,
+if ( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[0]->attr.size,
 inputs[0]->attr.dim_num ) || input_batch != output_batch )
 {
 goto final;
@@ -109,10 +109,10 @@ DEF_KERNEL_INITIALIZER(_space2depth_internal_initializer)

 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_tensor_attr_t * attr[1] = { NULL };
-vsi_int_array_t * in_shape = NULL;
-int32_t width = 0;
-int32_t height = 0;
-int32_t chn = 0;
+vsi_size_array_t * in_shape = NULL;
+vsi_ssize_t width = 0;
+vsi_ssize_t height = 0;
+vsi_ssize_t chn = 0;

 attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
 CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -255,7 +255,7 @@ static vsi_nn_kernel_node_t _setup
 scaleInOut = inputScale / outputScale;
 zpInOut = outputZp - inputZp * scaleInOut;

-if ( !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+if ( !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
 outputs[0]->attr.dim_num ) )
 {
 return NULL;
@@ -36,7 +36,7 @@
 #include "utils/vsi_nn_util.h"
 #include "utils/vsi_nn_math.h"
 #include "kernel/vsi_nn_kernel.h"
-#include "libnnext/vx_lib_nnext.h"
+#include "kernel/vsi_nn_kernel_gpu_shape_optimize.h"

 __BEGIN_DECLS


@@ -168,7 +168,7 @@ DEF_KERNEL_INITIALIZER(_swish_initializer)
 vx_status status = VX_FAILURE;
 vx_tensor output = (vx_tensor)param[1];
 vsi_nn_kernel_tensor_attr_t * attr_out = NULL;
-vsi_int_array_t * out_shape = NULL;
+vsi_size_array_t * out_shape = NULL;

 attr_out = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)output );
 CHECK_PTR_FAIL_GOTO( attr_out, "vsi_nn_kernel_tensor_attr_create fail.", final );

@@ -273,8 +273,8 @@ static vsi_nn_kernel_node_t _setup
 {
 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_node_param_t node_params[_SWISH_PARAM_NUM] = {NULL};
-int32_t shapes[2][VSI_NN_MAX_DIM_NUM] = {{0}};
-uint32_t new_rank = 0;
+vsi_size_t shape[VSI_NN_MAX_DIM_NUM] = {0};
+vsi_size_t new_rank = 0;
 vsi_bool image_2d = FALSE;
 vsi_nn_kernel_node_t node = NULL;
 int32_t swish_type = vsi_nn_kernel_param_get_int32( params, "type" );

@@ -284,6 +284,7 @@ static vsi_nn_kernel_node_t _setup
 float outputScale = outputs[0]->attr.dtype.scale == 0.0f ? 0.0f : 1.0f / outputs[0]->attr.dtype.scale;
 float outputZP = (float)outputs[0]->attr.dtype.zero_point + 0.5f;
 vx_float32 logE = (vx_float32)(log10(exp(1.0f)) / log10(2.0f));
+vsi_bool ret = FALSE;

 #if (VX_ACTIVATION_EXT_SUPPORT)
 if (VSI_NN_HW_EVIS_2 == graph->ctx->config.evis.ver)

@@ -292,10 +293,17 @@ static vsi_nn_kernel_node_t _setup
 }
 #endif

-vsi_nn_OptimizedEltOPShape(inputs[0], (uint32_t *)(shapes[0]), &new_rank);
-vsi_nn_OptimizedEltOPShape(outputs[0], (uint32_t *)(shapes[1]), &new_rank);
+ret = vsi_nn_kernel_optimize_element_shape(
+inputs[0]->attr.size, inputs[0]->attr.dim_num,
+shape, &new_rank );

-if( !vsi_nn_kernel_gpu_check_shape( shapes[0], new_rank ) )
+if( ret )
 {
+node_params[0] = vsi_nn_kernel_tensor_reshape( inputs[0]->t, shape, new_rank );
+node_params[1] = vsi_nn_kernel_tensor_reshape( outputs[0]->t, shape, new_rank );
+}
+
+if( !vsi_nn_kernel_gpu_check_shape( shape, new_rank ) )
+{
 return NULL;
 }

@@ -318,8 +326,6 @@ static vsi_nn_kernel_node_t _setup
 if( node )
 {
 size_t node_params_num = _SWISH_PARAM_NUM;
-node_params[0] = vsi_nn_kernel_tensor_reshape( inputs[0]->t, shapes[0], new_rank );
-node_params[1] = vsi_nn_kernel_tensor_reshape( outputs[0]->t, shapes[1], new_rank );
 node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &inputScale );
 node_params[SCALAR_INPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &inputTail );
 node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &outputScale );

@@ -338,8 +344,6 @@ static vsi_nn_kernel_node_t _setup
 status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
 VSI_ASSERT( status == VSI_SUCCESS );

-vsi_nn_kernel_tensor_release( &node_params[0] );
-vsi_nn_kernel_tensor_release( &node_params[1] );
 vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
 vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
 vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] );

@@ -353,6 +357,15 @@ static vsi_nn_kernel_node_t _setup
 }
 }

+if(node_params[0])
+{
+vsi_nn_kernel_tensor_release( &node_params[0] );
+}
+if(node_params[1])
+{
+vsi_nn_kernel_tensor_release( &node_params[1] );
+}
+
 return node;
 } /* _setup() */

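The swish rework above goes beyond a type swap: a single vsi_nn_kernel_optimize_element_shape pass replaces the two per-tensor vsi_nn_OptimizedEltOPShape calls, both kernel tensors are reshaped to the one optimized shape before the node is created, and the reshaped handles are now freed by null-guarded releases that run on every exit path. A minimal sketch of that ownership pattern, with the kernel API reduced to hypothetical stubs:

    #include <stddef.h>

    typedef struct { int dummy; } tensor;

    /* Hypothetical stubs standing in for the kernel tensor API. */
    static tensor * tensor_reshape(tensor *t) { return t; }
    static void tensor_release(tensor **t) { *t = NULL; }

    static tensor * setup(tensor *in, tensor *out, int shape_ok)
    {
        tensor *params[2] = { NULL, NULL };
        tensor *node = NULL;

        /* Reshape both I/O tensors to the shared optimized shape up front. */
        if (shape_ok)
        {
            params[0] = tensor_reshape(in);
            params[1] = tensor_reshape(out);
        }
        if (!params[0] || !params[1])
        {
            goto cleanup; /* node stays NULL */
        }

        node = in; /* node creation elided */

    cleanup:
        /* Null-guarded release runs on every path, so partially built
         * state is neither leaked nor double-freed. */
        if (params[0]) tensor_release(&params[0]);
        if (params[1]) tensor_release(&params[1]);
        return node;
    }

    int main(void) { tensor a, b; return setup(&a, &b, 1) ? 0 : 1; }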
@@ -161,7 +161,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)

 vsi_status status = VSI_FAILURE;
 vsi_nn_kernel_tensor_attr_t * attr[2] = { NULL };
-vsi_int_array_t * in_shape = NULL;
+vsi_size_array_t * in_shape = NULL;

 attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
 CHECK_PTR_FAIL_GOTO( attr[0], "Create tensor attr buffer fail.", final );

@@ -234,9 +234,9 @@ static vsi_status _query_kernel
 return status;
 } /* _query_kernel() */

-static vsi_bool _is_supported_axis(int32_t* multiples, uint32_t multiples_num)
+static vsi_bool _is_supported_axis(vsi_size_t* multiples, vsi_size_t multiples_num)
 {
-uint32_t i = 0;
+vsi_size_t i = 0;

 if ( multiples_num < 4)
 {

@@ -274,12 +274,12 @@ static vsi_nn_kernel_node_t _setup
 vsi_bool image_2d = FALSE;
 vsi_nn_kernel_node_t node = NULL;
 vsi_nn_tensor_t* reshape_tensors[2] = { NULL };
-int32_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
+vsi_size_t shapes[3][VSI_NN_MAX_DIM_NUM] = { { 0 } };
 uint32_t i = 0;
-uint32_t new_rank = 0;
+vsi_size_t new_rank = 0;
 vsi_bool ret = FALSE;
 uint32_t dim = inputs[0]->attr.dim_num;
-int32_t multiples[VSI_NN_MAX_DIM_NUM] = { 0 };
+vsi_size_t multiples[VSI_NN_MAX_DIM_NUM] = { 0 };

 for ( i = 0; i < dim; i++)
 {

@@ -287,9 +287,9 @@ static vsi_nn_kernel_node_t _setup
 }

 ret = vsi_nn_kernel_optimize_tile_shape(
-(int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
-(int32_t *)multiples, inputs[0]->attr.dim_num,
-(int32_t *)outputs[0]->attr.size, outputs[0]->attr.dim_num,
+inputs[0]->attr.size, inputs[0]->attr.dim_num,
+multiples, inputs[0]->attr.dim_num,
+outputs[0]->attr.size, outputs[0]->attr.dim_num,
 shapes[0], shapes[1], shapes[2], &new_rank );

 if (ret)
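With multiples and the tensor sizes sharing vsi_size_t, the vsi_nn_kernel_optimize_tile_shape call above no longer needs the pointer casts. For reference, the tile semantics the multiples array encodes, each output extent being the input extent times its multiple, in a small self-contained sketch (the typedef is an assumed stand-in):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t vsi_size_t; /* assumed stand-in for the library typedef */

    int main(void)
    {
        /* Tile: every output extent is the input extent times its multiple. */
        vsi_size_t in_shape[3]  = {2, 3, 1};
        vsi_size_t multiples[3] = {1, 2, 4};
        int i;
        for (i = 0; i < 3; i++)
        {
            printf("dim %d: %llu * %llu = %llu\n", i,
                   (unsigned long long)in_shape[i],
                   (unsigned long long)multiples[i],
                   (unsigned long long)(in_shape[i] * multiples[i]));
        }
        return 0;
    }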
@@ -300,16 +300,16 @@ static vsi_nn_kernel_node_t _setup
 }

 reshape_tensors[0] = vsi_nn_reshape_tensor( graph,
-inputs[0], (uint32_t*)shapes[0], new_rank );
+inputs[0], shapes[0], new_rank );
 reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
-outputs[0], (uint32_t*)shapes[2], new_rank );
+outputs[0], shapes[2], new_rank );
 }
 else
 {
 return NULL;
 }

-if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)reshape_tensors[1]->attr.size,
+if( !vsi_nn_kernel_gpu_check_shape( reshape_tensors[1]->attr.size,
 outputs[0]->attr.dim_num ))
 {
 goto final;

@@ -323,9 +323,9 @@ static vsi_nn_kernel_node_t _setup

 if( node )
 {
-uint32_t depthIn = new_rank > 2 ? reshape_tensors[0]->attr.size[2] : 1;
-uint32_t depthOut = new_rank > 2 ? reshape_tensors[1]->attr.size[2] : 1;
-uint32_t batchIn = new_rank > 3 ? reshape_tensors[0]->attr.size[3] : 1;
+uint32_t depthIn = (uint32_t)(new_rank > 2 ? reshape_tensors[0]->attr.size[2] : 1);
+uint32_t depthOut = (uint32_t)(new_rank > 2 ? reshape_tensors[1]->attr.size[2] : 1);
+uint32_t batchIn = (uint32_t)(new_rank > 3 ? reshape_tensors[0]->attr.size[3] : 1);

 vsi_nn_kernel_node_pack_io( node_params, _CL_PARAM_NUM,
 &reshape_tensors[0], 1, &reshape_tensors[1], 1 );
@@ -126,7 +126,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
 vx_status status = VX_FAILURE;
 vx_tensor input = (vx_tensor)param[0];
 vsi_nn_kernel_tensor_attr_t * attr_in = NULL;
-vsi_int_array_t * in_shape = NULL;
+vsi_size_array_t * in_shape = NULL;
 vsi_bool image_2d = FALSE;

 attr_in = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input );

@@ -270,11 +270,11 @@ static vsi_nn_kernel_node_t _setup
 return NULL;
 }

-if( !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[0]->attr.size,
+if( !vsi_nn_kernel_gpu_check_shape( inputs[0]->attr.size,
 inputs[0]->attr.dim_num )
-|| !vsi_nn_kernel_gpu_check_shape( (int32_t*)inputs[1]->attr.size,
+|| !vsi_nn_kernel_gpu_check_shape( inputs[1]->attr.size,
 inputs[1]->attr.dim_num )
-|| !vsi_nn_kernel_gpu_check_shape( (int32_t*)outputs[0]->attr.size,
+|| !vsi_nn_kernel_gpu_check_shape( outputs[0]->attr.size,
 outputs[0]->attr.dim_num ))
 {
 return NULL;
@@ -78,13 +78,13 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t in_stride_size[_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 int32_t i;
 float mean = .0f, stddev_inv = .0f, variance = .0f, input_d = .0f, data = .0f, eps = .0f;
-int32_t v_size, n_batch, batch;
+vsi_ssize_t v_size, n_batch, batch;
 /* prepare data */
 for(i = 0; i < _INPUT_NUM; i ++)
 {

@@ -114,10 +114,10 @@ DEF_KERNEL_EXECUTOR(_compute)
 {
 float sum = 0.0f;
 float sum_sq = 0.0f;
-int32_t index_base = batch * v_size;
+vsi_ssize_t index_base = batch * v_size;
 for (i = 0; i < v_size; ++i)
 {
-int32_t index = i + index_base;
+vsi_ssize_t index = i + index_base;
 input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
 sum += input_d;
 sum_sq += input_d * input_d;

@@ -138,7 +138,7 @@ DEF_KERNEL_EXECUTOR(_compute)

 for (i = 0; i < v_size; ++i)
 {
-int32_t index = i + index_base;
+vsi_ssize_t index = i + index_base;
 input_d = f32_in_buffer[0][index] + f32_in_buffer[1][index];
 data = (input_d - mean) * stddev_inv;
 f32_out_buffer[0][index] = data;
@@ -60,11 +60,11 @@ DEF_KERNEL_EXECUTOR(_argmax_exec)
 vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
 int32_t i;
 int32_t axis = 0;
-int32_t outerSize = 1;
-int32_t axisSize = 1;
-int32_t innerSize = 1;
-int32_t inner = 0;
-int32_t outer = 0;
+vsi_ssize_t outerSize = 1;
+vsi_ssize_t axisSize = 1;
+vsi_ssize_t innerSize = 1;
+vsi_ssize_t inner = 0;
+vsi_ssize_t outer = 0;


 tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
@@ -61,11 +61,11 @@ DEF_KERNEL_EXECUTOR(_argmin_exec)
 vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
 int32_t i;
 int32_t axis = 0;
-int32_t outerSize = 1;
-int32_t axisSize = 1;
-int32_t innerSize = 1;
-int32_t inner = 0;
-int32_t outer = 0;
+vsi_ssize_t outerSize = 1;
+vsi_ssize_t axisSize = 1;
+vsi_ssize_t innerSize = 1;
+vsi_ssize_t inner = 0;
+vsi_ssize_t outer = 0;


 tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
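Note that the argmax/argmin extents above pick the signed vsi_ssize_t rather than vsi_size_t. Keeping such counters signed avoids the classic unsigned pitfall in descending scans and mixed-sign comparisons; a sketch (the typedefs are assumed stand-ins):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t vsi_size_t;  /* assumed stand-ins for the library typedefs */
    typedef int64_t  vsi_ssize_t;

    int main(void)
    {
        vsi_size_t n = 4;
        vsi_ssize_t i;

        /* A signed counter lets a reverse scan over [0, n) terminate. */
        for (i = (vsi_ssize_t)n - 1; i >= 0; i--)
        {
            printf("%lld ", (long long)i);
        }
        printf("\n");
        /* An unsigned counter would make `i >= 0` always true: on reaching
         * zero it wraps to the maximum value and the loop never exits. */
        return 0;
    }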
@@ -110,16 +110,16 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 uint32_t i;
 const uint32_t roiLength = 4;
 const uint32_t imageLength = 2;
-uint32_t numClasses = 0;
-uint32_t numRois = 0;
-uint32_t j;
-uint32_t roiIndex;
+vsi_size_t numClasses = 0;
+vsi_size_t numRois = 0;
+vsi_size_t j;
+vsi_size_t roiIndex;

 /* prepare data */
 for (i = 0; i < _INPUT_NUM; i ++)

@@ -163,7 +163,7 @@ DEF_KERNEL_EXECUTOR(_compute)
 vsi_nn_box_encoding_center roi_ctr;
 vsi_nn_box_encoding_corner roiAfter;
 vsi_nn_box_encoding_corner cliped;
-uint32_t index = (roiIndex * numClasses + j) * roiLength;
+vsi_size_t index = (roiIndex * numClasses + j) * roiLength;

 roi_ctr.w = (float)(exp(f32_in_buffer[1][index + 2]) * roiBefore.w);
 roi_ctr.h = (float)(exp(f32_in_buffer[1][index + 3]) * roiBefore.h);
@@ -46,21 +46,21 @@ __BEGIN_DECLS
 #define _CPU_PARAM_NUM (_CPU_ARG_NUM + _CPU_IO_NUM)
 #define _KERNEL_NAME CVIVANTE_NAMESPACE("batch_norm_sw")

-static int32_t _expand_offset
+static vsi_ssize_t _expand_offset
 (
-int32_t index,
-int32_t * shape, size_t rank,
-size_t * strides, int32_t * out_shape
+vsi_ssize_t index,
+vsi_size_t * shape, vsi_size_t rank,
+vsi_size_t * strides, vsi_size_t * out_shape
 )
 {
-uint32_t i;
-int32_t offset = 0;
+vsi_size_t i;
+vsi_ssize_t offset = 0;

 for( i = 0; i < rank && index; i ++ )
 {
 if( shape[i] == out_shape[i] )
 {
-offset += (int32_t)strides[i] * ( index % out_shape[i] );
+offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
 }
 index /= out_shape[i];
 }
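_expand_offset maps a linear index over the broadcast output back into a possibly smaller input: it peels off one dimension at a time and accumulates a stride only where the input actually has that output's extent. A self-contained copy of the logic for experimentation; the typedefs are assumed stand-ins and the function mirrors the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t vsi_size_t;  /* assumed stand-ins for the library typedefs */
    typedef int64_t  vsi_ssize_t;

    /* Walk the linear output index dimension by dimension and only advance
     * into the input where it is not broadcast. */
    static vsi_ssize_t expand_offset(vsi_ssize_t index,
                                     const vsi_size_t *shape, vsi_size_t rank,
                                     const vsi_size_t *strides,
                                     const vsi_size_t *out_shape)
    {
        vsi_size_t i;
        vsi_ssize_t offset = 0;
        for (i = 0; i < rank && index; i++)
        {
            if (shape[i] == out_shape[i])
            {
                offset += (vsi_ssize_t)strides[i] * (index % (vsi_ssize_t)out_shape[i]);
            }
            index /= (vsi_ssize_t)out_shape[i];
        }
        return offset;
    }

    int main(void)
    {
        /* Input {1, 3} broadcast against output {4, 3} (innermost dim first):
         * dim 0 is broadcast, so outputs 0-3 read input 0, 4-7 read 1, ... */
        vsi_size_t shape[2]     = {1, 3};
        vsi_size_t out_shape[2] = {4, 3};
        vsi_size_t strides[2]   = {1, 1};
        vsi_ssize_t i;
        for (i = 0; i < 12; i++)
        {
            printf("out %2lld -> in %lld\n", (long long)i,
                   (long long)expand_offset(i, shape, 2, strides, out_shape));
        }
        return 0;
    }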
@@ -77,8 +77,8 @@ DEF_KERNEL_EXECUTOR(_batch_norm_exec)
 vsi_status status = VX_SUCCESS;
 vsi_nn_kernel_tensor_t tensors[_CPU_IO_NUM] = { NULL };
 float * buffer[_CPU_IO_NUM] = { NULL };
-size_t out_elements = 0;
-size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
+vsi_size_t out_elements = 0;
+vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
 vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
 uint32_t i = 0;
 float eps = 0.f;

@@ -107,7 +107,7 @@ DEF_KERNEL_EXECUTOR(_batch_norm_exec)

 for( i = 0; i < out_elements; i ++ )
 {
-int32_t in_offset[5] = {0};
+vsi_ssize_t in_offset[5] = {0};
 int32_t j = 0;
 float src = 0.f;
 float mean = 0.f;

@@ -117,7 +117,7 @@ DEF_KERNEL_EXECUTOR(_batch_norm_exec)

 for ( j = 0; j < 5; j++)
 {
-in_offset[j] = _expand_offset( i, attr[j]->shape->data, attr[j]->shape->size,
+in_offset[j] = _expand_offset( i, attr[j]->shape->data, (vsi_size_t)attr[j]->shape->size,
 stride_size[j], attr[5]->shape->data );
 }

@@ -227,9 +227,9 @@ DEF_KERNEL_EXECUTOR(_compute)
 int32_t* int32_out_buffer[_OUTPUT_NUM] = {0};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 uint32_t i = 0;
 float score_threshold = 0;
 int32_t max_num_detections = 0;

@@ -300,8 +300,8 @@ DEF_KERNEL_EXECUTOR(_compute)
 CHECK_STATUS_FAIL_GOTO(status, final );
 #undef VSI_NN_KERNEL_READ_SCALAR

-numRois = in_attr[0]->shape->data[1];
-numClasses = in_attr[0]->shape->data[0];
+numRois = (uint32_t)in_attr[0]->shape->data[1];
+numClasses = (uint32_t)in_attr[0]->shape->data[0];

 batch_data = (uint32_t*)malloc(numRois * sizeof(uint32_t));
 CHECK_PTR_FAIL_GOTO( batch_data, "Create batch_data fail.", final );
@@ -74,9 +74,9 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 uint32_t i;
 double max_value = 0.0f, min_value = 0.0f;
 vsi_bool clamp_flag = FALSE;
@@ -79,9 +79,9 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 uint32_t i;
 float min_value = 0.0f;
 float max_value = 0.0f;
@@ -55,21 +55,21 @@ typedef enum
 } relational_type_e;


-static int32_t _expand_offset
+static vsi_ssize_t _expand_offset
 (
-int32_t index,
-int32_t * shape, size_t rank,
-size_t * strides, int32_t * out_shape
+vsi_ssize_t index,
+vsi_size_t * shape, vsi_size_t rank,
+vsi_size_t * strides, vsi_size_t * out_shape
 )
 {
-uint32_t i;
-int32_t offset = 0;
+vsi_size_t i;
+vsi_ssize_t offset = 0;

 for( i = 0; i < rank && index; i ++ )
 {
 if( shape[i] == out_shape[i] )
 {
-offset += (int32_t)strides[i] * ( index % out_shape[i] );
+offset += (vsi_ssize_t)strides[i] * ( index % out_shape[i] );
 }
 index /= out_shape[i];
 }

@@ -88,7 +88,7 @@ DEF_KERNEL_EXECUTOR(_comparisons_exec)
 float * buffer[_CPU_IO_NUM] = { NULL };
 size_t out_elements = 0;
 vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
-size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
+vsi_size_t stride_size[_CPU_INPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{0}};
 int32_t i = 0;
 int32_t operation = 0;


@@ -124,15 +124,15 @@ DEF_KERNEL_EXECUTOR(_comparisons_exec)

 for (i = 0; i < (int32_t)out_elements; i++)
 {
-int32_t in0_offset = 0;
-int32_t in1_offset = 0;
+vsi_ssize_t in0_offset = 0;
+vsi_ssize_t in1_offset = 0;
 float val1 = 0.f;
 float val2 = 0.f;
 vsi_bool data = 0;

-in0_offset = _expand_offset( i, attr[0]->shape->data, attr[0]->shape->size,
+in0_offset = _expand_offset( i, attr[0]->shape->data, (vsi_size_t)attr[0]->shape->size,
 stride_size[0], attr[2]->shape->data );
-in1_offset = _expand_offset( i, attr[1]->shape->data, attr[1]->shape->size,
+in1_offset = _expand_offset( i, attr[1]->shape->data, (vsi_size_t)attr[1]->shape->size,
 stride_size[1], attr[2]->shape->data );

 val1 = buffer[0][in0_offset];
@@ -120,16 +120,16 @@ DEF_KERNEL_EXECUTOR(_compute)
 CHECK_STATUS_FAIL_GOTO(status, final);

 {
-int32_t batch = attr[0]->shape->data[2];
-int32_t input_channel = attr[0]->shape->data[1];
-int32_t input_height = attr[0]->shape->data[0];
-int32_t kernel_size = attr[1]->shape->data[0];
-int32_t output_channel = attr[1]->shape->data[2];
-int32_t output_height = attr[3]->shape->data[0];
-int32_t batch_index = 0;
-int32_t input_channel_index = 0;
-int32_t output_channel_index = 0;
-int32_t output_h_index = 0;
+vsi_ssize_t batch = attr[0]->shape->data[2];
+vsi_ssize_t input_channel = attr[0]->shape->data[1];
+vsi_ssize_t input_height = attr[0]->shape->data[0];
+vsi_ssize_t kernel_size = attr[1]->shape->data[0];
+vsi_ssize_t output_channel = attr[1]->shape->data[2];
+vsi_ssize_t output_height = attr[3]->shape->data[0];
+vsi_ssize_t batch_index = 0;
+vsi_ssize_t input_channel_index = 0;
+vsi_ssize_t output_channel_index = 0;
+vsi_ssize_t output_h_index = 0;

 for(batch_index = 0; batch_index < batch; batch_index++)
 {
@@ -85,21 +85,21 @@ DEF_KERNEL_EXECUTOR(_depth2space_crd_exec)
 memset( buffer[1], 0, out_elements * sizeof(float) );

 {
-uint32_t output_batch = attr[1]->shape->size > 3 ? attr[1]->shape->data[3] : 1;
-uint32_t output_depth = attr[1]->shape->data[2];
-uint32_t output_height = attr[1]->shape->data[1];
-uint32_t output_width = attr[1]->shape->data[0];
-uint32_t input_depth = attr[0]->shape->data[2];
-uint32_t input_height = attr[0]->shape->data[1];
-uint32_t input_width = attr[0]->shape->data[0];
-uint32_t batch = 0, out_h = 0, out_w = 0;
+vsi_size_t output_batch = attr[1]->shape->size > 3 ? attr[1]->shape->data[3] : 1;
+vsi_size_t output_depth = attr[1]->shape->data[2];
+vsi_size_t output_height = attr[1]->shape->data[1];
+vsi_size_t output_width = attr[1]->shape->data[0];
+vsi_size_t input_depth = attr[0]->shape->data[2];
+vsi_size_t input_height = attr[0]->shape->data[1];
+vsi_size_t input_width = attr[0]->shape->data[0];
+vsi_size_t batch = 0, out_h = 0, out_w = 0;

 for (batch = 0; batch < output_batch; ++ batch)
 {
-uint32_t output_batch_index = batch * output_height * output_width * output_depth;
-uint32_t input_batch_index = batch * input_height * input_width * input_depth;
-uint32_t out_d = 0;
-uint32_t block_e2 = block_size * block_size;
+vsi_size_t output_batch_index = batch * output_height * output_width * output_depth;
+vsi_size_t input_batch_index = batch * input_height * input_width * input_depth;
+vsi_size_t out_d = 0;
+vsi_size_t block_e2 = block_size * block_size;

 for (out_d = 0; out_d < output_depth; out_d ++)
 {

@@ -107,13 +107,13 @@ DEF_KERNEL_EXECUTOR(_depth2space_crd_exec)
 {
 for (out_w = 0; out_w < output_width; out_w ++)
 {
-uint32_t in_w = out_w / block_size;
-uint32_t in_h = out_h / block_size;
-uint32_t in_d = (out_w % block_size) + (out_h % block_size) * block_size + out_d * block_e2;
+vsi_size_t in_w = out_w / block_size;
+vsi_size_t in_h = out_h / block_size;
+vsi_size_t in_d = (out_w % block_size) + (out_h % block_size) * block_size + out_d * block_e2;

-uint32_t in_index = in_w + in_h * input_width + in_d * input_width * input_height
+vsi_size_t in_index = in_w + in_h * input_width + in_d * input_width * input_height
 + input_batch_index;
-uint32_t out_index = out_w + out_h * output_width + out_d * output_height * output_width
+vsi_size_t out_index = out_w + out_h * output_width + out_d * output_height * output_width
 + output_batch_index;

 buffer[1][out_index] = buffer[0][in_index];
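Only the index types change in the loop above; the depth-to-space mapping itself is the CRD (column-row-depth) ordering, where output pixel (out_w, out_h, out_d) reads input channel (out_w % bs) + (out_h % bs) * bs + out_d * bs * bs at spatial position (out_w / bs, out_h / bs). A runnable check of that mapping for a 2x2 block (the typedef is an assumed stand-in):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t vsi_size_t; /* assumed stand-in for the library typedef */

    int main(void)
    {
        vsi_size_t bs = 2, out_w, out_h, out_d = 0;
        /* For block size 2 and out_d = 0, the four pixels of one output
         * block read input channels 0..3 at the same input position. */
        for (out_h = 0; out_h < 2; out_h++)
        {
            for (out_w = 0; out_w < 2; out_w++)
            {
                vsi_size_t in_d = (out_w % bs) + (out_h % bs) * bs + out_d * bs * bs;
                printf("out (w=%llu, h=%llu) <- in channel %llu at (%llu, %llu)\n",
                       (unsigned long long)out_w, (unsigned long long)out_h,
                       (unsigned long long)in_d,
                       (unsigned long long)(out_w / bs), (unsigned long long)(out_h / bs));
            }
        }
        return 0;
    }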
@@ -84,11 +84,11 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 uint32_t i;
-uint32_t n, a, numBatches, numAnchors, lengthBoxEncoding;
+vsi_size_t n, a, numBatches, numAnchors, lengthBoxEncoding;
 uint32_t kRoiDim = 4;
 float inv_scale_y = 0.0f;
 float inv_scale_x = 0.0f;

@@ -127,8 +127,8 @@ DEF_KERNEL_EXECUTOR(_compute)

 for ( n = 0; n < numBatches; n++ )
 {
-int32_t batch_in_offset = n * numAnchors * lengthBoxEncoding;
-int32_t batch_out_offset = n * numAnchors * kRoiDim;
+vsi_ssize_t batch_in_offset = n * numAnchors * lengthBoxEncoding;
+vsi_ssize_t batch_out_offset = n * numAnchors * kRoiDim;
 for ( a = 0; a < numAnchors; a++ )
 {
 float yCtr = f32_in_buffer[1][a * kRoiDim] + f32_in_buffer[1][a * kRoiDim + 2]
@@ -201,11 +201,11 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 uint32_t i, j;
-uint32_t n, a, c, b, numBatches, numAnchors, numClasses;
+vsi_size_t n, a, c, b, numBatches, numAnchors, numClasses;
 int32_t nms_type = 0;
 int32_t max_num_detections = 0;
 int32_t maximum_class_per_detection = 0;

@@ -213,7 +213,7 @@ DEF_KERNEL_EXECUTOR(_compute)
 float score_threshold = 0.0f;
 float iou_threshold = 0.0f;
 int32_t is_bg_in_label = 0;
-uint32_t numOutDetection = 0;
+vsi_size_t numOutDetection = 0;

 /* prepare data */
 for ( i = 0; i < _INPUT_NUM; i++ )

@@ -250,11 +250,11 @@ DEF_KERNEL_EXECUTOR(_compute)
 numOutDetection = out_attr[0]->shape->data[0];

 {
-uint32_t scores_index = 0;
-uint32_t scores_out_index = 0;
+vsi_size_t scores_index = 0;
+vsi_size_t scores_out_index = 0;
 uint32_t kRoiDim = 4;
-uint32_t roi_out_index = 0;
-uint32_t class_out_index = 0;
+vsi_size_t roi_out_index = 0;
+vsi_size_t class_out_index = 0;
 uint32_t* select = (uint32_t*)malloc(numAnchors * numClasses * sizeof(uint32_t));
 float* maxScores = (float*)malloc(numAnchors * sizeof(float));
 uint32_t* scoreInds = (uint32_t*)malloc((numClasses - 1) * sizeof(uint32_t));

@@ -273,10 +273,10 @@ DEF_KERNEL_EXECUTOR(_compute)
 select_start = select_size;
 for ( b = 0; b < numAnchors; b++ )
 {
-const uint32_t index = b * numClasses + c;
+const vsi_size_t index = b * numClasses + c;
 float score = f32_in_buffer[0][scores_index + index];
 if (score > score_threshold) {
-select[select_size] = index;
+select[select_size] = (uint32_t)index;
 select_size++;
 }
 }

@@ -297,8 +297,8 @@ DEF_KERNEL_EXECUTOR(_compute)
 // Calculate IoU of the rest, swap to the end (disgard) if needed.
 for ( i = j + 1; i < select_len; i++ )
 {
-int32_t roiBase0 = (select[select_start + i] / numClasses) * kRoiDim;
-int32_t roiBase1 = (select[select_start + j] / numClasses) * kRoiDim;
+vsi_ssize_t roiBase0 = (select[select_start + i] / numClasses) * kRoiDim;
+vsi_ssize_t roiBase1 = (select[select_start + j] / numClasses) * kRoiDim;
 float iou = _getIoUAxisAligned(&(roiBuffer[roiBase0]),
 &(roiBuffer[roiBase1]));


@@ -335,7 +335,7 @@ DEF_KERNEL_EXECUTOR(_compute)
 }
 else
 {
-uint32_t numOutClasses = vsi_nn_min(numClasses - 1, (uint32_t)maximum_class_per_detection);
+vsi_size_t numOutClasses = vsi_nn_min(numClasses - 1, (uint32_t)maximum_class_per_detection);
 uint32_t select_size = 0;
 uint32_t select_start = 0;
 uint32_t select_len = 0;

@@ -344,10 +344,10 @@ DEF_KERNEL_EXECUTOR(_compute)
 {
 // exclude background class: 0
 maxScores[a] = _max_element_value(&(f32_in_buffer[0]
-[scores_index + a * numClasses + 1]), numClasses - 1);
+[scores_index + a * numClasses + 1]), (uint32_t)(numClasses - 1));
 if (maxScores[a] > score_threshold)
 {
-select[select_size] = a;
+select[select_size] = (uint32_t)a;
 select_size++;
 }
 }

@@ -385,9 +385,9 @@ DEF_KERNEL_EXECUTOR(_compute)

 for ( i = 0; i < select_len; i++ )
 {
-_iota((int32_t*)scoreInds, numClasses - 1, 1);
+_iota((int32_t*)scoreInds, (uint32_t)(numClasses - 1), 1);
 _sort_element_by_score(&(f32_in_buffer[0][scores_index + select[i] * numClasses]),
-scoreInds, numClasses - 1);
+scoreInds, (uint32_t)(numClasses - 1));
 for (c = 0; c < numOutClasses; c++)
 {
 f32_out_buffer[0][scores_out_index + i * numOutClasses + c] =
@@ -76,9 +76,9 @@ DEF_KERNEL_EXECUTOR(_compute)
 float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 size_t i = 0;

 /* prepare data */
@@ -76,9 +76,9 @@ DEF_KERNEL_EXECUTOR(_compute)
 uint8_t *u8_out_buffer[_OUTPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM] = {NULL};
 vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM] = {NULL};
-size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
-size_t out_elements[_OUTPUT_NUM] = {0};
-size_t out_bytes[_OUTPUT_NUM] = {0};
+vsi_size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
+vsi_size_t out_elements[_OUTPUT_NUM] = {0};
+vsi_size_t out_bytes[_OUTPUT_NUM] = {0};
 int32_t i = 0;

 /* prepare data */