diff --git a/prebuilt-sdk/x86_64_linux/VERSION b/prebuilt-sdk/x86_64_linux/VERSION index effb0ed..40da7fc 100644 --- a/prebuilt-sdk/x86_64_linux/VERSION +++ b/prebuilt-sdk/x86_64_linux/VERSION @@ -1 +1 @@ -REL/6.4.6 +REL/6.4.8 diff --git a/prebuilt-sdk/x86_64_linux/include/CL/cl_viv_vx_ext.h b/prebuilt-sdk/x86_64_linux/include/CL/cl_viv_vx_ext.h index f5e2df1..02286d8 100644 --- a/prebuilt-sdk/x86_64_linux/include/CL/cl_viv_vx_ext.h +++ b/prebuilt-sdk/x86_64_linux/include/CL/cl_viv_vx_ext.h @@ -300,9 +300,13 @@ enum VXC_OP { VXC_OP_dp4x8_b, VXC_OP_dp2x16_b, VXC_OP_img_load, + VXC_OP_img_read, VXC_OP_img_load_3d, + VXC_OP_img_read_3d, VXC_OP_img_store, + VXC_OP_img_write, VXC_OP_img_store_3d, + VXC_OP_img_write_3d, VXC_OP_vload2, VXC_OP_vload3, VXC_OP_vload4, @@ -534,8 +538,8 @@ enum eVXC_ERROR * offset should be composed by using VXC_5BITOFFSET_XY(x, y) * Coord must be type of int4 or float4 */ -#define VXC_ReadImage3D(Dest, Image, Coord, Offset, Info) VXC_OP4(img_load_3d, Dest, Image, Coord, Offset, Info) -#define VXC_WriteImage3D(Image, Coord, Color, Info) VXC_OP4_NoDest(img_store_3d, Image, Coord, Color, Info) +#define VXC_ReadImage3D(Dest, Image, Coord, Offset, Info) VXC_OP4(img_read_3d, Dest, Image, Coord, Offset, Info) +#define VXC_WriteImage3D(Image, Coord, Color, Info) VXC_OP4_NoDest(img_write_3d, Image, Coord, Color, Info) #define VXC_Vload2(Dest, Pointer, Offset) do { int byteOffset = ((int)sizeof((Dest)))*(Offset); VXC_OP2(vload2, Dest, Pointer, byteOffset); } while(0) #define VXC_Vload4(Dest, Pointer, Offset) do { int byteOffset = ((int)sizeof((Dest)))*(Offset); VXC_OP2(vload4, Dest, Pointer, byteOffset); } while(0) diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h index 6e8c9a0..6c3671e 100644 --- a/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h +++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_kernels.h @@ -494,6 +494,8 @@ enum vx_kernel_e { 
VX_KERNEL_NN_CONVOLUTION_RELU_POOLING_MULTIPLY_LAYER2 = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x2E, + VX_KERNEL_NN_BATCH_GEMM = VX_KERNEL_BASE(VX_ID_VIVANTE, VX_LIBRARY_KHR_BASE) + 0x2F, + VX_KERNEL_MAX_1_2, /*!< \internal Used for VX1.2 bounds checking in the conformance test. */ }; diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h index e682779..e3baa23 100644 --- a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h +++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_compatible.h @@ -112,4 +112,12 @@ VX_CREATE_TENSOR_SUPPORT_PHYSICAL is used to declare that openvx can support phy */ #define VX_GRAPH_PREEMPTION_SUPPORT 1 +/* +VX_BATCH_GEMM_API_SUPPORT is used to declare that vsi openvx driver can support vxBatchGemmNode API to transform gemm to convolution + [value] + 0: not support + 1: support +*/ +#define VX_BATCH_GEMM_API_SUPPORT 1 + #endif /* __VX_KHR_COMPATIBLE_H__ */ diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h index 71c2932..41e1653 100644 --- a/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h +++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_khr_nn_internal.h @@ -216,6 +216,7 @@ typedef struct _vx_nn_convolution_relu_pooling_params_ext4_t vx_uint32 poolingPadRight; vx_uint32 poolingPadTop; vx_uint32 poolingPadBottom; + vx_bool enable_nn_tensor_add_relu; /*!< \brief Enable Relu function after tensor add. */ } vx_nn_convolution_relu_pooling_params_ext4_t, * vx_nn_convolution_relu_pooling_params_ext4; /*! \brief [Graph] Creates a Convolutional Network Convolution and Activation(Relu) and Pooling Layer Node, this fucntion match kronos NN Extension 1.2 verion. 
@@ -816,7 +817,8 @@ VX_API_ENTRY vx_node VX_API_CALL vxL2NormalizeLayer(vx_graph graph, vx_tensor in * \param [in] weights_biases [static] Point to WeightBiasesParameter data, vx_weights_biases_parameter is an opaque reference. * \param [in] convolution_relu_pooling_params [static] Pointer to parameters of type \ref vx_nn_convolution_relu_pooling_params_t * \param [in] size_of_convolution_relu_pooling_params [static] Size in bytes of convolution_relu_pooling_params. - * \param [out] outputs_conv The convolution output tensor data. Output will have the same number and structure of dimensions as inputs_conv. + * \param [in] outputs_conv The convolution output tensor data. Output will have the same number and structure of dimensions as inputs_conv. + * This tensor is used only to provide format information of the convolution output data to hardware; it does not actually return convolution output data. * \param [out] outputs_add The final add output tensor data. Output will have the same number and structure of dimensions as input. * \return vx_node. * \returns A node reference \ref vx_node. Any possible errors preventing a @@ -863,7 +865,8 @@ VX_API_ENTRY vx_node VX_API_CALL vxConvolutionReluPoolingAddLayer2( * \param [in] weights_biases [static] Point to WeightBiasesParameter data, vx_weights_biases_parameter is an opaque reference. * \param [in] convolution_relu_pooling_params [static] Pointer to parameters of type \ref vx_nn_convolution_relu_pooling_params_t * \param [in] size_of_convolution_relu_pooling_params [static] Size in bytes of convolution_relu_pooling_params. - * \param [out] outputs_conv The convolution output tensor data. Output will have the same number and structure of dimensions as inputs_conv. + * \param [in] outputs_conv The convolution output tensor data. Output will have the same number and structure of dimensions as inputs_conv. 
+ * This tensor is used only to provide format information of the convolution output data to hardware; it does not actually return convolution output data. * \param [out] outputs_mul The final mul output tensor data. Output will have the same number and structure of dimensions as input. * \return vx_node. * \returns A node reference \ref vx_node. Any possible errors preventing a diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_nodes.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_nodes.h index 3bfb7f2..bf513b5 100644 --- a/prebuilt-sdk/x86_64_linux/include/VX/vx_nodes.h +++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_nodes.h @@ -940,6 +940,29 @@ VX_API_ENTRY vx_node VX_API_CALL vxTensorMatrixMultiplyNode(vx_graph graph, vx_t */ VX_API_ENTRY vx_node VX_API_CALL vxCopyNode(vx_graph graph, vx_reference input, vx_reference output); +/*! \brief Create a batch gemm node, the calculation formula is output = matrix_a * matrix_b + matrix_c. + * \param [in] graph The reference to the graph. + * \param [in] matrix_a The first input tensor. + * \param [in] matrix_b The second input tensor. Must be in the same data type and batch count as first input tensor. + * \param [in] matrix_c The third input tensor. Must be in the same data type and batch count as first input tensor. [optional] + * \param [in] trans_a If true, the matrix_a has been transposed before calculation. + * \param [in] trans_b If true, the matrix_b has been transposed before calculation. + * \param [in] trans_c If true, the matrix_c has been transposed before calculation. [optional] + * \param [out] output The output tensor. Output dimension must agree with the formula in the description. + * \return \ref vx_node. + * \retval vx_node A node reference. 
Any possible errors preventing a successful creation + * should be checked using \ref vxGetStatus + * \ingroup group_vision_function_gemm + */ +VX_API_ENTRY vx_node VX_API_CALL vxBatchGemmNode(vx_graph graph, + vx_tensor matrix_a, + vx_tensor matrix_b, + vx_tensor matrix_c, + vx_scalar trans_a, + vx_scalar trans_b, + vx_scalar trans_c, + vx_tensor output); + #ifdef __cplusplus } #endif diff --git a/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h b/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h index cbff50c..0881c15 100644 --- a/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h +++ b/prebuilt-sdk/x86_64_linux/include/VX/vx_types.h @@ -1288,6 +1288,8 @@ enum vx_channel_e { VX_CHANNEL_U = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CHANNEL) + 0x15, /*! \brief Use to extract the Cr/V/Value channel, no matter the byte or packing order. */ VX_CHANNEL_V = VX_ENUM_BASE(VX_ID_KHRONOS, VX_ENUM_CHANNEL) + 0x16, + + VX_CHANNEL_UV = VX_ENUM_BASE(VX_ID_VIVANTE, VX_ENUM_CHANNEL) + 0x0, }; /*! \brief An enumeration of memory import types. 
diff --git a/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so b/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so index 2780651..575b344 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so and b/prebuilt-sdk/x86_64_linux/lib/libArchModelSw.so differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libCLC.so b/prebuilt-sdk/x86_64_linux/lib/libCLC.so index 032cf6f..ff87c25 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libCLC.so and b/prebuilt-sdk/x86_64_linux/lib/libCLC.so differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libEmulator.so b/prebuilt-sdk/x86_64_linux/lib/libEmulator.so index 783dccf..dbd7197 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libEmulator.so and b/prebuilt-sdk/x86_64_linux/lib/libEmulator.so differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libGAL.so b/prebuilt-sdk/x86_64_linux/lib/libGAL.so index b1d825f..0439666 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libGAL.so and b/prebuilt-sdk/x86_64_linux/lib/libGAL.so differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so b/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so index 6e87b6e..9a4e15c 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so and b/prebuilt-sdk/x86_64_linux/lib/libNNArchPerf.so differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 b/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 index 622ef8f..99ec9c8 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 and b/prebuilt-sdk/x86_64_linux/lib/libOpenVX.so.1.3.0 differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libOpenVXC.so b/prebuilt-sdk/x86_64_linux/lib/libOpenVXC.so index 64d2d68..44e37de 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libOpenVXC.so and b/prebuilt-sdk/x86_64_linux/lib/libOpenVXC.so differ diff --git a/prebuilt-sdk/x86_64_linux/lib/libVSC.so b/prebuilt-sdk/x86_64_linux/lib/libVSC.so index c398988..07646f8 100755 Binary files a/prebuilt-sdk/x86_64_linux/lib/libVSC.so and b/prebuilt-sdk/x86_64_linux/lib/libVSC.so differ