Update internal to REL/v1.1.30

Commit: 6ccb425e
Signed-off-by: Jiang Bo <bo.jiang@verisilicon.com>
Jiang Bo 2021-02-26 14:01:35 +08:00 committed by Kainan Cha
parent 62898a4419
commit def53f4b5c
77 changed files with 8294 additions and 225 deletions

.gitignore (vendored, new file, 335 lines added)
View File

@@ -0,0 +1,335 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
*-[Dd]ebug/
*-[Dd]ebugPublic/
*-[Rr]elease/
*-[Rr]eleases/
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
*.o
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
NNApi0.3/
NNApi0.4/
OpenVX1.2/
bazel-bin
bazel-genfiles
bazel-out
bazel-ovxlib
bazel-testlogs
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# VS code
.vscode
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
**/Properties/launchSettings.json
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# TypeScript v1 declaration files
typings/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# JetBrains Rider
.idea/
*.sln.iml
# CodeRush
.cr/
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# IDE
.settings/

View File

@@ -144,3 +144,5 @@ DEF_OP(PRE_PROCESS_YUV444)
 DEF_OP(PRE_PROCESS_NV12)
 DEF_OP(SCATTER_ND)
 DEF_OP(DECONVOLUTION1D)
+DEF_OP(INTERP)
+DEF_OP(RESIZE_1D)

View File

@@ -14,3 +14,5 @@ DEF_OP(RESIZE_NEAREST_INTERNAL)
 DEF_OP(DEPTH2SPACE_INTERNAL)
 DEF_OP(GRUCELL_ACTIVATION_INTERNAL)
 DEF_OP(GRUCELL_ACTIVATION_INTERNAL_SMA)
+DEF_OP(RESIZE_1D_BILINEAR_INTERNAL)
+DEF_OP(RESIZE_1D_NEAREST_INTERNAL)

View File

@@ -44,6 +44,7 @@ typedef struct _vsi_nn_elu_param
 {
     /* elu layer local data structure */
     vsi_nn_elu_lcl_data local;
+    float alpha;
 } vsi_nn_elu_param;
 #ifdef __cplusplus

View File

@@ -0,0 +1,44 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_INTERP_H
#define _VSI_NN_OP_INTERP_H
#include "vsi_nn_types.h"
typedef struct _vsi_nn_interp_param
{
struct _interp_local_data_t* local;
    int32_t height;        // height of output
    int32_t width;         // width of output
    int32_t zoom_factor;   // zoom factor
    int32_t shrink_factor; // shrink factor
    int32_t pad_beg;       // padding at begin of input
    int32_t pad_end;       // padding at end of input
} vsi_nn_interp_param;
#endif
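The zoom/shrink/pad fields follow the shape conventions of Caffe's Interp layer. A minimal sketch of the output-width derivation they imply, assuming this op mirrors Caffe's behavior (this header alone does not pin that down, and explicit height/width, when set, would override the derived size):

    /* hedged sketch, not taken from this commit */
    int in_eff = in_w + pad_beg + pad_end;               /* padded input extent */
    int out_w  = in_eff;
    if (shrink_factor > 1)
        out_w = (in_eff - 1) / shrink_factor + 1;        /* coarsen first */
    if (zoom_factor > 1)
        out_w = out_w + (out_w - 1) * (zoom_factor - 1); /* then zoom */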

View File

@@ -0,0 +1,44 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_RESIZE_1D_H
#define _VSI_NN_OP_RESIZE_1D_H
#include "vsi_nn_types.h"
typedef struct _vsi_nn_resize_1d_param
{
struct _resize_1d_local_data_t* local;
vsi_enum type;
float factor;
int32_t size[2];
vsi_bool align_corners;
vsi_bool half_pixel_centers;
} vsi_nn_resize_1d_param;
_compiler_assert(offsetof(vsi_nn_resize_1d_param, local) == 0, \
vsi_nn_resize_1d_h );
#endif

View File

@@ -0,0 +1,42 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H
#define _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H
#include "vsi_nn_types.h"
typedef struct _vsi_nn_resize_1d_bilinear_internal_param
{
struct _resize_1d_bilinear_internal_local_data_t* local;
vsi_bool align_corners;
vsi_bool half_pixel_centers;
float factor;
} vsi_nn_resize_1d_bilinear_internal_param;
_compiler_assert(offsetof(vsi_nn_resize_1d_bilinear_internal_param, local) == 0, \
vsi_nn_resize_1d_bilinear_internal_h );
#endif

View File

@@ -0,0 +1,42 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#ifndef _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H
#define _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H
#include "vsi_nn_types.h"
typedef struct _vsi_nn_resize_1d_nearest_internal_param
{
struct _resize_1d_nearest_internal_local_data_t* local;
vsi_bool align_corners;
vsi_bool half_pixel_centers;
float factor;
} vsi_nn_resize_1d_nearest_internal_param;
_compiler_assert(offsetof(vsi_nn_resize_1d_nearest_internal_param, local) == 0, \
vsi_nn_resize_1d_nearest_internal_h );
#endif

View File

@@ -195,13 +195,6 @@ OVXLIB_API vsi_bool vsi_nn_CheckFilePath
     const char *path
     );
-OVXLIB_API void vsi_nn_GetFP32MultiAndPostShift
-    (
-    vx_float32 mult,
-    vx_uint16 *M0,
-    vx_int8 *N
-    );
 /**
  * Malloc aligned buffer
  * Malloc address and size aligned buffer.

View File

@@ -32,6 +32,13 @@
 extern "C" {
 #endif
+vx_tensor vsi_nn_CreateRawTensorFromData
+    (
+    vsi_nn_graph_t * graph,
+    uint8_t * data,
+    vsi_nn_tensor_attr_t * attr
+    );
 vsi_status vsi_nn_OptimizeGraph
     (
     vsi_nn_graph_t* graph,
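A minimal caller-side sketch of the newly exposed helper (attribute setup elided; `raw_data` is a hypothetical name for a buffer whose layout matches `attr`):

    vsi_nn_tensor_attr_t attr;   /* filled in by the caller */
    vx_tensor t = vsi_nn_CreateRawTensorFromData( graph, raw_data, &attr );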

View File

@@ -158,6 +158,10 @@
 #include "ops/vsi_nn_op_squeeze.h"
 #include "ops/vsi_nn_op_expand_broadcast.h"
 #include "ops/vsi_nn_op_deconvolution1d.h"
+#include "ops/vsi_nn_op_interp.h"
+#include "ops/vsi_nn_op_resize_1d.h"
+#include "ops/vsi_nn_op_resize_1d_bilinear_internal.h"
+#include "ops/vsi_nn_op_resize_1d_nearest_internal.h"
 /* custom node head define */
 #include "custom/vsi_nn_custom_node_type.h"
@@ -302,6 +306,10 @@ typedef union _vsi_nn_nn_param
     vsi_nn_squeeze_param squeeze;
     vsi_nn_expand_broadcast_param expand_broadcast;
     vsi_nn_deconvolution1d_param deconvolution1d;
+    vsi_nn_interp_param interp;
+    vsi_nn_resize_1d_param resize_1d;
+    vsi_nn_resize_1d_bilinear_internal_param resize_1d_bilinear_internal;
+    vsi_nn_resize_1d_nearest_internal_param resize_1d_nearest_internal;
     uint8_t client_param[128];
 /* custom node data struct define */

View File

@@ -33,7 +33,7 @@ extern "C"{
 #define VSI_NN_VERSION_MAJOR 1
 #define VSI_NN_VERSION_MINOR 1
-#define VSI_NN_VERSION_PATCH 28
+#define VSI_NN_VERSION_PATCH 30
 #define VSI_NN_VERSION \
     (VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)
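For reference, with this bump the packed value evaluates to 1 * 10000 + 1 * 100 + 30 = 10130, so version checks of the form `VSI_NN_VERSION >= 10130` pick up this release.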

View File

@@ -117,6 +117,7 @@ LOCAL_C_INCLUDES += \
 LOCAL_CFLAGS := \
     -DLINUX \
     -D'OVXLIB_API=__attribute__((visibility("default")))' \
+    -DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION) \
     -Wno-sign-compare \
     -Wno-implicit-function-declaration \
     -Wno-sometimes-uninitialized \

View File

@@ -168,12 +168,14 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };
 #define SCALAR_INPUT_SCALE (2)
 #define SCALAR_INPUT_TAIL (3)
 #define SCALAR_OUTPUT_SCALE (4)
 #define SCALAR_OUTPUT_ZP (5)
+#define SCALAR_ALPHA (6)
 #define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)
 /*
@@ -293,6 +295,7 @@ static vsi_nn_kernel_node_t _setup
     float inputTail = (float)inputs[0]->attr.dtype.zero_point * inputScale;
     float outputScale = outputs[0]->attr.dtype.scale;
     float outputZP = (float)outputs[0]->attr.dtype.zero_point + 0.5f;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
     ret = vsi_nn_kernel_optimize_element_shape(
             (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
@@ -331,6 +334,8 @@ static vsi_nn_kernel_node_t _setup
             graph, F32, &outputScale );
         node_params[SCALAR_OUTPUT_ZP] = vsi_nn_kernel_scalar_create(
             graph, F32, &outputZP );
+        node_params[SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+            graph, F32, &alpha );
         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
@@ -369,6 +374,11 @@ OnError:
         vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_ZP] );
     }
+    if (node_params[SCALAR_ALPHA])
+    {
+        vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALPHA] );
+    }
     return node;
 } /* _setup() */
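A hedged sketch of the producer side: the ELU op layer is expected to publish `alpha` into the kernel param dict before this `_setup()` runs. The `vsi_nn_kernel_param_*` names below are ovxlib's kernel-param API; `alpha_value` is a hypothetical placeholder, and the real call site is outside this diff:

    vsi_nn_kernel_param_t * param = vsi_nn_kernel_param_create();
    vsi_nn_kernel_param_add_float32( param, "alpha", alpha_value ); /* alpha_value: caller-supplied */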

View File

@@ -356,7 +356,8 @@ static vsi_nn_kernel_node_t _setup
     int32_t out_shape[VSI_NN_MAX_DIM_NUM] = {0};
     int32_t out_rs_flg = 0;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     int32_t keep_dim = vsi_nn_kernel_param_get_int32( params, "keep_dim" );
     int32_t first_axis = axis[0];
     int32_t i = 0;
@@ -369,6 +370,8 @@ static vsi_nn_kernel_node_t _setup
     float input_scale = inputs[0]->attr.dtype.scale;
     float dim_ratio = (float)1.0 / (float)(width * height);
+    axis_num = (int32_t)axis_num_temp;
     if(inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP)
     {
         if (inputs[0]->attr.dtype.fl > 0)

View File

@@ -0,0 +1,305 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"
__BEGIN_DECLS
#define _RESIZE_1D_BILINEAR_KERNEL_SOURCE() "resize_1d_bilinear"
#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
(( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) )
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
{ RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
CVIVANTE_NAMESPACE("cl.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
_RESIZE_1D_BILINEAR_KERNEL_SOURCE() }
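/* Note: the key keeps the two dtypes in disjoint bit fields
 * ((in << 20) | (out << 8)), so every (input, output) pair registered
 * below maps to a distinct key for the linear search in _query_kernel(). */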
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _resize_1d_bilinear_kernel_map[] =
{
// Register kernel here
PACK_KERNEL_MAP( F32, F32),
PACK_KERNEL_MAP( U8, U8),
};
/*
* Kernel params
*/
static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )
#define SCALAR_SCALE_X (2)
#define SCALAR_HALF_PIXEL (3)
#define SCALAR_INPUT_SCALE (4)
#define SCALAR_INPUT_TAIL (5)
#define SCALAR_OUTPUT_SCALE (6)
#define SCALAR_OUTPUT_TAIL (7)
#define RESIZE_1D_BILINEAR_NUM 4
#define RESIZE_1D_BILINEAR_QUANT_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_resize_1d_bilinear_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_int_array_t * out_shape = NULL;
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
out_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
gpu_param.global_size[0] = gpu_align_p2(
(out_shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0], 4);
gpu_param.global_size[1] = (
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1]);
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
SAFE_FREE_TENSOR_ATTR(output_attr);
return status;
} /* _resize_1d_bilinear_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
vsi_bool *is_use_u8_kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype = F16;
vsi_nn_kernel_dtype_e out_dtype = F16;
const _kernel_map_type * kernel_map = _resize_1d_bilinear_kernel_map;
size_t kernel_map_size = _cnt_of_array( _resize_1d_bilinear_kernel_map );
vx_param_description_t * param_def = _resize_1d_bilinear_kernel_param_def;
size_t param_def_size = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
vx_kernel_initialize_f initializer = _resize_1d_bilinear_initializer;
uint32_t key = 0;
uint32_t i = 0;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (F16 == in_dtype)
{
in_dtype = F32;
}
if (F16 == out_dtype)
{
out_dtype = F32;
}
if ((U8 == in_dtype) || (U8 == out_dtype))
{
param_def_size = RESIZE_1D_BILINEAR_QUANT_NUM;
*is_use_u8_kernel = TRUE;
}
else
{
param_def_size = RESIZE_1D_BILINEAR_NUM;
*is_use_u8_kernel = FALSE;
}
key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype );
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = (uint32_t)param_def_size;
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[RESIZE_1D_BILINEAR_QUANT_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
int32_t in_width = inputs[0]->attr.size[0];
int32_t out_width = outputs[0]->attr.size[0];
float input_zp = (float)inputs[0]->attr.dtype.zero_point;
float input_scale = inputs[0]->attr.dtype.scale;
float input_tail = -(input_zp * input_scale);
float output_zp = (float)outputs[0]->attr.dtype.zero_point;
float output_scale = (0 == outputs[0]->attr.dtype.scale) ? 1.0f : 1.0f / outputs[0]->attr.dtype.scale;
float half_pixel_value = 0.0f;
float scale_factor_x = 0.0f;
vsi_bool is_use_u8_kernel = FALSE;
if (align_corners && out_width > 1)
{
scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1);
}
else
{
scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width;
}
if (half_pixel_centers)
{
half_pixel_value = 0.5f;
}
else
{
half_pixel_value = 0.0f;
}
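    /* Worked example: in_width = 4, out_width = 8 gives scale_factor_x = 4/8 = 0.5
     * (or 3/7 with align_corners). half_pixel_value = 0.5 shifts sampling to
     * pixel centers, matching the CPU reference added in this commit:
     * input_w = (w + 0.5) * scale - 0.5. */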
status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
size_t node_params_num = RESIZE_1D_BILINEAR_NUM;
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, RESIZE_1D_BILINEAR_QUANT_NUM,
inputs, input_num, outputs, output_num );
node_params[SCALAR_SCALE_X] = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x );
node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value );
if (is_use_u8_kernel)
{
node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input_scale );
node_params[SCALAR_INPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &input_tail );
node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &output_zp );
node_params_num = RESIZE_1D_BILINEAR_QUANT_NUM;
}
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
VSI_ASSERT( status == VSI_SUCCESS );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
if (is_use_u8_kernel)
{
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
}
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CL( resize_1d_bilinear, _setup )

View File

@@ -0,0 +1,312 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
typedef enum
{
INTERNAL_KERNEL_RESIZE_1D_NEAREST,
} _internal_kernel_e;
#define _RESIZE_1D_NEAREST_KERNEL_SOURCE "resize_1d_nearest"
#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
(( IN_DTYPE << 8 ) | ( OUT_DTYPE ))
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
{ RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
CVIVANTE_NAMESPACE("cl.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
_RESIZE_1D_NEAREST_KERNEL_SOURCE }
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _resize_1d_nearest_kernel_map[] =
{
// Register kernel here
PACK_KERNEL_MAP( F32, F32),
PACK_KERNEL_MAP( U8, U8),
};
/*
* Kernel params
*/
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM 5
#define _RESIZE_1D_NEAREST_QUANT_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def )
#define SCALAR_SCALE_X (2)
#define SCALAR_HALF_PIXEL (3)
#define SCALAR_ROUND_VALUE (4)
#define SCALAR_SCALE_VALUE (5)
#define SCALAR_TAIL_VALUE (6)
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_int_array_t * out_shape = NULL;
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
out_shape = output_attr->shape;
gpu_param.global_scale[0] = 1;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
gpu_param.global_size[0] = gpu_align_p2(
(out_shape->data[0] + gpu_param.global_scale[0] - 1)
/ gpu_param.global_scale[0], 4);
gpu_param.global_size[1] = (
(out_shape->data[1] + gpu_param.global_scale[1] - 1)
/ gpu_param.global_scale[1]);
gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
SAFE_FREE_TENSOR_ATTR(output_attr);
return status;
} /* _resize_1d_nearest_initializer() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
vsi_bool *is_use_u8_kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype = F16;
vsi_nn_kernel_dtype_e out_dtype = F16;
const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map;
size_t kernel_map_size = _cnt_of_array( _resize_1d_nearest_kernel_map );
vx_param_description_t * param_def = _resize_1d_nearest_kernel_param_def;
size_t param_def_size = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
vx_kernel_initialize_f initializer = _resize_1d_nearest_initializer;
uint32_t key = 0;
uint32_t i = 0;
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (F16 == in_dtype)
{
in_dtype = F32;
}
if (F16 == out_dtype)
{
out_dtype = F32;
}
if ((U8 == in_dtype) || (U8 == out_dtype))
{
param_def_size = _RESIZE_1D_NEAREST_QUANT_NUM;
*is_use_u8_kernel = TRUE;
}
else
{
param_def_size = _RESIZE_1D_NEAREST_PARAM_NUM;
*is_use_u8_kernel = FALSE;
}
key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype );
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if ( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = (uint32_t)param_def_size;
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_QUANT_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
int32_t in_width = inputs[0]->attr.size[0];
int32_t out_width = outputs[0]->attr.size[0];
float input_zp = (float)inputs[0]->attr.dtype.zero_point;
float input_scale = inputs[0]->attr.dtype.scale;
float output_scale = (0 == outputs[0]->attr.dtype.scale) ? \
input_scale : input_scale / outputs[0]->attr.dtype.scale;
float output_tail = (float)outputs[0]->attr.dtype.zero_point - input_zp * output_scale;
float half_pixel_value = 0.0f;
float round_value = 0.0f;
float scale_factor_x = 0.0f;
vsi_bool is_use_u8_kernel = FALSE;
if (align_corners && out_width > 1)
{
scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1);
}
else
{
scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width;
}
if (align_corners)
{
round_value = 0.5f;
}
else
{
round_value = 0.0f;
}
if (half_pixel_centers)
{
half_pixel_value = 0.5f;
}
else
{
half_pixel_value = 0.0f;
}
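    /* round_value folds the two index modes into one kernel expression,
     * in_x = floor(input_w + round_value): with align_corners the 0.5 makes
     * it round-to-nearest, otherwise it stays a plain floor (see the CPU
     * reference implementation of resize_1d_nearest in this commit). */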
status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
size_t node_params_num = _RESIZE_1D_NEAREST_PARAM_NUM;
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[SCALAR_SCALE_X] = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x );
node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value );
node_params[SCALAR_ROUND_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &round_value );
if (is_use_u8_kernel)
{
node_params[SCALAR_SCALE_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
node_params[SCALAR_TAIL_VALUE] = vsi_nn_kernel_scalar_create(graph, F32, &output_tail );
node_params_num = _RESIZE_1D_NEAREST_QUANT_NUM;
}
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_ROUND_VALUE] );
if (is_use_u8_kernel)
{
vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_VALUE] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_TAIL_VALUE] );
}
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CL( resize_1d_nearest, _setup )

View File

@@ -49,7 +49,7 @@ typedef enum
 } unary_type_e;
-#define _CPU_ARG_NUM (1)
+#define _CPU_ARG_NUM (2)
 #define _CPU_INPUT_NUM (1)
 #define _CPU_OUTPUT_NUM (1)
 #define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
@@ -71,9 +71,9 @@ static float log_eval(float data)
     return logf(data);
 }
-static float elu_eval(float data)
+static float elu_eval(float data, float alpha)
 {
-    return data >=0 ? data : expf(data) - 1;
+    return data >=0 ? data : expf(data) * alpha - alpha;
 }
 static float neg_eval(float data)
@@ -114,6 +114,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
     size_t out_elements = 0;
     vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
     int32_t i;
+    float alpha = 0;
     int32_t unary_type = 0;
     tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
@@ -126,6 +127,8 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
     status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &unary_type);
     CHECK_STATUS_FAIL_GOTO(status, final );
+    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &alpha);
+    CHECK_STATUS_FAIL_GOTO(status, final );
     buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
     CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
@@ -151,7 +154,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
                 data = log_eval(data);
                 break;
             case UNARY_ELU:
-                data = elu_eval(data);
+                data = elu_eval(data, alpha);
                 break;
             case UNARY_NEG:
                 data = neg_eval(data);
@@ -193,9 +196,11 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };
 #define INPUT_FUNC_TYPE (2)
+#define INPUT_SCALAR_ALPHA (3)
 static const vx_kernel_description_t _kernel_info =
 {
@@ -237,6 +242,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_status status = VSI_SUCCESS;
     vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
     status = _query_kernel( inputs, outputs, kernel );
     if( VSI_SUCCESS == status)
@@ -249,10 +255,13 @@ static vsi_nn_kernel_node_t _setup
             inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
         backend_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
             graph, I32, &unary_type );
+        backend_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+            graph, F32, &alpha );
         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );
         vsi_nn_kernel_scalar_release( &backend_params[INPUT_FUNC_TYPE] );
+        vsi_nn_kernel_scalar_release( &backend_params[INPUT_SCALAR_ALPHA] );
     }
     else
     {
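The rewritten `elu_eval` is the parameterized ELU, f(x) = x for x >= 0 and alpha * (e^x - 1) otherwise; `expf(data) * alpha - alpha` is the same expression with alpha distributed, and alpha = 1 reproduces the previously hard-coded behavior.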

View File

@@ -258,11 +258,14 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     vsi_bool is_continue_axis = TRUE;
     uint32_t mask = 0;
     int32_t i = 0;
+    axis_num = (int32_t)axis_num_temp;
     for ( i = 1; i < axis_num; i++)
     {
         if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)

View File

@@ -0,0 +1,271 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_1d_bilinear")
/*
* Kernel params
*/
static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )
#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL (3)
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
size_t out_elements[_OUTPUT_NUM] = {0};
size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i = 0;
int32_t align_corners = 0;
int32_t half_pixel_centers = 0;
float width_scale = 1.0f;
uint32_t input_width = 0, output_width = 0;
uint32_t w = 0, out = 0;
uint32_t output_dims = 0;
float data00 = .0f, data01 = .0f, interpolation = .0f;
uint32_t index = 0;
uint32_t outer = 0;
/* prepare data */
for (i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
}
for (i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
input_width = in_attr[0]->shape->data[0];
output_width = out_attr[0]->shape->data[0];
output_dims = (uint32_t)out_attr[0]->shape->size;
if (align_corners && output_width > 1)
{
width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
}
else
{
width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
}
outer = 1;
for (i = 1; i < output_dims; i++)
{
outer = outer * out_attr[0]->shape->data[i];
}
for (out = 0; out < outer; out++)
{
vx_int32 input_base = out * input_width;
vx_int32 output_base = out * output_width;
for (w = 0; w < output_width; w ++)
{
vx_float32 input_w;
vx_int32 w0;
vx_int32 w1;
if (half_pixel_centers)
{
input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f;
}
else
{
input_w = w * width_scale;
}
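            /* w0 truncates input_w to the left sample; w1 is its right
             * neighbor clamped into range. The fractional part input_w - w0
             * weights the blend: out = (1 - frac) * in[w0] + frac * in[w1]. */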
w0 = (vx_int32)input_w;
w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vx_int32)(input_width - 1));
index = input_base + w0;
data00 = f32_in_buffer[0][index];
index = input_base + w1;
data01 = f32_in_buffer[0][index];
interpolation = data00 * (1 - (input_w - w0)) +
data01 * (input_w - w0);
index = output_base + w;
f32_out_buffer[0][index] = interpolation;
}
}
/* save data */
for (i = 0; i < _OUTPUT_NUM; i++)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for (i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _resize_1d_bilinear_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_BILINEAR_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_BILINEAR_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_BILINEAR_PARAM_NUM );
VSI_ASSERT( status == VSI_SUCCESS );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( resize_1d_bilinear, _setup )

View File

@@ -0,0 +1,271 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_1d_nearest")
/*
* Kernel params
*/
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def )
#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL (3)
/*
* Kernel function
*/
DEF_KERNEL_EXECUTOR(_compute)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
float *f32_in_buffer[_INPUT_NUM] = {NULL};
float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
size_t out_elements[_OUTPUT_NUM] = {0};
size_t out_bytes[_OUTPUT_NUM] = {0};
uint32_t i = 0;
int32_t align_corners = 0;
int32_t half_pixel_centers = 0;
float width_scale = 1.0f;
uint32_t input_width = 0, output_width = 0;
uint32_t w = 0, out = 0;
uint32_t output_dims = 0;
uint32_t outer = 0;
/* prepare data */
for (i = 0; i < _INPUT_NUM; i ++)
{
input[i] = (vsi_nn_kernel_tensor_t)param[i];
in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
}
for (i = 0; i < _OUTPUT_NUM; i ++)
{
output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
out_bytes[i] = out_elements[i] * sizeof(float);
f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
memset( f32_out_buffer[i], 0, out_bytes[i] );
}
vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
input_width = in_attr[0]->shape->data[0];
output_width = out_attr[0]->shape->data[0];
output_dims = (uint32_t)out_attr[0]->shape->size;
if (align_corners && output_width > 1)
{
width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
}
else
{
width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
}
outer = 1;
for (i = 1; i < output_dims; i++)
{
outer = outer * out_attr[0]->shape->data[i];
}
for (out = 0; out < outer; out++)
{
vx_int32 input_base = out * input_width;
vx_int32 output_base = out * output_width;
for (w = 0; w < output_width; w ++)
{
float input_w;
uint32_t in_x;
int32_t in_index;
int32_t out_index;
if (half_pixel_centers)
{
input_w = ((float)w + 0.5f) * width_scale;
}
else
{
input_w = w * width_scale;
}
if (align_corners)
{
in_x = vsi_nn_min((uint32_t)simple_round(input_w), input_width - 1);
}
else
{
in_x = vsi_nn_min((uint32_t)floorf(input_w), input_width - 1);
}
in_index = in_x + input_base;
out_index = w + output_base;
f32_out_buffer[0][out_index] = f32_in_buffer[0][in_index];
}
}
/* save data */
for (i = 0; i < _OUTPUT_NUM; i++)
{
status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
f32_out_buffer[i], out_elements[i] );
CHECK_STATUS_FAIL_GOTO( status, final );
}
final:
for (i = 0; i < _INPUT_NUM; i++)
{
if (f32_in_buffer[i])
{
free(f32_in_buffer[i]);
f32_in_buffer[i] = NULL;
}
if (in_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
}
}
for (i = 0; i < _OUTPUT_NUM; i++)
{
if (f32_out_buffer[i])
{
free(f32_out_buffer[i]);
f32_out_buffer[i] = NULL;
}
if (out_attr[i])
{
vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
}
}
return status;
} /* _compute() */
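The index math in _compute() reduces to a small pure function. The sketch below is a hypothetical standalone helper (not part of this file) that applies the same rules: half_pixel_centers shifts the sample point by half a pixel, align_corners selects round-to-nearest instead of floor, and the result is clamped to the last source column. It assumes simple_round() behaves as round-half-up on non-negative input.
#include <math.h>
#include <stdint.h>
/* Hypothetical helper mirroring the loop body of _compute() above. */
static uint32_t nearest_src_index
    (
    uint32_t w,
    float width_scale,
    uint32_t input_width,
    int32_t align_corners,
    int32_t half_pixel_centers
    )
{
    float input_w = half_pixel_centers ?
        ((float)w + 0.5f) * width_scale : (float)w * width_scale;
    uint32_t in_x = align_corners ?
        (uint32_t)(input_w + 0.5f) : (uint32_t)floorf(input_w);
    return in_x < input_width - 1 ? in_x : input_width - 1;
}
For example, with input_width = 4, output_width = 8 and half_pixel_centers set, width_scale is 0.5 and output column 3 samples source column floorf((3 + 0.5) * 0.5) = 1.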
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs
/* Add extra params */
)
{
vsi_status status = VSI_FAILURE;
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
kernel->info.function = _compute;
kernel->info.parameters = _resize_1d_nearest_kernel_param_def;
kernel->info.numParams = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
status = VSI_SUCCESS;
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM];
vsi_nn_kernel_node_t node = NULL;
int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
status = _query_kernel( kernel, inputs, outputs );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
VSI_ASSERT( status == VSI_SUCCESS );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_CPU( resize_1d_nearest, _setup )

View File

@@ -279,10 +279,10 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
 if( U8 == input_dtype && F16 == output_dtype )
 {
-    vx_uint16 M0 = 0;
-    vx_int8 postShift = 0;
-    vx_uint32 multAndoutZP0[2] = {0};
-    vx_uint32 multAndoutZP1[2] = {0};
+    uint16_t M0 = 0;
+    int32_t postShift = 0;
+    uint32_t multAndoutZP0[2] = {0};
+    uint32_t multAndoutZP1[2] = {0};
     gpu_dp_inst_t uniU8MulAndPostShift_0_Lo_2x8 = {{
         0xdddddddd, // TCfg
@@ -305,12 +305,12 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
         0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
     }, GPU_DP_TYPE_16};
-    vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift);
     multAndoutZP0[0] = (vx_uint32)(M0);
     multAndoutZP0[1] = (vx_uint32)((output_ZP << postShift) - input_ZP * M0);
     uniU8MulAndPostShift_0_Lo_2x8.data[7] |= (postShift & 0x1F);
-    vsi_nn_GetFP32MultiAndPostShift(scaleIn1 / scaleOut, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(scaleIn1 / scaleOut, &M0, &postShift);
     multAndoutZP1[0] = (vx_uint32)(M0);
     multAndoutZP1[1] = (vx_uint32)((output_ZP << postShift) - input_ZP1 * M0);
     uniU8MulAndPostShift_1_Lo_2x8.data[7] |= (postShift & 0x1F);

View File

@@ -268,7 +268,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
 {
     uint32_t multAndoutZP[2] = {0};
     uint16_t M0 = 0;
-    int8_t postShift = 0;
+    int32_t postShift = 0;
     gpu_dp_inst_t uniDataMulAndPostShift_2x8 = {{
         0xdddddddd, // TCfg
         0x44444444, // ASelt
@@ -279,7 +279,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
         0x00000000, 0x00000000, 0x00000000, 0x00000000,
         0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
     }, GPU_DP_TYPE_16};
-    vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift);
     multAndoutZP[0] = (uint32_t)(M0);
     multAndoutZP[1] = (uint32_t)(output_ZP << postShift );
@@ -434,8 +434,8 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
     int32_t packedMaxData[4];
     float uint8Scale = scaleIn / scaleOut;
     uint16_t M0 = 0;
-    int8_t postShift = 0;
+    int32_t postShift = 0;
     uint32_t multAndoutZP[2] = {0};
     gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{
         0xdddddddd, // TCfg
         0x44444444, // ASelt
@@ -457,7 +457,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
         0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
     }, GPU_DP_TYPE_16};
-    vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift);
     multAndoutZP[0] = (uint32_t)(M0);
     multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0);

View File

@@ -702,27 +702,66 @@ static vsi_nn_kernel_node_t _setup
 )
 {
     vsi_status status = VSI_FAILURE;
-    vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM];
+    vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
     int32_t weight_pad_front[VSI_NN_MAX_DIM_NUM] = {0};
     int32_t weight_pad_end[VSI_NN_MAX_DIM_NUM] = {0};
     vsi_nn_tensor_t * weights = NULL;
     vsi_nn_tensor_t * biases = NULL;
     vsi_nn_tensor_t *temp_tensor[3] = {NULL};
+    vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
+    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
+    int32_t new_rank = 2;
+    uint32_t i = 0;
     int32_t stride = vsi_nn_kernel_param_get_int32( params, "stride" );
     int32_t pad_front = vsi_nn_kernel_param_get_int32( params, "pad_front" );
     int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" );
     int32_t dilation = vsi_nn_kernel_param_get_int32( params, "dilation" );
     _internal_kernel_size_e ks = KN;
-    weight_pad_end[0] = gpu_align_np2_safe(inputs[1]->attr.size[0], 8) - inputs[1]->attr.size[0];
-    weights = vsi_nn_pad_tensor(graph, inputs[1], weight_pad_front, weight_pad_end, inputs[1]->attr.dim_num,
-        VSI_NN_PAD_MODE_CONSTANT, 0);
-    biases = vsi_nn_merge_input_zeropoint_to_bias(graph, inputs[0], inputs[1], inputs[2]);
-    temp_tensor[0] = inputs[0];
+    if (!((VSI_NN_TYPE_UINT8 == inputs[0]->attr.dtype.vx_type)
+        && (VSI_NN_TYPE_UINT8 == inputs[1]->attr.dtype.vx_type)
+        && (NULL == inputs[2] || VSI_NN_TYPE_INT32 == inputs[2]->attr.dtype.vx_type)
+        && (VSI_NN_TYPE_UINT8 == outputs[0]->attr.dtype.vx_type)))
+    {
+        return NULL;
+    }
+    reshape_tensors[0] = inputs[0];
+    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
+    {
+        shape[0] = inputs[1]->attr.size[0];
+        shape[1] = 1;
+        for (i = 1; i < inputs[1]->attr.dim_num; i++)
+        {
+            shape[1] *= inputs[1]->attr.size[i];
+        }
+        reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
+            inputs[1], (uint32_t*)shape, new_rank );
+    }
+    else
+    {
+        reshape_tensors[1] = inputs[1];
+    }
+    if (inputs[2] && inputs[2]->attr.dim_num == 1)
+    {
+        shape[0] = inputs[2]->attr.size[0];
+        shape[1] = 1;
+        new_rank = 2;
+        reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
+            inputs[2], (uint32_t*)shape, new_rank );
+    }
+    weight_pad_end[0] = gpu_align_np2_safe(reshape_tensors[1]->attr.size[0], 8) - reshape_tensors[1]->attr.size[0];
+    weights = vsi_nn_pad_tensor(graph, reshape_tensors[1], weight_pad_front, weight_pad_end,
+        reshape_tensors[1]->attr.dim_num, VSI_NN_PAD_MODE_CONSTANT, 0);
+    biases = vsi_nn_merge_input_zeropoint_to_bias(graph, reshape_tensors[0], reshape_tensors[1], reshape_tensors[2]);
+    temp_tensor[0] = reshape_tensors[0];
     temp_tensor[1] = weights;
     temp_tensor[2] = biases;
@@ -760,6 +799,16 @@ static vsi_nn_kernel_node_t _setup
         }
     }
+    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
+    {
+        vsi_nn_ReleaseTensor( &reshape_tensors[1] );
+    }
+    if (inputs[2] && inputs[2]->attr.dim_num == 1)
+    {
+        vsi_nn_ReleaseTensor( &reshape_tensors[2] );
+    }
     if (weights)
     {
         vsi_nn_ReleaseTensor(&weights);

View File

@@ -164,9 +164,9 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
 else if ((U8 == input_attr->dtype) || (U8 == input1_attr->dtype))
 {
     uint16_t M0 = 0;
-    int8_t postShift0 = 0;
+    int32_t postShift0 = 0;
     uint16_t M1 = 0;
-    int8_t postShift1 = 0;
+    int32_t postShift1 = 0;
     uint32_t i = 0;
     gpu_dp_inst_t uniU8SubZptoF32Conv0_4x4 = {{
         0x09090909, // TCfg
@@ -188,8 +188,8 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
         0x00010001, 0x00000000, 0x00010001, 0x00000000,
         0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant
     }, GPU_DP_TYPE_16 };
-    vsi_nn_GetFP32MultiAndPostShift(scaleIn0, &M0, &postShift0);
-    vsi_nn_GetFP32MultiAndPostShift(scaleIn1, &M1, &postShift1);
+    gpu_quantize_multiplier_16bit(scaleIn0, &M0, &postShift0);
+    gpu_quantize_multiplier_16bit(scaleIn1, &M1, &postShift1);
     uniU8SubZptoF32Conv0_4x4.data[7] |= (postShift0 & 0x1F);
     uniU8SubZptoF32Conv1_4x4.data[7] |= (postShift1 & 0x1F);
     for ( i = 0; i < 8; i++ )

View File

@@ -266,9 +266,11 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };
 #define INPUT_FUNC_TYPE (2)
+#define INPUT_SCALAR_ALPHA (3)
 #define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)
 /*
@@ -296,6 +298,7 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
     float inputTail = 0;
     float outputScale = 1.0f;
     float outputZP = 0;
+    float alpha = 0;
     uint32_t pack_key;
     attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
@@ -303,7 +306,9 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
     attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
     CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );
-    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &type);
+    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[INPUT_FUNC_TYPE], &type);
+    CHECK_STATUS_FAIL_GOTO(status, final );
+    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[INPUT_SCALAR_ALPHA], &alpha);
     CHECK_STATUS_FAIL_GOTO(status, final );
     out_shape = attr[1]->shape;
@@ -408,6 +413,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
         "uniConvBF16toF32_Part1_2x8", &uniConvBF16toF32_Part1_2x8 );
     status |= vsi_nn_kernel_gpu_add_param( node,
         "uniExtractOddData_2x8", &uniExtractOddData_2x8 );
+    status |= vsi_nn_kernel_gpu_add_param( node,
+        "alpha", &alpha );
     CHECK_STATUS_FAIL_GOTO(status, final );
     }
     break;
@@ -466,6 +473,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
         "outputScale", &outputScale );
     status |= vsi_nn_kernel_gpu_add_param( node,
         "outputZP", &outputZP );
+    status |= vsi_nn_kernel_gpu_add_param( node,
+        "alpha", &alpha );
     if (attr[1]->dtype == F16)
     {
@@ -555,7 +564,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_tensor_t* rs_tensors[2] = { NULL };
     int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
     int32_t new_rank = 0;
-    vsi_bool ret;
+    vsi_bool ret = FALSE;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
     ret = vsi_nn_kernel_optimize_element_shape(
         (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
@@ -586,6 +596,8 @@ static vsi_nn_kernel_node_t _setup
         rs_tensors, 1, &rs_tensors[1], 1 );
     node_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
         graph, I32, &unary_type );
+    node_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+        graph, F32, &alpha );
     /* Pass parameters to node. */
     status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
@@ -609,6 +621,11 @@ OnError:
         vsi_nn_kernel_scalar_release( &node_params[INPUT_FUNC_TYPE] );
     }
+    if (node_params[INPUT_SCALAR_ALPHA])
+    {
+        vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_ALPHA] );
+    }
     return node;
 } /* _setup() */

View File

@@ -181,8 +181,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer)
     float dstScale = 0;
     uint16_t M0 = 0;
     uint16_t M1 = 0;
-    int8_t postShift0 = 0;
-    int8_t postShift1 = 0;
+    int32_t postShift0 = 0;
+    int32_t postShift1 = 0;
     uint32_t pack_key = 0;
     int32_t ac2zero = 0;
@@ -279,8 +279,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer)
         dstScale = 1;
         dstZP = 0.0f;
     }
-    vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postShift0);
-    vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postShift1);
+    gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postShift0);
+    gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postShift1);
     mulKIn0In1Zp = (float)((int)(K + 3) / 4 * 4 * src1ZP * src0ZP);
     inOutScale = src0Scale * src1Scale / dstScale;

View File

@@ -588,7 +588,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t node_params[_MOMENTS_PARAM_NUM] = { NULL };
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     int32_t axis_first = axis[0];
     int32_t shapes[2][VSI_NN_MAX_DIM_NUM] = { { 1, 1, 1, 1 } };
     vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
@@ -602,6 +603,8 @@ static vsi_nn_kernel_node_t _setup
     vsi_bool image_2d = FALSE;
     vsi_bool is_continue_axis = TRUE;
+    axis_num = (int32_t)axis_num_temp;
     for ( i = 1; i < axis_num; i++)
     {
         if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)

View File

@@ -139,7 +139,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
     int32_t input_fl = 0;
     int32_t output_fl = 0;
     uint16_t M0 = 0;
-    int8_t postShift = 0;
+    int32_t postShift = 0;
     float inputScale = 1.0f;
     int32_t input_ZP = 0;
     float outputScale = 1.0f;
@@ -193,7 +193,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
     if ( ( input_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM )
         && ( output_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM ) )
     {
-        vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift);
+        gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift);
     }
     image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]);

View File

@@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
     float dstZP = 0;
     float dstScale = 1.0f;
-    int8_t postshift0 = 0;
-    int8_t postshift1 = 0;
+    int32_t postshift0 = 0;
+    int32_t postshift1 = 0;
     float outScale_fl = 1;
     uint16_t M0 = 0;
@@ -229,7 +229,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
         src0ZP = attr[0]->asymm.zero_point;
         src0Scale = attr[0]->asymm.scale;
-        vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postshift0);
+        gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postshift0);
     }
     if ( attr[1]->quant == VSI_NN_KERNEL_QUANT_DFP )
@@ -243,7 +243,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
         src1ZP = attr[1]->asymm.zero_point;
         src1Scale = attr[1]->asymm.scale;
-        vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postshift1);
+        gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postshift1);
     }
     if ( attr[2]->quant == VSI_NN_KERNEL_QUANT_DFP )

File diff suppressed because it is too large

View File

@@ -0,0 +1,533 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"
__BEGIN_DECLS
/*
* Define kernel meta.
*/
typedef enum
{
LARGE = 0,
SMALL
} _internal_nearest_e;
#define _RESIZE_1D_NEAREST_KERNEL_SOURCE "resize_1d_nearest"
#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, mode ) \
(( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) | (mode))
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
{ RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, LARGE ), \
CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
_RESIZE_1D_NEAREST_KERNEL_SOURCE }
#define PACK_KERNEL_MAP_OPT( IN_DTYPE, OUT_DTYPE ) \
{ RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, SMALL ), \
CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_op"), \
_RESIZE_1D_NEAREST_KERNEL_SOURCE }
typedef struct
{
uint32_t key;
char * function_name;
const char * source_name;
} _kernel_map_type;
static const _kernel_map_type _resize_1d_nearest_kernel_map[] =
{
// Register kernel here
PACK_KERNEL_MAP(F16, F16),
PACK_KERNEL_MAP(I16, I16),
PACK_KERNEL_MAP(I8, I8),
PACK_KERNEL_MAP(U8, U8),
PACK_KERNEL_MAP_OPT(F16, F16),
PACK_KERNEL_MAP_OPT(I16, I16),
PACK_KERNEL_MAP_OPT(I8, I8),
PACK_KERNEL_MAP_OPT(U8, U8),
};
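The table above is keyed by RESIZE_1D_NEAREST_HASH_KEY, which packs the input dtype into bits 20 and up, the output dtype into bits 8 and up, and the LARGE/SMALL mode into the low byte, so every (in, out, mode) combination maps to a distinct 32-bit key. A function-form sketch of the same packing (hypothetical; the macro above is what the code uses):
#include <stdint.h>
/* [31..20] input dtype | [19..8] output dtype | [7..0] resize mode */
static uint32_t resize_1d_nearest_key(uint32_t in_dtype, uint32_t out_dtype, uint32_t mode)
{
    return (in_dtype << 20) | (out_dtype << 8) | mode;
}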
/*
* Kernel params
*/
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
{VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
{VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def )
#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL (3)
/*
* Kernel initializer
*/
DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
(
vsi_nn_kernel_node_t node,
const vsi_nn_kernel_node_param_t * param,
size_t param_size
)
{
#define MAX_POST_SHIFT_BITS (31)
#define MAX_MULTIPLIER_NUM (65535)
vsi_status status = VSI_FAILURE;
gpu_param_t gpu_param = {
3,
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0}
};
vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
vsi_nn_kernel_tensor_attr_t * input_attr = NULL;
vsi_int_array_t * out_shape = NULL;
vsi_int_array_t * in_shape = NULL;
vsi_nn_kernel_dtype_e input_dtype = F16;
vsi_nn_kernel_dtype_e output_dtype = F16;
int32_t align_corners = 0;
int32_t half_pixel_centers = 0;
uint32_t depth = 0;
int32_t srcFixPointPos = 0;
int32_t dstFixPointPos = 0;
float input_scale = 1.0;
int32_t inputZP = 0;
float output_scale = 1.0;
int32_t outputZP = 0;
float scale_factor = 1.0f;
uint32_t in_width = 0;
uint32_t out_width = 0;
uint32_t out_height = 0;
float half_pixel_value = 0.0f;
float round_value = 0.0f;
input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );
output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &align_corners);
CHECK_STATUS_FAIL_GOTO(status, final );
status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &half_pixel_centers);
CHECK_STATUS_FAIL_GOTO(status, final );
out_shape = output_attr->shape;
in_shape = input_attr->shape;
input_dtype = input_attr->dtype;
output_dtype = output_attr->dtype;
in_width = in_shape->data[0];
depth = in_shape->data[2];
out_width = out_shape->data[0];
out_height = out_shape->data[1];
if (BF16 == input_dtype && output_dtype == BF16)
{
input_dtype = F16;
output_dtype = F16;
}
if (align_corners && out_width > 1)
{
scale_factor = ((float)(in_width - 1) * 1.0f) / (float)(out_width - 1);
}
else
{
scale_factor = ((float)in_width * 1.0f) / (float)out_width;
}
if (align_corners)
{
round_value = 0.5f;
}
else
{
round_value = 0.0f;
}
if (half_pixel_centers)
{
half_pixel_value = 0.5f;
}
else
{
half_pixel_value = 0.0f;
}
if (VSI_NN_KERNEL_QUANT_ASYMM == input_attr->quant )
{
input_scale = input_attr->asymm.scale;
inputZP = input_attr->asymm.zero_point;
}
else if (VSI_NN_KERNEL_QUANT_DFP == input_attr->quant)
{
srcFixPointPos = input_attr->dfp.fl;
if (srcFixPointPos >= 0)
{
input_scale = 1.0f / (float) ((int64_t)1 << srcFixPointPos);
}
else if (srcFixPointPos < 0)
{
input_scale = (float)((int64_t)1 << -srcFixPointPos);
}
inputZP = 0;
}
else
{
input_scale = 1.0f;
inputZP = 0;
}
if (VSI_NN_KERNEL_QUANT_ASYMM == output_attr->quant )
{
output_scale = 1.0f / output_attr->asymm.scale;
outputZP = output_attr->asymm.zero_point;
}
else if (VSI_NN_KERNEL_QUANT_DFP == output_attr->quant)
{
dstFixPointPos = output_attr->dfp.fl;
if (dstFixPointPos >= 0)
{
output_scale = (float) ((int64_t)1 << dstFixPointPos);
}
else if (dstFixPointPos < 0)
{
output_scale = 1.0f / (float) ((int64_t)1 << -dstFixPointPos);
}
outputZP = 0;
}
else
{
output_scale = 1.0;
outputZP = 0;
}
if (F16 == input_dtype && F16 == output_dtype)
{
gpu_dp_inst_t uniGetExtractData_2x8 = {{
0x00009999, // TCfg
0x00000000, // ASelt
0x06040200, 0x00000000, // ABin
0x0000aaaa, // BSelt
0x00000000, 0x00000000, // BBin
0x00000300, // AccumType, ConstantType, and PostShift
0x00100010, 0x00100010, 0x00100010, 0x00100010,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
if (scale_factor < 4.0f)
{
status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
CHECK_STATUS_FAIL_GOTO(status, final );
}
gpu_param.global_scale[0] = 4;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
CHECK_STATUS_FAIL_GOTO(status, final );
}
else if ( input_dtype == output_dtype && (I8 == input_dtype || I16 == input_dtype))
{
gpu_dp_inst_t uniGetExtractData_2x8 = {{
0x00009999, // TCfg
0x00000000, // ASelt
0x06040200, 0x00000000, // ABin
0x0000aaaa, // BSelt
0x00000000, 0x00000000, // BBin
0x00000300, // AccumType, ConstantType, and PostShift
0x00080008, 0x00080008, 0x00080008, 0x00080008,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniConvertI8toI8_2x8 = {{
0x11111111, // TCfg
0x00000000, // ASelt
0x03020100, 0x07060504, // ABin
0x22222222, // BSelt
0x00000000, 0x00000000, // BBin
0x00000600, // AccumType, ConstantType, and PostShift
0x00000001, 0x00000001, 0x00000001, 0x00000001,
0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
}, GPU_DP_TYPE_16};
if (I16 == input_dtype)
{
uniGetExtractData_2x8.data[8] = 0x00100010;
uniGetExtractData_2x8.data[9] = 0x00100010;
uniGetExtractData_2x8.data[10] = 0x00100010;
uniGetExtractData_2x8.data[11] = 0x00100010;
uniGetExtractData_2x8.data[12] = 0x00100010;
uniGetExtractData_2x8.data[13] = 0x00100010;
uniGetExtractData_2x8.data[14] = 0x00100010;
uniGetExtractData_2x8.data[15] = 0x00100010;
}
if (srcFixPointPos > dstFixPointPos)
{
int32_t postshift = vsi_nn_min(srcFixPointPos - dstFixPointPos, MAX_POST_SHIFT_BITS);
uniConvertI8toI8_2x8.data[7] |= (postshift & 0x1F);
}
else
{
uint32_t multiplier = vsi_nn_min((int64_t)1 << (dstFixPointPos - srcFixPointPos), MAX_MULTIPLIER_NUM);
uint32_t i = 0;
for (i = 0; i < 8; i++)
{
uniConvertI8toI8_2x8.data[i + 8] = multiplier;
}
}
if (scale_factor < 4.0f)
{
status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
CHECK_STATUS_FAIL_GOTO(status, final );
}
gpu_param.global_scale[0] = 4;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertI8toI8_2x8", &uniConvertI8toI8_2x8);
CHECK_STATUS_FAIL_GOTO(status, final );
}
else if (U8 == input_dtype && U8 == output_dtype)
{
uint16_t M0 = 0;
int32_t postShift = 0;
uint32_t multAndoutZP[2] = {0};
gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{
0xdddddddd, // TCfg
0x44444444, // ASelt
0x13121110, 0x17161514, // ABin
0x11111111, // BSelt
0x00000000, 0x00000000, // BBin
0x00002400, // AccumType, ConstantType, and PostShift
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_dp_inst_t uniGetExtractData_2x8 = {{
0x00009999, // TCfg
0x00000000, // ASelt
0x06040200, 0x00000000, // ABin
0x0000aaaa, // BSelt
0x00000000, 0x00000000, // BBin
0x00000300, // AccumType, ConstantType, and PostShift
0x00080008, 0x00080008, 0x00080008, 0x00080008,
0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
}, GPU_DP_TYPE_16};
gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift);
multAndoutZP[0] = (uint32_t)(M0);
multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0);
uniMultiplyAndPostShift_2x8.data[7] |= (postShift & 0x1F);
if (scale_factor < 4.0f)
{
status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
CHECK_STATUS_FAIL_GOTO(status, final );
}
gpu_param.global_scale[0] = 4;
gpu_param.global_scale[1] = 1;
gpu_param.global_scale[2] = 1;
status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
status |= vsi_nn_kernel_gpu_add_param( node, "multAndoutZP", multAndoutZP);
status |= vsi_nn_kernel_gpu_add_param( node, "uniMultiplyAndPostShift_2x8", &uniMultiplyAndPostShift_2x8);
CHECK_STATUS_FAIL_GOTO(status, final );
}
status = vsi_nn_kernel_gpu_add_param( node, "half_pixel_value", &half_pixel_value);
status |= vsi_nn_kernel_gpu_add_param( node, "round_value", &round_value);
CHECK_STATUS_FAIL_GOTO(status, final );
gpu_param.global_size[0] = gpu_align_p2((out_width + gpu_param.global_scale[0] - 1)\
/ gpu_param.global_scale[0], 4);
gpu_param.global_size[1] = (out_height + gpu_param.global_scale[1] - 1) / gpu_param.global_scale[1];
gpu_param.global_size[2] = depth;
status = vsi_nn_kernel_gpu_config( node, &gpu_param );
#undef MAX_MULTIPLIER_NUM
#undef MAX_POST_SHIFT_BITS
final:
if (input_attr) vsi_nn_kernel_tensor_attr_release( &input_attr );
if (output_attr) vsi_nn_kernel_tensor_attr_release( &output_attr );
return status;
} /* _resize_1d_nearest_initializer() */
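The quantization handling above boils down to two scale conventions: asymmetric tensors carry an explicit float scale and zero point, while dynamic fixed point (DFP) encodes the scale as a fraction length fl, i.e. real = stored * 2^(-fl). A minimal sketch of the DFP conversion used for input_scale (hypothetical helper name):
#include <stdint.h>
/* real_value = stored_value * dfp_scale(fl); fl may be negative. */
static float dfp_scale(int32_t fl)
{
    return (fl >= 0) ? 1.0f / (float)((int64_t)1 << fl)
                     : (float)((int64_t)1 << -fl);
}
Note that output_scale above is stored as the reciprocal (1 / scale) so the shader can multiply instead of divide.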
/*
* Query kernel
*/
static vsi_status _query_kernel
(
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
int32_t align_corners
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_dtype_e in_dtype = F16;
vsi_nn_kernel_dtype_e out_dtype = F16;
const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map;
size_t kernel_map_size = _cnt_of_array( _resize_1d_nearest_kernel_map );
vx_param_description_t * param_def = _resize_1d_nearest_kernel_param_def;
size_t param_def_size = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
vx_kernel_initialize_f initializer = _resize_1d_nearest_initializer;
uint32_t key = 0;
uint32_t i = 0;
uint32_t inputWidth = inputs[0]->attr.size[0];
uint32_t outputWidth = outputs[0]->attr.size[0];
float scale_factor;
_internal_nearest_e resize_mode = LARGE;
if (align_corners && outputWidth > 1)
{
scale_factor = (vx_float32)(inputWidth - 1) / (vx_float32)(outputWidth - 1);
}
else
{
scale_factor = (vx_float32)inputWidth / (vx_float32)outputWidth;
}
if (scale_factor < 4.0f)
{
resize_mode = SMALL;
}
else
{
resize_mode = LARGE;
}
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );
if (BF16 == in_dtype && BF16 == out_dtype)
{
in_dtype = F16;
out_dtype = F16;
}
key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype, resize_mode );
for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
{
if( kernel_map[i].key == key )
{
break;
}
}
if ( i < (uint32_t)kernel_map_size )
{
snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
kernel->info.parameters = param_def;
kernel->info.numParams = (uint32_t)param_def_size;
kernel->info.initialize = initializer;
// Register code source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
"vsi_nn_kernel_header",
kernel_map[i].source_name );
// Register binary source
vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
kernel_map[i].source_name );
status = VSI_SUCCESS;
}
return status;
} /* _query_kernel() */
static vsi_nn_kernel_node_t _setup
(
vsi_nn_graph_t * graph,
vsi_nn_tensor_t ** inputs,
size_t input_num,
vsi_nn_tensor_t ** outputs,
size_t output_num,
const vsi_nn_kernel_param_t * params,
vsi_nn_kernel_t * kernel
)
{
vsi_status status = VSI_FAILURE;
vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM] = {NULL};
vsi_nn_kernel_node_t node = NULL;
int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
status = _query_kernel( kernel, inputs, outputs, align_corners );
if ( VSI_SUCCESS == status)
{
node = vsi_nn_kernel_create_node( graph, kernel );
if ( node )
{
/* Set inputs and outputs */
vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
inputs, input_num, outputs, output_num );
node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
/* Pass parameters to node. */
status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
VSI_ASSERT( status == VSI_SUCCESS );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
}
}
return node;
} /* _setup() */
__END_DECLS
REGISTER_BACKEND_EVIS( resize_1d_nearest, _setup )

View File

@@ -850,7 +850,7 @@ static vsi_status _query_kernel
         }
     }
-    if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size))
+    if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size) && is_same_type && is_evis2)
     {
         scale_flag = UP_OPT;
         key = RESIZE_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag );

View File

@@ -348,7 +348,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
 else if (U8 == input_dtype && U8 == output_dtype)
 {
     uint16_t M0 = 0;
-    vx_int8 postShift = 0;
+    int32_t postShift = 0;
     uint32_t multAndoutZP[2] = {0};
     gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{
         0xdddddddd, // TCfg
@@ -371,7 +371,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
         0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
     }, GPU_DP_TYPE_16};
-    vsi_nn_GetFP32MultiAndPostShift(input_scale * output_scale, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift);
     multAndoutZP[0] = (uint32_t)(M0);
     multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0);

View File

@@ -131,9 +131,9 @@ DEF_KERNEL_INITIALIZER(_select_initializer)
     float outputScale = 1.0f;
     int32_t outputZP = 0;
     uint16_t in0_M0 = 0;
-    int8_t in0_postShift = 0;
+    int32_t in0_postShift = 0;
     uint16_t in1_M0 = 0;
-    int8_t in1_postShift = 0;
+    int32_t in1_postShift = 0;
     uint32_t pack_key = 0;
     input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
     CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );
@@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_select_initializer)
         outputZP = output_attr->asymm.zero_point;
     }
-    vsi_nn_GetFP32MultiAndPostShift(input0Scale / outputScale, &in0_M0, &in0_postShift);
-    vsi_nn_GetFP32MultiAndPostShift(input1Scale / outputScale, &in1_M0, &in1_postShift);
+    gpu_quantize_multiplier_16bit(input0Scale / outputScale, &in0_M0, &in0_postShift);
+    gpu_quantize_multiplier_16bit(input1Scale / outputScale, &in1_M0, &in1_postShift);
     pack_key = _PACK_SELECT_KEY( input0_attr->dtype, input1_attr->dtype, output_attr->dtype );

View File

@@ -309,7 +309,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)
 {
     float uint8Scale = scaleIn / scaleOut;
     uint16_t M0 = 0;
-    int8_t postShift = 0;
+    int32_t postShift = 0;
     uint32_t multAndoutZP[2] = {0};
     gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{
@@ -323,7 +323,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)
         0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
     }, GPU_DP_TYPE_16};
-    vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift);
     multAndoutZP[0] = (uint32_t)(M0);
     multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0);

View File

@@ -154,7 +154,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
     int32_t input_fl = 0;
     int32_t output_fl = 0;
     uint16_t M0 = 0;
-    int8_t postShift = 0;
+    int32_t postShift = 0;
     float inputScale = 1.0f;
     int32_t input_ZP = 0;
     float outputScale = 1.0f;
@@ -212,7 +212,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
     factorOut = 1.0f / outputScale;
-    vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift);
+    gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift);
     image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]);

View File

@@ -84,7 +84,7 @@ void gpu_quantize_multiplier_32bit
     double q;
     int64_t q_fixed;
     const int32_t bit = 32;
-    if( vsi_abs(double_multiplier - 0.0) < 1e-5 )
+    if( vsi_abs(double_multiplier - 0.0) < 1e-8 )
     {
         *quantize_multiplier = 0;
         *shift = bit - 0;
@@ -116,6 +116,25 @@ void gpu_quantize_multiplier_32bit
     }
 } /* gpu_quantize_multiplier_32_bit() */
+void _modify_multiplier_postshift
+    (
+    uint16_t * quantize_multiplier,
+    int32_t * shift
+    )
+{
+    uint16_t multiplier = *quantize_multiplier;
+    int32_t postshift = *shift;
+    while (postshift > GPU_MAX_POST_SHIFT_BITS)
+    {
+        multiplier = (multiplier + 1) >> 1;
+        postshift --;
+    }
+    *quantize_multiplier = multiplier;
+    *shift = postshift;
+}
 void gpu_quantize_multiplier_16bit
     (
     double double_multiplier,
@@ -135,5 +154,8 @@ void gpu_quantize_multiplier_16bit
     {
         *shift -= bit;
     }
+    _modify_multiplier_postshift(quantize_multiplier, shift);
 } /* gpu_quantize_multiplier_16bit() */
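gpu_quantize_multiplier_16bit approximates a positive float multiplier m as M0 * 2^(-postShift) with a 16-bit M0; the new _modify_multiplier_postshift pass then halves M0 (rounding up) while decrementing postShift until the shift fits the hardware field, preserving M0 * 2^(-postShift) at slightly reduced precision. The clamp in isolation, as a sketch assuming a GPU_MAX_POST_SHIFT_BITS-style bound:
#include <stdint.h>
/* Sketch: keep m ~= (*M0) * 2^(-*shift) while forcing *shift <= max_bits. */
static void clamp_post_shift(uint16_t *M0, int32_t *shift, int32_t max_bits)
{
    while (*shift > max_bits)
    {
        *M0 = (uint16_t)((*M0 + 1) >> 1); /* halve multiplier, round up */
        (*shift)--;                       /* compensate with one less shift */
    }
}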

View File

@@ -1145,10 +1145,10 @@ vsi_nn_kernel_tensor_attr_t * vsi_nn_kernel_tensor_attr_create
     status = vxQueryTensor( (vx_tensor)tensor, VX_TENSOR_SCALE,
         &(attr->asymm.scale), sizeof(float));
     CHECK_STATUS( status );
-    // Reset scale to 1
-    if( (attr->asymm.scale - 0.f) < 1e-5 )
+    // Reset scale to 1e-8
+    if( (attr->asymm.scale - 0.f) < 1e-8 )
     {
-        attr->asymm.scale = 1.0f;
+        attr->asymm.scale = (float)1e-8;
         attr->asymm.zero_point = 0;
     }
 }
@@ -1225,12 +1225,16 @@ vsi_status vsi_nn_kernel_pirority_set
 static vsi_bool _check_shader_support(vsi_nn_graph_t* graph)
 {
     char *envctrl;
-    int32_t enableShader = 1;
-    envctrl = getenv("VIV_VX_ENABLE_SHADER");
-    if (envctrl)
+    static int32_t enableShader = -1;
+    if (enableShader == -1)
     {
-        enableShader = atoi(envctrl);
+        enableShader = 1;
+        envctrl = getenv("VIV_VX_ENABLE_SHADER");
+        if (envctrl)
+        {
+            enableShader = atoi(envctrl);
+        }
     }
 #if VX_HARDWARE_CAPS_PARAMS_EXT_SUPPORT
@@ -1240,7 +1244,7 @@ static vsi_bool _check_shader_support(vsi_nn_graph_t* graph)
     }
 #endif
-    if(enableShader == 1)
+    if (enableShader >= 1)
     {
         return TRUE;
     }
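The _check_shader_support change caches the VIV_VX_ENABLE_SHADER lookup in a function-local static, so the environment is queried once per process instead of once per graph, and any value >= 1 now counts as enabled. The lazy-init pattern in isolation, as a sketch:
#include <stdlib.h>
/* Sketch: resolve the env flag once; later calls reuse the cached value. */
static int shader_enabled(void)
{
    static int cached = -1;              /* -1 means not resolved yet */
    if (cached == -1)
    {
        const char *v = getenv("VIV_VX_ENABLE_SHADER");
        cached = v ? atoi(v) : 1;        /* default: shaders enabled */
    }
    return cached >= 1;
}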

View File

@@ -459,7 +459,7 @@ vsi_bool vsi_nn_kernel_optimize_broadcast_shape
     k = 0;
     for (j = 0; j < (size_t)input_num; j++)
     {
-        if (size_in[k] > 1)
+        if (size_in[j] > 1)
         {
             k = j;
             break;
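This one-character fix matters: the loop is meant to select the first input whose extent in the current dimension is greater than 1 (the non-broadcast operand), but indexing with k, which is still 0, only ever tested input 0. Reduced to a sketch of the corrected selection:
#include <stddef.h>
#include <stdint.h>
/* Sketch: index of the first input with extent > 1; 0 if all broadcast. */
static size_t pick_reference_input(const int32_t *size_in, size_t input_num)
{
    size_t j, k = 0;
    for (j = 0; j < input_num; j++)
    {
        if (size_in[j] > 1)   /* was size_in[k], i.e. always input 0 */
        {
            k = j;
            break;
        }
    }
    return k;
}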

View File

@@ -603,10 +603,14 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias
     vsi_nn_tensor_t * bias
     )
 {
     vsi_nn_tensor_t * new_bias = NULL;
     vsi_nn_tensor_attr_t attr;
+    int32_t *new_bias_data_ptr = NULL;
+    uint8_t *weight_data = NULL;
+    int32_t *bias_data = NULL;
+    uint32_t i, j;
     memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
+    weight_data = vsi_nn_ConvertTensorToData(graph, weight);
     if (bias == NULL)
     {
@@ -620,26 +624,47 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias
         attr.dtype.zero_point = 0;
         attr.dtype.vx_type = VSI_NN_TYPE_INT32;
         }
-        else
-        {
-            VSILOGE("need to add ...");
-        }
     }
     else
     {
         memcpy(&attr, &bias->attr, sizeof(vsi_nn_tensor_attr_t));
+        if (attr.dim_num == 1)
+        {
+            attr.size[1] = 1;
+            attr.dim_num = 2;
+        }
+        bias_data = (int32_t *)vsi_nn_ConvertTensorToData(graph, bias);
     }
-    new_bias = vsi_nn_CreateTensorWithDefault(graph, &attr, 0.0);
-    if (input->attr.dtype.zero_point == 0)
+    new_bias_data_ptr = (int32_t *)malloc(attr.size[0] * sizeof(int32_t));
+    memset((void *)new_bias_data_ptr, 0, sizeof(int32_t) * attr.size[0]);
+    if (input->attr.dtype.zero_point != 0)
     {
-        return new_bias;
+        for (i = 0; i < weight->attr.size[1]; i++)
+        {
+            uint8_t *weight_ptr = weight_data + i * weight->attr.size[0];
+            for (j = 0; j < weight->attr.size[0]; j++)
+            {
+                new_bias_data_ptr[i] += -((int32_t)weight_ptr[j] - weight->attr.dtype.zero_point) \
+                    * input->attr.dtype.zero_point;
+            }
+        }
     }
-    else
+    if (bias_data != NULL)
     {
-        VSILOGE("need to process bias - (input_zp * (w - w_zp)) ...");
+        for (i = 0; i < weight->attr.size[1]; i++)
+        {
+            new_bias_data_ptr[i] += bias_data[i];
+        }
     }
+    new_bias = vsi_nn_CreateTensorFromData(graph, (uint8_t *)new_bias_data_ptr, &attr);
+    vsi_nn_safe_free( new_bias_data_ptr );
+    vsi_nn_safe_free( bias_data );
+    vsi_nn_safe_free( weight_data );
     return new_bias;
 }
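The rewritten helper replaces the old VSILOGE placeholders with the actual fold: for asymmetric-u8 convolution, sum_j (in_j - in_zp) * (w_j - w_zp) equals sum_j in_j * (w_j - w_zp) plus a per-output-channel constant, so that constant can be merged into the bias as new_bias[i] = bias[i] - in_zp * sum_j (W[i][j] - w_zp). A standalone sketch of that arithmetic (hypothetical helper; u8 weights laid out [out_ch][k]):
#include <stddef.h>
#include <stdint.h>
static void fold_input_zp
    (
    const uint8_t *W, size_t out_ch, size_t k,
    int32_t w_zp, int32_t in_zp,
    const int32_t *bias,   /* may be NULL */
    int32_t *new_bias
    )
{
    size_t i, j;
    for (i = 0; i < out_ch; i++)
    {
        int32_t acc = bias ? bias[i] : 0;
        for (j = 0; j < k; j++)
        {
            acc -= ((int32_t)W[i * k + j] - w_zp) * in_zp;
        }
        new_bias[i] = acc;
    }
}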

View File

@@ -31,6 +31,8 @@
 #include "kernel/vsi_nn_kernel.h"
 #include "kernel/vsi_nn_kernel_node.h"
 #include "vsi_nn_feature.h"
+#include "vsi_nn_tensor_util.h"
+#include "vsi_nn_graph_optimization.h"
 static vsi_bool _build_vx_conv2d_param
 (
@@ -173,6 +175,7 @@ static vx_tensor _expand_tensor_dim
     vsi_nn_kernel_t * kernel \
     )
+
 REGISTER_CONV_OPENVX_KERNEL( conv1d )
 {
     vx_node node = NULL;
@@ -196,10 +199,34 @@ REGISTER_CONV_OPENVX_KERNEL( conv1d )
     temp_tensors[0] = _expand_tensor_dim( inputs[0]->t,
         (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 );
     CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final );
-    temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
-        (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
-    CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
+    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
+    {
+        temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
+            (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
+        CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
+    }
+    else
+    {
+        uint8_t * data = NULL;
+        vsi_nn_tensor_attr_t attr;
+        uint32_t i;
+        data = vsi_nn_ConvertTensorToData( graph, inputs[1] );
+        CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final );
+        memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t));
+        attr.size[0] = 1;
+        for (i = 1; i <= inputs[1]->attr.dim_num; i++)
+        {
+            attr.size[i] = inputs[1]->attr.size[i - 1];
+        }
+        attr.dim_num = inputs[1]->attr.dim_num + 1;
+        attr.dtype.channel_dim = inputs[1]->attr.dtype.channel_dim + 1;
+        temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr);
+        vsi_nn_safe_free( data );
+    }
     temp_tensors[2] = _expand_tensor_dim( outputs[0]->t,
         (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 );
@@ -248,9 +275,38 @@ REGISTER_CONV_OPENVX_KERNEL( depthwise_conv1d )
         (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 );
     CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final );
-    temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
-        (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
-    CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
+    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
+    {
+        temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
+            (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
+        CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
+    }
+    else
+    {
+        uint8_t * data = NULL;
+        vsi_nn_tensor_attr_t attr;
+        uint32_t i;
+        data = vsi_nn_ConvertTensorToData( graph, inputs[1] );
+        CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final );
+        memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t));
+        attr.size[0] = 1;
+        attr.size[1] = inputs[1]->attr.size[0];
+        attr.size[2] = 1;
+        for (i = 1; i < inputs[1]->attr.dim_num; i++)
+        {
+            attr.size[2] *= inputs[1]->attr.size[i];
+        }
+        attr.size[3] = 1;
+        attr.dim_num = 4;
+        attr.dtype.channel_dim = 2;
+        temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr);
+        vsi_nn_safe_free( data );
+    }
     temp_tensors[2] = _expand_tensor_dim( outputs[0]->t,
         (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 );
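The per-channel branch cannot reuse _expand_tensor_dim because inserting a leading dimension shifts every axis up by one, and the per-channel quantization axis (dtype.channel_dim) has to move with the data or the scales would apply to the wrong axis. The attribute rewrite for the conv1d case, in outline (a sketch only; the code above is the authoritative version):
/* [k, ..., oc] -> [1, k, ..., oc], keeping channel_dim on oc. */
static void expand_weight_attr(vsi_nn_tensor_attr_t *attr)
{
    uint32_t i;
    for (i = attr->dim_num; i >= 1; i--)
    {
        attr->size[i] = attr->size[i - 1];  /* shift axes up by one */
    }
    attr->size[0] = 1;                      /* new leading W dimension */
    attr->dim_num += 1;
    attr->dtype.channel_dim += 1;           /* scales follow the channels */
}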

View File

@@ -38,27 +38,27 @@ typedef struct _sort_lut_s
     float val;
 } sort_lut;
-static float exp_eval(float val)
+static float exp_eval(float val, float alpha)
 {
     return expf(val);
 }
-static float log_eval(float data)
+static float log_eval(float data, float alpha)
 {
     return logf(data);
 }
-static float elu_eval(float data)
+static float elu_eval(float data, float alpha)
 {
-    return data >=0 ? data : expf(data) - 1;
+    return data >=0 ? data : expf(data) * alpha - alpha;
 }
-static float neg_eval(float data)
+static float neg_eval(float data, float alpha)
 {
     return data * -1.0f;
 }
-static float hsigmoid_eval(float data)
+static float hsigmoid_eval(float data, float alpha)
 {
     data = (float)(0.2 * data + 0.5);
     data = vsi_nn_clamp(data, 0, 1);
@@ -66,14 +66,14 @@ static float hsigmoid_eval(float data)
     return data;
 }
-static float soft_plus_eval(float data)
+static float soft_plus_eval(float data, float alpha)
 {
-    return log_eval(exp_eval(data) + 1);
+    return log_eval(exp_eval(data, alpha) + 1, alpha);
 }
-static float mish_eval(float data)
+static float mish_eval(float data, float alpha)
 {
-    data = (float)(data * tanh(soft_plus_eval(data)));
+    data = (float)(data * tanh(soft_plus_eval(data, alpha)));
     return data;
 }
@@ -96,7 +96,7 @@ static int32_t _lut_comparator(const void *pa, const void *pb)
     return 0;
 }
-static void _set_unary_table_lookup(float func(float), float *index, float *value)
+static void _set_unary_table_lookup(float func(float, float), float *index, float *value, float alpha)
 {
 #define VSI_NN_MAX_LUT_SIZE (1024)
 #define FLT16_MAX (57344)
@@ -108,25 +108,25 @@ static void _set_unary_table_lookup(float func(float), float *index, float *valu
     {
         int16_t val = (int16_t)(i << 6);
         lut[i].index = fp16_to_fp32(val);
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
     }
     for (i = 0x0; i < 0x10; i++)
     {
         lut[i].index = 0;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
     }
     for (i = 0x1F0; i < 0x200; i++)
     {
         lut[i].index = FLT16_MAX;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
     }
     for (i = 0x3F0; i < 0x400; i++)
     {
         lut[i].index = FLT16_MIN;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
     }
     qsort(lut, VSI_NN_MAX_LUT_SIZE, sizeof(sort_lut), _lut_comparator);
@@ -154,13 +154,14 @@ static vsi_nn_kernel_node_t _setup
     size_t output_num,
     const vsi_nn_kernel_param_t * params,
     vsi_nn_kernel_t * kernel,
-    float func(float)
+    float func(float, float)
     )
 {
 #ifdef VX_USER_LOOKUP_TABLE_SUPPORT
     vx_lut lut1 = NULL;
     vx_lut lut2 = NULL;
     vx_node node = NULL;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
     float index[1024] = {0};
     float value[1024] = {0};
@@ -172,7 +173,7 @@ static vsi_nn_kernel_node_t _setup
         return NULL;
     }
-    _set_unary_table_lookup(func, index, value);
+    _set_unary_table_lookup(func, index, value, alpha);
     lut1 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024);
     lut2 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024);
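Threading alpha through the evaluators makes the F16 lookup tables correct for parameterized activations; for ELU the table entry becomes elu(x) = x for x >= 0 and alpha * (exp(x) - 1) otherwise, which is exactly the expf(data) * alpha - alpha form above. As a self-contained reference:
#include <math.h>
/* Reference ELU; alpha * (expf(x) - 1) == expf(x) * alpha - alpha. */
static float elu_ref(float x, float alpha)
{
    return x >= 0.0f ? x : alpha * (expf(x) - 1.0f);
}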

View File

@ -1,12 +1,12 @@
float4 eltwise_unary_sin(float4 x) float4 eltwise_unary_sin(float4 x, float alpha)
{ {
return native_sin(x); return native_sin(x);
} }
#define logE (1.44269502f) #define logE (1.44269502f)
#define twoLogE (logE * 2.0f) #define twoLogE (logE * 2.0f)
float4 eltwise_unary_exp(float4 x) float4 eltwise_unary_exp(float4 x, float alpha)
{ {
x *= logE; x *= logE;
x = exp2(x); x = exp2(x);
@ -14,33 +14,33 @@ float4 eltwise_unary_exp(float4 x)
} }
#define rlogE (0.693147182f) #define rlogE (0.693147182f)
float4 eltwise_unary_log(float4 x) float4 eltwise_unary_log(float4 x, float alpha)
{ {
x = log2(x); x = log2(x);
return x * rlogE; return x * rlogE;
} }
-float4 eltwise_unary_elu(float4 val)
+float4 eltwise_unary_elu(float4 val, float alpha)
 {
     float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;
     return val < 0 ? x : val;
 }
 
-float4 eltwise_unary_neg(float4 x)
+float4 eltwise_unary_neg(float4 x, float alpha)
 {
     return x * -1;
 }
 
-float4 eltwise_unary_hard_sigmoid(float4 x)
+float4 eltwise_unary_hard_sigmoid(float4 x, float alpha)
 {
     x = 0.2 * x + 0.5;
     x = clamp(x, 0, 1);
     return x;
 }
 
-float4 _softrelu(float4 x)
+float4 _softrelu(float4 x, float alpha)
 {
     x *= logE;
     x = exp2(x);
@@ -49,7 +49,7 @@ float4 _softrelu(float4 x)
     return x * rlogE;
 }
 
-float4 _tanh(float4 x)
+float4 _tanh(float4 x, float alpha)
 {
     x *= -twoLogE;
     x = 1 + exp2(x);
@@ -57,10 +57,10 @@ float4 _tanh(float4 x)
     return (2 * x - 1);
 }
 
-float4 eltwise_unary_mish(float4 x)
+float4 eltwise_unary_mish(float4 x, float alpha)
 {
-    float4 y = _softrelu(x);
-    x = x * _tanh(y);
+    float4 y = _softrelu(x, alpha);
+    x = x * _tanh(y, alpha);
     return x;
 }
@@ -72,14 +72,15 @@ __kernel void func_name##_F32toF32 \
     float inputScale, \
     float inputTail, \
     float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
     ) \
 { \
     int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
 \
     float4 src = read_imagef(input, coord); \
 \
-    float4 dst = eltwise_unary_##func_name(src); \
+    float4 dst = eltwise_unary_##func_name(src, alpha); \
 \
     write_imagef(output, coord, dst); \
 }
@@ -99,14 +100,15 @@ __kernel void func_name##_F32toF32_2D \
     float inputScale, \
     float inputTail, \
     float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
     ) \
 { \
     int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
 \
     float4 src = read_imagef(input, coord); \
 \
-    float4 dst = eltwise_unary_##func_name(src); \
+    float4 dst = eltwise_unary_##func_name(src, alpha); \
 \
     write_imagef(output, coord, dst); \
 }
@@ -126,7 +128,8 @@ __kernel void func_name##_U8toU8 \
     float inputScale, \
     float inputTail, \
     float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
     ) \
 { \
     int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -134,7 +137,7 @@ __kernel void func_name##_U8toU8 \
     uint4 src = read_imageui(input, coord); \
     float4 data = convert_float4(src) * inputScale - inputTail; \
 \
-    data = eltwise_unary_##func_name(data); \
+    data = eltwise_unary_##func_name(data, alpha); \
     uint4 dst = convert_uint4(data * outputScale + outputZP); \
 \
     write_imageui(output, coord, dst); \
@@ -155,7 +158,8 @@ __kernel void func_name##_U8toU8_2D \
     float inputScale, \
     float inputTail, \
     float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
     ) \
 { \
     int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -163,7 +167,7 @@ __kernel void func_name##_U8toU8_2D \
     uint4 src = read_imageui(input, coord); \
     float4 data = convert_float4(src) * inputScale - inputTail; \
 \
-    data = eltwise_unary_##func_name(data); \
+    data = eltwise_unary_##func_name(data, alpha); \
     uint4 dst = convert_uint4(data * outputScale + outputZP); \
 \
     write_imageui(output, coord, dst); \
@@ -184,7 +188,8 @@ __kernel void neg_I32toI32
     float inputScale,
     float inputTail,
     float outputScale,
-    float outputZP
+    float outputZP,
+    float alpha
     )
 {
     int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
@@ -202,7 +207,8 @@ __kernel void neg_I32toI32_2D
     float inputScale,
     float inputTail,
     float outputScale,
-    float outputZP
+    float outputZP,
+    float alpha
    )
 {
     int2 coord = (int2)(get_global_id(0), get_global_id(1));
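Note: the diff above threads an alpha parameter through every unary helper so ELU can honor it. Since exp2(x * logE) is exp(x) computed via the hardware's base-2 exponential, exp2(x * logE) * alpha - alpha equals alpha * (exp(x) - 1). A minimal host-side reference in C (the name elu_ref is illustrative, not part of the kernel source):

#include <math.h>

/* Reference ELU with alpha: x < 0 ? alpha * (exp(x) - 1) : x.
   Matches the kernel's exp2(x * logE) * alpha - alpha for negative x. */
static float elu_ref(float x, float alpha)
{
    return x < 0.0f ? alpha * (expf(x) - 1.0f) : x;
}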
@@ -0,0 +1,57 @@
__kernel void resize_1d_bilinear_F32toF32(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
float scale_x,
float half_pixel_value
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;
float left_x_f = floor(in_x);
float x_lerp = in_x - left_x_f;
int left_x_idx = convert_int(left_x_f);
int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);
float4 top_l, top_r, top, bottom, dst;
top_l = read_imagef(input, coord_in);
coord_in.x++;
top_r = read_imagef(input, coord_in);
top_r = top_r - top_l;
dst = top_l + x_lerp * top_r;
write_imagef(output, coord_out, dst);
}
__kernel void resize_1d_bilinear_U8toU8(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
float scale_x,
float half_pixel_value,
float in_scale,
float in_tail,
float out_scale,
float out_tail
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;
float left_x_f = floor(in_x);
float x_lerp = in_x - left_x_f;
int left_x_idx = convert_int(left_x_f);
int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);
float4 top_l, top_r, top;
uint4 dst;
top_l = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;
coord_in.x++;
top_r = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;
top_r = top_r - top_l;
top = top_l + x_lerp * top_r;
dst = convert_uint4(top * out_scale + out_tail);
write_imageui(output, coord_out, dst);
}
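Note: both bilinear kernels above use the standard resize coordinate transform in_x = (x_out + half_pixel_value) * scale_x - half_pixel_value (half_pixel_value is presumably 0.5 when half-pixel centers are requested and 0 otherwise), then lerp between the two neighboring columns. A hedged host-side sketch in C, with explicit clamping standing in for the image sampler's border behavior:

#include <math.h>

/* 1D bilinear sample of src[0..len-1] at output column x_out. */
static float resize_1d_bilinear_ref(const float *src, int len, int x_out,
                                    float scale_x, float half_pixel)
{
    float in_x   = ((float)x_out + half_pixel) * scale_x - half_pixel;
    float left_f = floorf(in_x);
    float x_lerp = in_x - left_f;              /* fractional weight */
    int   left   = (int)left_f;
    int   right  = left + 1;
    if (left  < 0)    left  = 0;               /* clamp at the borders, */
    if (right >= len) right = len - 1;         /* as the image read does */
    return src[left] + x_lerp * (src[right] - src[left]);
}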
@@ -0,0 +1,36 @@
#define NEAREST_INDEX_PROCESS() \
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x + round_value; \
int in_x_idx = convert_int(in_x); \

__kernel void resize_1d_nearest_F32toF32(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
float scale_x,
float half_pixel_value,
float round_value)
{
NEAREST_INDEX_PROCESS()
int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);
float4 dst;
dst = read_imagef(input, coord_in);
write_imagef(output, coord_out, dst);
}
__kernel void resize_1d_nearest_U8toU8(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
float scale_x,
float half_pixel_value,
float round_value,
float output_scale,
float output_tail)
{
NEAREST_INDEX_PROCESS()
int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);
uint4 dst;
dst = convert_uint4(convert_float4(read_imageui(input, coord_in)) * output_scale + output_tail);
write_imageui(output, coord_out, dst);
}
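Note: the nearest kernels reduce index selection to one fused expression: in_x = (x_out + half_pixel_value) * scale_x + round_value, truncated by convert_int. round_value is presumably 0.5 for the round-to-nearest variants and 0 for the floor-style ones. Equivalent C:

/* Nearest-neighbor source column for output column x_out. */
static int resize_1d_nearest_index(int x_out, float scale_x,
                                   float half_pixel, float round_value)
{
    float in_x = ((float)x_out + half_pixel) * scale_x + round_value;
    return (int)in_x;   /* convert_int truncates toward zero */
}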
@@ -1,5 +1,7 @@
 #include "cl_viv_vx_ext.h"
 
+_viv_uniform float alpha;
+
 float4 eltwise_unary_sin(float4 x)
 {
     return native_sin(x);
@@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x)
 float4 eltwise_unary_elu(float4 val)
 {
     float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;
     return val < 0 ? x : val;
 }
 
@@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;
 __kernel void func_name##_##src_type_name##to##dst_type_name##_2D( \
     __read_only  image2d_array_t input, \
     __write_only image2d_array_t output, \
-    int   type \
+    int   type, \
+    float _alpha \
     ) \
 { \
     int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -194,7 +197,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;
 __kernel void func_name##_BF16toBF16_2D( \
     __read_only  image2d_array_t input, \
     __write_only image2d_array_t output, \
-    int   type \
+    int   type, \
+    float _alpha \
     ) \
 { \
     int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -1,5 +1,7 @@
 #include "cl_viv_vx_ext.h"
 
+_viv_uniform float alpha;
+
 float4 eltwise_unary_sin(float4 x)
 {
     return native_sin(x);
@@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x)
 float4 eltwise_unary_elu(float4 val)
 {
     float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;
     return val < 0 ? x : val;
 }
 
@@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;
 __kernel void func_name##_##src_type_name##to##dst_type_name( \
     __read_only  image2d_array_t input, \
     __write_only image2d_array_t output, \
-    int   type \
+    int   type, \
+    float _alpha \
     ) \
 { \
     int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -192,7 +195,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;
 __kernel void func_name##_BF16toBF16( \
     __read_only  image2d_array_t input, \
     __write_only image2d_array_t output, \
-    int   type \
+    int   type, \
+    float _alpha \
     ) \
 { \
     int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -0,0 +1,148 @@
#include "cl_viv_vx_ext.h"
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_odd_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_even_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_Part0_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_Part1_2x8;
_viv_uniform float half_pixel_value;
__kernel void resize_1d_bilinear_BF16toBF16_DOWN
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
vxc_short8 top;
vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 src;
float4 left4;
float4 right4;
float4 dst4;
VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_even_2x8);
_viv_asm(COPY, right4, src, 16);
VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_odd_2x8);
_viv_asm(COPY, left4, src, 16);
right4 -= left4;
dst4 = right4 * x_lerp + left4;
vxc_ushort8 tmp, dst;
_viv_asm(COPY, tmp, dst4, 16);
dst.s0123 = tmp.s1357;
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_BF16toBF16_UP
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 right_x_f = ceil(in_x);
int4 right_x_idx = convert_int4(right_x_f);
vxc_ushort8 src0, src1, dst0;
vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 bitextract_p0;
vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
vxc_ushort8 constData = 16;
VXC_DP2x8(maskShift, bitextract_p0, constData, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
coord_in.y ++;
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 dst_tmp;
float4 left4;
float4 right4;
VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part0_2x8);
_viv_asm(COPY, left4, dst_tmp, 16);
VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part1_2x8);
_viv_asm(COPY, right4, dst_tmp, 16);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
vxc_ushort8 tmp, dst;
_viv_asm(COPY, tmp, dst4, 16);
dst.s0123 = tmp.s1357;
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_out.y++;
} while (coord_out.y < out_height);
}
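Note: the BF16 kernels never convert through half. They widen bf16 to f32 by moving each 16-bit pattern into the high half of a 32-bit word (the uniConvBF16toF32_* dot-product arrangements) and narrow back by keeping the odd 16-bit lanes of the float results (dst.s0123 = tmp.s1357), i.e. truncating f32 to bf16. A bit-level C sketch of the same conversions (truncation only, no rounding; illustrative):

#include <stdint.h>
#include <string.h>

static float bf16_to_f32(uint16_t b)    /* widen: bf16 bits -> high 16 of f32 */
{
    uint32_t u = (uint32_t)b << 16;
    float f;
    memcpy(&f, &u, sizeof f);
    return f;
}

static uint16_t f32_to_bf16(float f)    /* narrow: keep high 16 bits (truncate) */
{
    uint32_t u;
    memcpy(&u, &f, sizeof u);
    return (uint16_t)(u >> 16);
}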
@@ -0,0 +1,136 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniResizeNxDown_2x8;
_viv_uniform int out_height;
#define RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
read_type read_data, save_data; \
data_type in0, result; \
\
int8 input_desc; \
_viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
_viv_asm(MOV, coord_in.w, baseAddr); \
\
int8 output_desc; \
_viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
_viv_asm(MOV, coord_out.w, baseAddr); \
\
while (coord_out.y < out_height) \
{ \
VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); \
_viv_asm(COPY, in0, read_data, 16); \
VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \
_viv_asm(COPY, save_data, result, 16); \
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
coord_in.y++; \
coord_out.y++; \
} \

#define RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x << 1; \
RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(I8, I8, vxc_char16, vxc_char16)
#define RESIZE_1D_2X_DOWN_8BIT_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x << 1; \
RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_2X_DOWN_8BIT_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_DOWN_8BIT_SAME(I8, I8, vxc_char16, vxc_char16)
#define RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
read_type read_data, read_data1, save_data; \
data_type in0, in1, result; \
\
int8 input_desc; \
_viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
_viv_asm(MOV, coord_in.w, baseAddr); \
\
int8 output_desc; \
_viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
_viv_asm(MOV, coord_out.w, baseAddr); \
\
while (coord_out.y < out_height) \
{ \
VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
_viv_asm(COPY, in0, read_data, 16); \
VXC_OP4(img_load_3d, read_data1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
_viv_asm(COPY, in1, read_data1, 16); \
VXC_DP2x8(result, in0, in1, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \
_viv_asm(COPY, save_data, result, 16); \
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
coord_in.y++; \
coord_out.y++; \
} \

#define RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x << 1; \
RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(F16, F16, vxc_short8, vxc_half8)
#define RESIZE_1D_2X_DOWN_16BIT_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x << 1; \
RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_2X_DOWN_16BIT_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_DOWN_16BIT_SAME(F16, F16, vxc_short8, vxc_half8)
@@ -0,0 +1,216 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform VXC_512Bits uniFp16toFp32_4x4;
_viv_uniform VXC_512Bits uniRightSubLeft_4x4;
_viv_uniform VXC_512Bits uniExtactHalf8_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform float uint8Scale;
_viv_uniform float output_ZP;
_viv_uniform VXC_512Bits uniFp16toFp32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertFp2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertFp2FP32_right_4x4;
__kernel void resize_1d_bilinear_F16toF16_DOWN
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 left4;
float4 right4;
vxc_ushort8 src, result;
vxc_half8 src_half, dst;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, src_half, src, 16);
VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);
VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
half4 tmp;
_viv_asm(CONV, tmp, dst4);
VXC_DP2x8(dst, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);
_viv_asm(COPY, result, dst, 16);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_F16toU8_DOWN
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 left4;
float4 right4;
vxc_ushort8 src;
vxc_uchar8 result;
vxc_half8 src_half, dst;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, src_half, src, 16);
VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);
VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * uint8Scale + output_ZP;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_F16toF16_UP
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 right_x_f = ceil(in_x);
int4 right_x_idx = convert_int4(right_x_f);
vxc_ushort8 src0, src1, dst0;
vxc_half8 top;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 bitextract_p0;
vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
vxc_ushort8 constData = 16;
VXC_DP2x8(maskShift, bitextract_p0, constData, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, top, dst0, 16);
coord_in.y ++;
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
float4 left4;
float4 right4;
VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_4x4);
VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_part1_4x4);
float4 dst4 = right4 * x_lerp + left4;
half4 tmp;
_viv_asm(CONV, tmp, dst4);
VXC_DP2x8(top, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);
_viv_asm(COPY, dst0, top, 16);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst0,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_out.y++;
} while (coord_out.y < out_height);
}
@@ -0,0 +1,147 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;
_viv_uniform float dfpScale;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;
__kernel void resize_1d_bilinear_I16toI16_UP
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 right_x_f = ceil(in_x);
int4 right_x_idx = convert_int4(right_x_f);
vxc_ushort8 src0, src1, dst0;
vxc_short8 top;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 bitextract_p0;
vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
vxc_ushort8 constData = 16;
VXC_DP2x8(maskShift, bitextract_p0, constData, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, top, dst0, 16);
float4 left4;
float4 right4;
coord_in.y ++;
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);
VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * dfpScale;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_I16toI16_DOWN
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
vxc_short8 src;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
float4 left4;
float4 right4;
vxc_short8 result;
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);
VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * dfpScale;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
@@ -0,0 +1,148 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;
_viv_uniform float dfpScale;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;
__kernel void resize_1d_bilinear_I8toI8_UP
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 right_x_f = ceil(in_x);
int4 right_x_idx = convert_int4(right_x_f);
vxc_uchar16 src0, dst0;
vxc_char16 top;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 bitextract_p0;
vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
vxc_ushort8 constData = 8;
VXC_DP2x8(maskShift, bitextract_p0, constData, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_BitExtract(dst0, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, top, dst0, 16);
coord_in.y++;
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
float4 left4;
float4 right4;
VXC_DP4x4(left4, top, top, \
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);
VXC_DP4x4(right4, top, top, \
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * dfpScale;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_I8toI8_DOWN
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
vxc_char16 src;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
float4 left4;
float4 right4;
vxc_char16 result;
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);
VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * dfpScale;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
@@ -0,0 +1,212 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_4x4;
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_left_4x4;
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_right_4x4;
_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform int input_ZP;
_viv_uniform float uint8Scale;
_viv_uniform float output_ZP;
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform float half_pixel_value;
__kernel void resize_1d_bilinear_U8toF16_DOWN
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
vxc_uchar16 src;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
float4 left4;
float4 right4;
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
unsigned char inputZP;
_viv_asm(COPY, inputZP, input_ZP, 4);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);
VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
dst4 *= uint8Scale;
half4 dst;
_viv_asm(CONV, dst, dst4);
vxc_short8 dst_short;
_viv_asm(COPY, dst_short, dst, 16);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst_short,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_U8toU8_UP
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
float4 right_x_f = ceil(in_x);
int4 right_x_idx = convert_int4(right_x_f);
vxc_uchar16 src0, src1;
vxc_uchar16 top;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 bitextract_p0;
vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
vxc_ushort8 constData = 8;
VXC_DP2x8(maskShift, bitextract_p0, constData, \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_BitExtract(top, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
coord_in.y++;
VXC_OP4(img_load_3d, src0, input, coord_in.xyww,
VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
float4 left4;
float4 right4;
unsigned char inputZP;
_viv_asm(COPY, inputZP, input_ZP, 4);
VXC_DP4x4(left4, top, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_4x4);
VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_part1_4x4);
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * uint8Scale + output_ZP;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_out.y ++;
} while (coord_out.y < out_height);
}
__kernel void resize_1d_bilinear_U8toU8_DOWN
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
float4 x_lerp = in_x - left_x_f;
int4 left_x_idx = convert_int4(left_x_f);
vxc_uchar16 src;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
float4 left4;
float4 right4;
vxc_uchar16 result;
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
unsigned char inputZP;
_viv_asm(COPY, inputZP, input_ZP, 4);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
coord_in.x = left_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);
VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);
right4 -= left4;
float4 dst4 = right4 * x_lerp + left4;
dst4 = dst4 * uint8Scale + output_ZP;
int4 dst = convert_int4_rte(dst4);
VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_in.y++;
coord_out.y ++;
} while (coord_out.y < out_height);
}
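Note: every U8 path above follows the same dequantize -> lerp -> requantize shape: subtract input_ZP, interpolate in float, then multiply by uint8Scale (which presumably folds the input and output quantization scales together) and add output_ZP before the round-to-nearest-even conversion. A simplified C model (the helper name is illustrative):

#include <math.h>
#include <stdint.h>

static uint8_t u8_lerp_ref(uint8_t left, uint8_t right, float x_lerp,
                           int input_ZP, float uint8Scale, float output_ZP)
{
    float l = (float)((int)left  - input_ZP);
    float r = (float)((int)right - input_ZP);
    float v = l + x_lerp * (r - l);                   /* lerp dequantized */
    float q = nearbyintf(v * uint8Scale + output_ZP); /* convert_int4_rte */
    if (q < 0.0f)   q = 0.0f;                         /* saturate to u8 */
    if (q > 255.0f) q = 255.0f;
    return (uint8_t)q;
}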
@@ -0,0 +1,78 @@
#include "cl_viv_vx_ext.h"
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniBilinear_4x4;
_viv_uniform float half_pixel_value;
__kernel void resize_1d_bilinear_U8toU8_UP_opt
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers,
__read_only image2d_array_t scale
)
{
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(2), 0);
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
float4 left_x_f = floor(in_x);
int4 left_x_idx = convert_int4(left_x_f);
int4 right_x_idx = left_x_idx + 1;
vxc_uchar16 src0;
vxc_uchar16 src_mask;
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 bitextract_p0;
vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx,
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
vxc_ushort8 constData = 8;
VXC_DP2x8(maskShift, bitextract_p0, constData,
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
vxc_ushort8 lerp_0;
vxc_half8 lerp;
int2 coord = (int2)(coord_out.x * 2, 0);
VXC_ReadImage(lerp_0, scale, coord, VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, lerp, lerp_0, 16);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
do
{
VXC_BitExtract(src_mask, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
coord_in.y++;
VXC_OP4(img_load_3d, src0, input, coord_in.xyww,
VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
vxc_uchar16 dst;
VXC_DP4x4(dst, src_mask, lerp,
VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniBilinear_4x4);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
coord_out.y ++;
} while (coord_out.y < out_height);
}
@@ -0,0 +1,155 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniResizeNxUp_2x8;
_viv_uniform int out_height;
#define RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
read_type read_data, save_data; \
data_type in0, result; \
\
int8 input_desc; \
_viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
_viv_asm(MOV, coord_in.w, baseAddr); \
\
int8 output_desc; \
_viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
_viv_asm(MOV, coord_out.w, baseAddr); \
\
while (coord_out.y < out_height) \
{ \
VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
_viv_asm(COPY, in0, read_data, 16); \
VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxUp_2x8); \
_viv_asm(COPY, save_data, result, 16); \
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
coord_in.y++; \
coord_out.y++; \
} \

#define RESIZE_1D_2X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_HALF_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = (coord_out.x * 2 - 1) >> 2; \
coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_2X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_HALF_SAME(I8, I8, vxc_char16, vxc_char16)
RESIZE_1D_2X_HALF_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_HALF_SAME(F16, F16, vxc_short8, vxc_half8)
#define RESIZE_1D_2X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x >> 1; \
RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_2X_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_SAME(I8, I8, vxc_char16, vxc_char16)
RESIZE_1D_2X_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_SAME(F16, F16, vxc_short8, vxc_half8)
#define RESIZE_1D_4X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_HALF_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = (coord_out.x * 2 - 3) >> 3; \
coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_4X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_4X_HALF_SAME(I8, I8, vxc_char16, vxc_char16)
RESIZE_1D_4X_HALF_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_4X_HALF_SAME(F16, F16, vxc_short8, vxc_half8)
#define RESIZE_1D_4X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x >> 2; \
RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_4X_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_4X_SAME(I8, I8, vxc_char16, vxc_char16)
RESIZE_1D_4X_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_4X_SAME(F16, F16, vxc_short8, vxc_half8)
#define RESIZE_1D_8X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_HALF_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = (coord_out.x * 2 - 7) >> 4; \
coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_8X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_8X_HALF_SAME(I8, I8, vxc_char16, vxc_char16)
RESIZE_1D_8X_HALF_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_8X_HALF_SAME(F16, F16, vxc_short8, vxc_half8)
#define RESIZE_1D_8X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_SAME \
( \
__read_only image2d_array_t input, \
__write_only image2d_array_t output, \
int scale_type \
) \
{ \
int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
coord_in.x = coord_out.x >> 3; \
RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}
RESIZE_1D_8X_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_8X_SAME(I8, I8, vxc_char16, vxc_char16)
RESIZE_1D_8X_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_8X_SAME(F16, F16, vxc_short8, vxc_half8)
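Note: the *_HALF_SAME variants fold the half-pixel transform into integer math. For a 2x upscale, scale_x = 0.5 and in_x = (x_out + 0.5) * 0.5 - 0.5 = (2 * x_out - 1) / 4, so floor(in_x) is (x_out * 2 - 1) >> 2, with column 0 forced to -1 for the border clamp; the plain *_SAME variants are the no-half-pixel case, x_out >> log2(N). A quick C check of the 2x mapping:

#include <assert.h>
#include <math.h>

int main(void)
{
    for (int x_out = 0; x_out < 1024; x_out++) {
        float in_x = ((float)x_out + 0.5f) * 0.5f - 0.5f;
        int   ref  = (int)floorf(in_x);        /* exact left neighbor */
        int   fast = (x_out * 2 - 1) >> 2;     /* the macro's shift form */
        if (x_out == 0) fast = -1;             /* kernel's explicit select */
        assert(ref == fast);
    }
    return 0;
}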
@@ -0,0 +1,337 @@
#include "cl_viv_vx_ext.h"
_viv_uniform VXC_512Bits uniMultiplyAndPostShift_2x8;
_viv_uniform float scale_x;
_viv_uniform float half_pixel_value;
_viv_uniform float round_value;
_viv_uniform int2 multAndoutZP;//[0:15] multiplier, [31:63] output zp
#define NEAREST_INDEX_PROCESS() \
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); \
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x + round_value; \
int4 in_x_idx = convert_int4(in_x); \

__kernel void resize_1d_nearest_F16toF16
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_short8 src;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}
_viv_uniform VXC_512Bits uniGetExtractData_2x8;
__kernel void resize_1d_nearest_F16toF16_op
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_ushort8 src0, src1, dst;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);
vxc_ushort8 input_idx;
_viv_asm(COPY, input_idx, in_x_idx, 16);
VXC_DP2x8(mask, input_idx, input_idx, \
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
VXC_BitExtract(dst, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}
_viv_uniform VXC_512Bits uniConvertI8toI8_2x8;
__kernel void resize_1d_nearest_I8toI8
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_char16 src;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));
VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}
__kernel void resize_1d_nearest_I8toI8_op
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_uchar16 src0, dst0;
vxc_char16 dst;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
vxc_ushort8 input_idx;
_viv_asm(COPY, input_idx, in_x_idx, 16);
VXC_DP2x8(mask, input_idx, input_idx, \
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
VXC_BitExtract(dst0, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, dst, dst0, 8);
VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}
__kernel void resize_1d_nearest_U8toU8
(
image2d_array_t input,
image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_uchar16 src;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 multiplier;
_viv_asm(COPY, multiplier, multAndoutZP, 16);
VXC_DP2x8(src, src, multiplier, \
VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
}
__kernel void resize_1d_nearest_U8toU8_op
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_uchar16 src0, dst;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
vxc_ushort8 input_idx;
_viv_asm(COPY, input_idx, in_x_idx, 16);
VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
VXC_BitExtract(dst, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
vxc_ushort8 multiplier;
_viv_asm(COPY, multiplier, multAndoutZP, 16);
VXC_DP2x8(dst, dst, multiplier, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
}
__kernel void resize_1d_nearest_I16toI16
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_short8 src;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.y;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.z;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
coord_in.x = in_x_idx.w;
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));
VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
}
__kernel void resize_1d_nearest_I16toI16_op
(
__read_only image2d_array_t input,
__write_only image2d_array_t output,
int align_corners,
int half_pixel_centers
)
{
NEAREST_INDEX_PROCESS()
vxc_ushort8 src0, src1, dst0;
vxc_short8 dst;
int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);
int8 input_desc;
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
_viv_asm(MOV, coord_in.w, baseAddr);
int8 output_desc;
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
_viv_asm(MOV, coord_out.w, baseAddr);
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);
vxc_ushort8 input_idx;
_viv_asm(COPY, input_idx, in_x_idx, 16);
VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
VXC_BitExtract(dst0, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
_viv_asm(COPY, dst, dst0, 8);
VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
}

File diff suppressed because it is too large.


@@ -176,8 +176,8 @@ static vsi_status op_optimize
         reshape 3d input (xcn) --> 4d input (whcn)
         reshape 3d output(xcn) --> 4d output(whcn)
     */
-    shape[0] = inputs[0]->attr.size[0];
-    shape[1] = 1;
+    shape[0] = 1;
+    shape[1] = inputs[0]->attr.size[0];
     shape[2] = inputs[0]->attr.size[1];
     shape[3] = inputs[0]->attr.size[2];
     dim = 4;


@@ -136,6 +136,7 @@ static vsi_bool op_check
         IO_TYPE(D_I32, D_F32)
         IO_TYPE(D_I32, D_I32)
         IO_TYPE(D_I32, D_U32)
+        IO_TYPE(D_I32, D_F16)
         IO_TYPE(D_I32, D_BOOL8)
         IO_TYPE(D_U32, D_F32)
         IO_TYPE(D_U32, D_I32)
@@ -176,6 +177,7 @@ static vsi_bool op_check
         IO_TYPE(D_U8|Q_ASYM, D_F32)
         IO_TYPE(D_U8|Q_ASYM, D_I32)
         IO_TYPE(D_BF16, D_BF16)
+        IO_TYPE(D_U8, D_F16)
     END_IO_TYPE_DECL(CAST)
     if(!VALIDATE_OP_IO_TYPES(CAST, self, inputs, self->input.num, outputs, self->output.num)) {
         char* desc = generate_op_io_types_desc(inputs,


@@ -37,6 +37,29 @@
 #include "utils/vsi_nn_dtype_util.h"
 #include "utils/vsi_nn_constraint_check.h"

+static vsi_bool _enable_concat_optimize()
+{
+    char *envctrl;
+    static int32_t enableOptimize = -1;
+
+    if (enableOptimize == -1)
+    {
+        enableOptimize = 1;
+        envctrl = getenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE");
+        if (envctrl)
+        {
+            enableOptimize = atoi(envctrl);
+        }
+    }
+
+    if (enableOptimize == 1)
+    {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
 static int32_t _get_input_num
     (
     vsi_nn_node_t * self,
@@ -243,7 +266,8 @@ static vsi_status op_compute
     status = VSI_SUCCESS;
     self->n = NULL;
-    if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs))
+    if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs)
+        && _enable_concat_optimize())
     {
         iter = self->nn_param.concat.lcl_data;
         while( NULL != iter )
@@ -397,7 +421,8 @@ static vsi_status op_optimize
     status = VSI_SUCCESS;
     /* we don't create tensor view if the axis is not the highest dimension */
     if (_is_highest_dimension(self, outputs) == FALSE ||
-        _is_same_quant(self, inputs, outputs) == FALSE)
+        _is_same_quant(self, inputs, outputs) == FALSE ||
+        _enable_concat_optimize() == FALSE)
     {
         return status;
     }
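A usage note on the new gate: _enable_concat_optimize() caches its answer in a static, so the switch has to be set before the first concat is computed or optimized. A minimal sketch, assuming a POSIX environment:

    #include <stdlib.h>

    /* Turn the concat tensor-view fast path off for debugging. Unset or "1"
     * leaves it on (the default above); any other value disables it. */
    static void disable_concat_optimize(void)
    {
        setenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE", "0", 1);
    }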


@@ -194,6 +194,7 @@ static vsi_bool op_check
         IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
         IO_TYPE(D_I16|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_I16|Q_DFP, D_U8|Q_ASYM)
+        IO_TYPE(D_I16|Q_DFP, D_F16)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_ASYM)
         IO_TYPE(D_I8|Q_ASYM, D_I8|Q_ASYM)


@@ -31,6 +31,7 @@
 #include "vsi_nn_prv.h"
 #include "vsi_nn_ops.h"
 #include "vsi_nn_tensor.h"
+#include "vsi_nn_tensor_util.h"
 #include "utils/vsi_nn_util.h"
 #include "kernel/vsi_nn_kernel.h"
 /*
@@ -73,6 +74,7 @@ static vsi_status op_compute
     {
         status = VSI_SUCCESS;
     }
+
     vsi_nn_kernel_param_release( &param );
     return status;
 } /* op_compute() */
@@ -119,7 +121,7 @@ static vsi_bool op_setup
             VSI_NN_ROUND_FLOOR
         );
-        outputs[0]->attr.size[1] = inputs[1]->attr.size[2];
+        outputs[0]->attr.size[1] = inputs[0]->attr.size[1] * p->multiplier;
         outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
         outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
     }


@@ -45,22 +45,30 @@ static vsi_status _eltwise_unary_op_compute
     )
 {
     vsi_status status = VSI_FAILURE;
+    float alpha = 0;
+    vsi_nn_kernel_param_t * param = NULL;
+
     if( NULL == self )
     {
         return status;
     }
+
+    param = vsi_nn_kernel_param_create();
+    alpha = self->nn_param.elu.alpha;
+    vsi_nn_kernel_param_add_float32( param, "alpha", alpha );
+
     // TODO: This optimzie is a hack for gpu path,
     // it should be moved to gpu kernel setup.
     self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
-        kernel_name, inputs, 1, outputs, 1, NULL );
+        kernel_name, inputs, 1, outputs, 1, param );
     if( self->n )
     {
         status = VSI_SUCCESS;
     }
+    vsi_nn_kernel_param_release( &param );
+
     return status;
 } /* _eltwise_op_compute() */
@@ -152,6 +160,19 @@ static vsi_bool op_check
     return TRUE;
 } /* op_check() */

+static vsi_status op_init
+    (
+    vsi_nn_node_t * self
+    )
+{
+    if (vsi_nn_compareVersion(self->graph, 1, 1, 29) == -1)
+    {
+        self->nn_param.elu.alpha = 1;
+    }
+
+    return VSI_SUCCESS;
+} /* op_init() */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -166,7 +187,7 @@ extern "C" {
     { \
         return _eltwise_unary_op_compute( ""#kernel_name, self, inputs, outputs ); \
     } \
-    DEF_OP_REG(name, NULL, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1)
+    DEF_OP_REG(name, op_init, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1)

 DEF_ELEMENT_WISE_UNARY_OP( SIN, sin );
 DEF_ELEMENT_WISE_UNARY_OP( EXP, exp );
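The net effect: ELU now forwards a real alpha to the kernel, and the new op_init pins alpha to 1 for graphs built against versions before 1.1.29, which predate the parameter. A hedged sketch of setting it on a node (the vsi_nn_AddNode call is assumed from the rest of ovxlib):

    /* alpha scales the negative half of ELU: y = alpha * (exp(x) - 1), x < 0 */
    static void add_elu(vsi_nn_graph_t* graph)
    {
        vsi_nn_node_t* node = vsi_nn_AddNode(graph, VSI_NN_OP_ELU, 1, 1, NULL);
        node->nn_param.elu.alpha = 0.5f;
    }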


@@ -73,6 +73,7 @@ static vsi_bool op_check
         IO_TYPE(D_I32, D_I32, D_I32, D_F16, D_I32)
         IO_TYPE(D_I32, D_I32, D_F32, D_F16, D_F32)
         IO_TYPE(D_I32, D_I32, D_U8|Q_ASYM, D_F16, D_U8|Q_ASYM)
+        IO_TYPE(D_I32, D_I32, D_F32, D_F32, D_U8|Q_ASYM)
     END_IO_TYPE_DECL(HASHTABLE_LOOKUP)
     if (!VALIDATE_OP_IO_TYPES(HASHTABLE_LOOKUP, self, inputs, self->input.num, outputs, self->output.num))
     {


@@ -0,0 +1,298 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
/*
Declare number of input and output.
*/
#define _INPUT_NUM (2)
#define _OUTPUT_NUM (1)
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_status status = VSI_SUCCESS;
status = vsi_nn_internal_compute_node( self );
return status;
} /* op_compute() */
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_nn_interp_param *p = NULL;
p = &self->nn_param.interp;
if ((p->pad_beg > 0) || (p->pad_end > 0))
{
VSILOGE("Only supports non-pos padding (cropping) for now ");
return FALSE;
}
return TRUE;
} /* op_check() */
static vsi_bool op_setup
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_nn_interp_param *p = NULL;
int32_t height_in_eff_, width_in_eff_;
int32_t height_out, width_out;
vsi_nn_internal_node_t* curr = NULL;
vsi_nn_internal_tensor_t *crop_tensor = NULL;
vsi_nn_tensor_t *crop_in_tensor = NULL;
float factor = 1.0f;
int32_t pad_beg = 0;
int32_t pad_end = 0;
if ( NULL == self )
{
return FALSE;
}
p = &self->nn_param.interp;
pad_beg = -p->pad_beg;
pad_end = -p->pad_end;
width_in_eff_ = inputs[0]->attr.size[0] + p->pad_beg + p->pad_end;
height_in_eff_ = inputs[0]->attr.size[1] + p->pad_beg + p->pad_end;
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
{
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
memcpy( outputs[0]->attr.size, inputs[0]->attr.size,
VSI_NN_MAX_DIM_NUM * sizeof( uint32_t ) );
if ((p->shrink_factor > 0) && (p->zoom_factor <= 0))
{
width_out = (width_in_eff_ - 1) / p->shrink_factor + 1;
height_out = (height_in_eff_ - 1) / p->shrink_factor + 1;
}
else if ((p->zoom_factor > 0) && (p->shrink_factor <= 0))
{
width_out = (width_in_eff_ - 1) * (p->zoom_factor - 1) + width_in_eff_;
height_out = (height_in_eff_ - 1) * (p->zoom_factor - 1) + height_in_eff_;
}
else if ((p->height > 0) && (p->width > 0))
{
width_out = p->width;
height_out = p->height;
}
else if ((p->zoom_factor > 0) && (p->shrink_factor > 0))
{
width_out = (width_in_eff_ - 1) / p->shrink_factor + 1;
height_out = (height_in_eff_ - 1) / p->shrink_factor + 1;
width_out = (width_out - 1) * (p->zoom_factor - 1) + width_out;
height_out = (height_out - 1) * (p->zoom_factor - 1) + height_out;
}
else if (NULL != inputs[1])
{
width_out = inputs[1]->attr.size[0];
height_out = inputs[1]->attr.size[1];
}
else
{
VSILOGE("Not support params ");
return FALSE;
}
if ((width_out < 0) || (height_out < 0) || (width_in_eff_ < 0) || (height_in_eff_ < 0))
{
VSILOGE("value shoud be positive: width_out %d height_out %d width_in_eff_ %d height_in_eff_ %d ",
width_out, height_out, width_in_eff_, height_in_eff_);
return FALSE;
}
outputs[0]->attr.size[0] = width_out;
outputs[0]->attr.size[1] = height_out;
}
factor = (float)(outputs[0]->attr.size[0]) / (float)(width_in_eff_);
if ((pad_beg > 0) || (pad_end > 0))
{
vsi_nn_tensor_attr_t attr;
int32_t use_virtual_tensor = 1;
int32_t *begin_dims;
int32_t *end_dims;
int32_t *stride_dims;
uint32_t i;
memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
vsi_nn_internal_init_tensor_attr(&attr, &inputs[0]->attr.dtype, use_virtual_tensor);
crop_tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
crop_in_tensor = crop_tensor->t;
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_STRIDED_SLICE, 1, 1 );
curr->node->nn_param.strided_slice.begin_dims_num = inputs[0]->attr.dim_num;
curr->node->nn_param.strided_slice.end_dims_num = inputs[0]->attr.dim_num;
curr->node->nn_param.strided_slice.stride_dims_num = inputs[0]->attr.dim_num;
curr->node->nn_param.strided_slice.begin_mask = 0;
curr->node->nn_param.strided_slice.end_mask = 0;
curr->node->nn_param.strided_slice.shrink_axis_mask = 0;
begin_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
end_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
stride_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
for (i = 0; i < inputs[0]->attr.dim_num; i++)
{
stride_dims[i] = 1;
}
begin_dims[0] = pad_beg;
begin_dims[1] = pad_beg;
end_dims[0] = inputs[0]->attr.size[0] - pad_end;
end_dims[1] = inputs[0]->attr.size[1] - pad_end;
if (inputs[0]->attr.dim_num > 2)
{
for (i = 2 ; i < inputs[0]->attr.dim_num; i++)
{
begin_dims[i] = 0;
end_dims[i] = inputs[0]->attr.size[i];
}
}
curr->node->nn_param.strided_slice.begin_dims = begin_dims;
curr->node->nn_param.strided_slice.end_dims = end_dims;
curr->node->nn_param.strided_slice.stride_dims = stride_dims;
curr->inputs[0] = inputs[0];
curr->outputs[0] = crop_in_tensor;
vsi_nn_internal_setup_node(self, curr);
}
else
{
crop_in_tensor = inputs[0];
}
if ((width_in_eff_ == (int32_t)outputs[0]->attr.size[0]) && (height_in_eff_ == (int32_t)outputs[0]->attr.size[1]))
{
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 1, 1 );
curr->inputs[0] = crop_in_tensor;
curr->outputs[0] = outputs[0];
vsi_nn_internal_setup_node(self, curr);
}
else
{
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_INTERNAL, 1, 1 );
curr->node->nn_param.resize_internal.align_corners = vx_true_e;
curr->node->nn_param.resize_internal.factor = factor;
curr->node->nn_param.resize_internal.half_pixel_centers = vx_false_e;
curr->inputs[0] = crop_in_tensor;
curr->outputs[0] = outputs[0];
vsi_nn_internal_setup_node(self, curr);
}
return TRUE;
} /* op_setup() */
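/* The sizing branches above in closed form (integer division, as coded):
 *
 *     shrink only : out = (in_eff - 1) / shrink + 1
 *     zoom only   : out = (in_eff - 1) * (zoom - 1) + in_eff
 *                       = (in_eff - 1) * zoom + 1
 *     both        : shrink first, then zoom the shrunk size
 *
 * Worked example: in_eff = 10 with zoom_factor = 3 gives
 * (10 - 1) * 2 + 10 = 28 = (10 - 1) * 3 + 1. */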
static vsi_status op_optimize
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs,
vsi_nn_opt_direction_e direction
)
{
vsi_status status;
status = VSI_SUCCESS;
vsi_nn_internal_optimize_node( self, direction );
return status;
} /* op_optimize() */
static vsi_status op_init
(
vsi_nn_node_t* self
)
{
vsi_status status = VSI_SUCCESS;
status = vsi_nn_internal_init_node_wksp(self);
self->nn_param.interp.height = 0;
self->nn_param.interp.width = 0;
self->nn_param.interp.pad_beg = 0;
self->nn_param.interp.pad_end = 0;
self->nn_param.interp.shrink_factor = 0;
self->nn_param.interp.zoom_factor = 0;
return status;
} /* op_init() */
static vsi_status op_deinit
(
vsi_nn_node_t* self
)
{
vsi_status status = VSI_SUCCESS;
vsi_nn_internal_deinit_node_wksp(self);
status = vsi_nn_op_common_deinit(self);
return status;
} /* op_deinit() */
__BEGIN_DECLS
/* Registrar */
DEF_OP_REG
(
/* op_name */ INTERP,
/* init */ op_init,
/* compute */ op_compute,
/* deinit */ op_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ op_optimize,
/* input_num */ _INPUT_NUM,
/* output_num */ _OUTPUT_NUM
);
__END_DECLS


@@ -155,11 +155,13 @@ static vsi_bool op_check
     BEGIN_IO_TYPE_DECL(PERMUTE, 1, 1)
         IO_TYPE(D_F16, D_F16)
         IO_TYPE(D_F16, D_F32)
+        IO_TYPE(D_I16, D_I16)
         IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
         IO_TYPE(D_U8|Q_ASYM, D_F16)
         IO_TYPE(D_U8|Q_ASYM, D_F32)
+        IO_TYPE(D_I8|Q_SYM_PC, D_I8|Q_SYM_PC)
        IO_TYPE(D_BOOL8, D_BOOL8)
         IO_TYPE(D_BOOL8, D_I8|Q_DFP)
         IO_TYPE(D_F32, D_F32)


@@ -0,0 +1,207 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
/*
Declare number of input and output.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
static vsi_bool _is_same_shape
(
vsi_nn_tensor_t * inputs,
uint32_t *sizes,
uint32_t dims
)
{
uint32_t i = 0;
if (inputs->attr.dim_num != dims)
return FALSE;
for (i = 0; i < dims; i++)
{
if (sizes[i] != inputs->attr.size[i])
return FALSE;
}
return TRUE;
}
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_status status = VSI_FAILURE;
status = vsi_nn_internal_compute_node( self );
return status;
} /* op_compute() */
static vsi_status op_optimize
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs,
vsi_nn_opt_direction_e direction
)
{
if ( _is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num) )
{
return vsi_nn_internal_optimize_node(self, direction );
}
else
{
return VSI_SUCCESS;
}
} /* op_optimize() */
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
return TRUE;
} /* op_check() */
static vsi_bool op_setup
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
float factor = self->nn_param.resize_1d.factor;
vsi_nn_internal_node_t* curr = NULL;
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
{
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
if (factor != 0)
{
outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
}
else
{
outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
}
outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
}
if (_is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num))
{
vsi_nn_internal_init_node_wksp( self );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 0, 0 );
curr->inputs[0] = inputs[0];
curr->outputs[0] = outputs[0];
vsi_nn_internal_setup_node(self, curr);
}
else if (VSI_NN_INTERPOLATION_BILINEAR == self->nn_param.resize_1d.type)
{
vsi_nn_internal_init_node_wksp( self );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL, 0, 0 );
curr->node->nn_param.resize_1d_bilinear_internal.align_corners = self->nn_param.resize_1d.align_corners;
curr->node->nn_param.resize_1d_bilinear_internal.factor = self->nn_param.resize_1d.factor;
curr->node->nn_param.resize_1d_bilinear_internal.half_pixel_centers = \
self->nn_param.resize_1d.half_pixel_centers;
curr->inputs[0] = inputs[0];
curr->outputs[0] = outputs[0];
vsi_nn_internal_setup_node(self, curr);
}
else if (VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR == self->nn_param.resize_1d.type)
{
vsi_nn_internal_init_node_wksp( self );
curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL, 0, 0 );
curr->node->nn_param.resize_1d_nearest_internal.align_corners = self->nn_param.resize_1d.align_corners;
curr->node->nn_param.resize_1d_nearest_internal.factor = self->nn_param.resize_1d.factor;
curr->node->nn_param.resize_1d_nearest_internal.half_pixel_centers = \
self->nn_param.resize_1d.half_pixel_centers;
curr->inputs[0] = inputs[0];
curr->outputs[0] = outputs[0];
vsi_nn_internal_setup_node(self, curr);
}
return TRUE;
} /* op_setup() */
static vsi_status op_init
(
vsi_nn_node_t* self
)
{
return VSI_SUCCESS;
} /* op_init() */
static vsi_status op_deinit
(
vsi_nn_node_t* self
)
{
vsi_status status = VSI_SUCCESS;
status = vsi_nn_internal_deinit_node_wksp(self);
return status;
} /* op_deinit() */
__BEGIN_DECLS
/* Registrar */
DEF_OP_REG
(
/* op_name */ RESIZE_1D,
/* init */ op_init,
/* compute */ op_compute,
/* deinit */ op_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ op_optimize,
/* input_num */ _INPUT_NUM,
/* output_num */ _OUTPUT_NUM
);
__END_DECLS
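/* Usage sketch (assumption: the standard ovxlib node-creation flow; the field
 * names come from op_setup above):
 *
 *     vsi_nn_node_t* node = vsi_nn_AddNode(graph, VSI_NN_OP_RESIZE_1D, 1, 1, NULL);
 *     node->nn_param.resize_1d.type   = VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR;
 *     node->nn_param.resize_1d.factor = 2.0f;  // or factor = 0 and size[0] = N
 *     node->nn_param.resize_1d.align_corners      = FALSE;
 *     node->nn_param.resize_1d.half_pixel_centers = FALSE;
 *
 * op_setup then routes to RESIZE_1D_NEAREST_INTERNAL (or the bilinear
 * sibling), or to plain DATACONVERT when the shape does not change. */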


@@ -0,0 +1,171 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "utils/vsi_nn_constraint_check.h"
/*
Declare number of input and output.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_status status = VSI_FAILURE;
int32_t align_corners = self->nn_param.resize_1d_bilinear_internal.align_corners;
int32_t half_pixel_centers = self->nn_param.resize_1d_bilinear_internal.half_pixel_centers;
vsi_nn_kernel_param_t * param = NULL;
param = vsi_nn_kernel_param_create();
vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners );
vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers );
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
"resize_1d_bilinear",
&inputs[0], 1,
&outputs[0], 1, param );
if ( self->n )
{
status = VSI_SUCCESS;
}
vsi_nn_kernel_param_release( &param );
return status;
} /* op_compute() */
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
BEGIN_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL, 1, 1)
IO_TYPE(D_F16, D_U8|Q_ASYM)
IO_TYPE(D_F16, D_F16)
IO_TYPE(D_BF16, D_BF16)
IO_TYPE(D_F32, D_F32)
IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
IO_TYPE(D_U8|Q_ASYM, D_F16)
IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
END_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL)
if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_BILINEAR_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) {
char* desc = generate_op_io_types_desc(inputs,
self->input.num, outputs, self->output.num);
VSILOGE("Inputs/Outputs data type not support: %s", desc);
destroy_op_io_types_desc(desc);
return FALSE;
}
return TRUE;
} /* op_check() */
static vsi_bool op_setup
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
float factor = self->nn_param.resize_1d_bilinear_internal.factor;
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
{
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
if (factor != 0)
{
outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
}
else
{
outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
}
outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
}
return TRUE;
} /* op_setup() */
static vsi_status op_init
(
vsi_nn_node_t* self
)
{
return VSI_SUCCESS;
} /* op_init() */
static vsi_status op_deinit
(
vsi_nn_node_t* self
)
{
vsi_status status = VSI_SUCCESS;
status = vsi_nn_op_common_deinit(self);
return status;
} /* op_deinit() */
__BEGIN_DECLS
/* Registrar */
DEF_OP_REG
(
/* op_name */ RESIZE_1D_BILINEAR_INTERNAL,
/* init */ op_init,
/* compute */ op_compute,
/* deinit */ op_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ NULL,
/* input_num */ _INPUT_NUM,
/* output_num */ _OUTPUT_NUM
);
__END_DECLS


@@ -0,0 +1,170 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <string.h>
#include <stdlib.h>
#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "utils/vsi_nn_constraint_check.h"
/*
Declare number of input and output.
*/
#define _INPUT_NUM (1)
#define _OUTPUT_NUM (1)
static vsi_status op_compute
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
vsi_status status = VSI_FAILURE;
int32_t align_corners = self->nn_param.resize_1d_nearest_internal.align_corners;
int32_t half_pixel_centers = self->nn_param.resize_1d_nearest_internal.half_pixel_centers;
vsi_nn_kernel_param_t * param = NULL;
param = vsi_nn_kernel_param_create();
vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners );
vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers );
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
"resize_1d_nearest",
&inputs[0], 1,
&outputs[0], 1, param );
if ( self->n )
{
status = VSI_SUCCESS;
}
vsi_nn_kernel_param_release( &param );
return status;
} /* op_compute() */
static vsi_bool op_check
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
BEGIN_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL, 1, 1)
IO_TYPE(D_F16, D_F16)
IO_TYPE(D_BF16, D_BF16)
IO_TYPE(D_F32, D_F32)
IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
END_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL)
if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_NEAREST_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) {
char* desc = generate_op_io_types_desc(inputs,
self->input.num, outputs, self->output.num);
VSILOGE("Inputs/Outputs data type not support: %s", desc);
destroy_op_io_types_desc(desc);
return FALSE;
}
return TRUE;
} /* op_check() */
static vsi_bool op_setup
(
vsi_nn_node_t * self,
vsi_nn_tensor_t ** inputs,
vsi_nn_tensor_t ** outputs
)
{
float factor = self->nn_param.resize_1d_nearest_internal.factor;
if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
{
outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
if (factor != 0)
{
outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
}
else
{
outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
}
outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
}
return TRUE;
} /* op_setup() */
static vsi_status op_init
(
vsi_nn_node_t* self
)
{
return VSI_SUCCESS;
} /* op_init() */
static vsi_status op_deinit
(
vsi_nn_node_t* self
)
{
vsi_status status = VSI_SUCCESS;
status = vsi_nn_op_common_deinit(self);
return status;
} /* op_deinit() */
__BEGIN_DECLS
/* Registrar */
DEF_OP_REG
(
/* op_name */ RESIZE_1D_NEAREST_INTERNAL,
/* init */ op_init,
/* compute */ op_compute,
/* deinit */ op_deinit,
/* check */ op_check,
/* setup */ op_setup,
/* optimize */ NULL,
/* input_num */ _INPUT_NUM,
/* output_num */ _OUTPUT_NUM
);
__END_DECLS


@@ -163,12 +163,17 @@ static vsi_bool op_check
         ret = FALSE;
     }

+    if(ret)
     {
         BEGIN_IO_TYPE_DECL(SVDF, 5, 2)
             IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_F16)
             IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_F16)
             IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_F16)
             IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_F32)
+            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_NONE)
+            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_NONE)
+            IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_NONE)
+            IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_NONE)
         END_IO_TYPE_DECL(SVDF)
         if(!VALIDATE_OP_IO_TYPES(SVDF, self, inputs, self->input.num, outputs, self->output.num)) {
             char* desc = generate_op_io_types_desc(inputs,


@@ -117,8 +117,7 @@ static void _try_pack_tensor_data
         {
             *p_sz = (uint64_t)bytes;
         }
-        free( data );
-        data = NULL;
+        vsi_nn_safe_free( data );
     }
 }
 } /* _pack_tensor_data() */
@@ -417,6 +416,8 @@ static _op_param_gen_t s_op_gen[] =
     /* PRE_PROCESS_NV12 */ NULL,
     /* SCATTER_ND */ NULL,
     /* DECONVOLUTION1D */ NULL,
+    /* INTERP */ NULL,
+    /* RESIZE_1D */ NULL,
     };

 _compiler_assert( _cnt_of_array(s_op_gen) == VSI_NN_OP_NUM, vsi_nn_code_generator_c );


@@ -149,7 +149,7 @@ vsi_nn_tensor_t* vsi_nn_Concat
             src = j;
             memcpy( &buffer[dst * type_bytes], &tmp[src * type_bytes], type_bytes );
         }
-        free(tmp);
+        vsi_nn_safe_free( tmp );
         offset += strides[axis] * tensors[i]->attr.size[axis];
     }
     tensor_out = vsi_nn_CreateTensorFromData( graph, buffer, &output_attr );
@@ -221,11 +221,7 @@ vsi_nn_tensor_t* vsi_nn_ConvertTensorDtype
         }
     }

-    if( src_buf )
-    {
-        free( src_buf );
-        src_buf = NULL;
-    }
+    vsi_nn_safe_free( src_buf );
     if( dst_buf )
     {
         free( dst_buf );
@@ -333,10 +329,7 @@ vsi_nn_tensor_t* vsi_nn_TensorAdd
 error:
     for ( i = 0; i < tensor_num; i++ )
     {
-        if ( buffer[i] )
-        {
-            free(buffer[i]);
-        }
+        vsi_nn_safe_free( buffer[i] );
     }
     if( tmp )
     {
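These hunks replace hand-rolled free-and-NULL sequences with vsi_nn_safe_free. Its definition is not part of this diff; a typical shape, stated as an assumption:

    /* Assumed definition (utils/vsi_nn_util.h): free, then NULL the pointer,
     * so a later double free or stale read degrades to a no-op. */
    #define vsi_nn_safe_free(_ptr) do { \
            if (_ptr) { free(_ptr); (_ptr) = NULL; } \
        } while (0)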


@@ -710,33 +710,6 @@ vsi_bool vsi_nn_CheckFilePath
     return FALSE;
 } /* vsi_nn_CheckFilePath() */

-void vsi_nn_GetFP32MultiAndPostShift
-    (
-    vx_float32 mult,
-    vx_uint16 *M0,
-    vx_int8 *N
-    )
-{
-    vx_uint32 uintMult         = *((vx_uint32*)(&mult));
-    vx_uint32 tmpMultiply      = 0;
-    vx_int32  exp              = 0;
-    vx_uint32 postShiftBit6to5 = 0;
-    vx_uint32 postShift        = 0;
-    vx_int8   tmpPostShift     = 0;
-
-    tmpMultiply      = (uintMult & 0x7FFFFF) >> 8;
-    *M0              = (vx_uint16)((1U << 15) + tmpMultiply);
-
-    exp              = (uintMult & 0x7F800000) >> 23; /* postShift is Scale's exp*/
-    tmpPostShift     = 15 - ((vx_int8)exp - 127);
-    postShift        = tmpPostShift & 0x1F;
-    tmpPostShift     = tmpPostShift >> 5;
-    postShiftBit6to5 = tmpPostShift & 3;
-
-    *N = (vx_int8)(((postShiftBit6to5 << 5) | (postShift & 0x1F)));
-    *N = (((vx_int32)*N << 25) >> 25);
-}/* vsi_nn_GetFP32MultiAndPostShift() */
-
 typedef struct
 {
     uint8_t* raw_addr;
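For reference, the removed helper encoded a float scale as a 16-bit multiplier plus a signed 7-bit post-shift. Reconstructed from the bit manipulation above, with $f$ the 23-bit mantissa and $e$ the biased exponent of the input $m$:

    m = (1 + f \cdot 2^{-23}) \cdot 2^{e-127}, \qquad
    M_0 = 2^{15} + \lfloor f / 2^{8} \rfloor, \qquad
    N = 15 - (e - 127)

so that $m \approx M_0 \cdot 2^{-N}$ with $M_0 \in [2^{15}, 2^{16})$; the final two statements pack and sign-extend $N$ from 7 bits.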


@@ -520,6 +520,7 @@ static vx_tensor _create_const_raw_tensor
     vx_tensor tensor = NULL;
     vx_tensor_create_params_t params;
     float * scales = NULL;
+    int32_t * zeroPoints = NULL;

     memset( &params, 0, sizeof( vx_tensor_create_params_t ) );
     params.num_of_dims = attr.dim_num;
@@ -539,12 +540,14 @@ static vx_tensor _create_const_raw_tensor
 #ifdef VSI_PERCHANNEL_QUANTIZATION_SUPPORT
             // This is a hack that driver doesn't support const scale
             scales = (float *)malloc(sizeof(float) * attr.dtype.scale_dim);
+            zeroPoints = (int32_t *)malloc(sizeof(int32_t) * attr.dtype.zero_points_dim);
             memcpy(scales, attr.dtype.scales, attr.dtype.scale_dim * sizeof(float));
+            memcpy(zeroPoints, attr.dtype.zero_points, attr.dtype.zero_points_dim * sizeof(float));
             params.quant_data.affinePerChannel.channelDim = attr.dtype.channel_dim;
             params.quant_data.affinePerChannel.scaleCount = attr.dtype.scale_dim;
             params.quant_data.affinePerChannel.scales = scales;
-            params.quant_data.affinePerChannel.zeroPoint = NULL;
-            params.quant_data.affinePerChannel.zeroPointCount = 0;
+            params.quant_data.affinePerChannel.zeroPoint = zeroPoints;
+            params.quant_data.affinePerChannel.zeroPointCount = attr.dtype.zero_points_dim;
             break;
 #else
             VSILOGE( "can't support qnt_type VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC." );
@@ -580,6 +583,10 @@ static vx_tensor _create_const_raw_tensor
         {
             free( scales );
         }
+        if (zeroPoints)
+        {
+            free( zeroPoints );
+        }
         return NULL;
     }
 }
@@ -620,6 +627,10 @@ static vx_tensor _create_const_raw_tensor
     {
         free( scales );
     }
+    if (zeroPoints)
+    {
+        free( zeroPoints );
+    }
     return tensor;
 } /* _create_const_raw_tensor() */
@@ -689,6 +700,8 @@ static void _convert_const_I8toU8
     if ( tensor->t ) vxReleaseTensor(&tensor->t);
     tensor->t = vsi_nn_CreateRawTensorFromData(graph, data, attr);
+
+    vsi_nn_safe_free( data );
 }/* _convert_const_I8toU8() */

 static vsi_status _convert_graph_const_tensor


@@ -29,25 +29,49 @@
 #include "vsi_nn_log.h"
 #include "vsi_nn_types.h"

+#ifdef __ANDROID__
+#if ANDROID_SDK_VERSION >= 30
+static const char* ENV_LOG_LEVEL = "vendor.VSI_NN_LOG_LEVEL";
+#else
+static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL";
+#endif
+#else
+static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL";
+#endif
+
+int get_env_as_int(const char* env, int default_value) {
+    int value = default_value;
+#ifdef __ANDROID__
+    {
+        char value_str[100];
+        int status = __system_property_get(env, value_str);
+        if (status) {
+            value = atoi(value_str);
+        }
+    }
+#else
+    {
+        char* env_s = getenv(env);
+        if (env_s) {
+            value = atoi(env_s);
+        }
+    }
+#endif
+    return value;
+}
+
 static vsi_bool _check_log_level
     (
     vsi_nn_log_level_e level
     )
 {
-    char *env_level_s;
     static vsi_nn_log_level_e env_level = VSI_NN_LOG_UNINIT;
+
     if(env_level == VSI_NN_LOG_UNINIT)
     {
-        env_level_s = getenv("VSI_NN_LOG_LEVEL");
-        if(env_level_s)
-        {
-            env_level = (vsi_nn_log_level_e)atoi(env_level_s);
-        }
-        else
-        {
-            env_level = VSI_NN_LOG_WARN;
-        }
+        env_level = (vsi_nn_log_level_e)get_env_as_int(ENV_LOG_LEVEL, VSI_NN_LOG_WARN);
     }

 if(env_level >= level)
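A consequence of the rename: on Android builds with ANDROID_SDK_VERSION >= 30 the level is read from the vendor.VSI_NN_LOG_LEVEL system property (e.g. set via setprop) instead of the process environment. Reading any integer knob through the new helper is one call; the default below is illustrative:

    /* _check_log_level() passes VSI_NN_LOG_WARN as the real fallback. */
    int level = get_env_as_int(ENV_LOG_LEVEL, 3 /* illustrative default */);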


@@ -844,7 +844,7 @@ float * vsi_nn_ConvertTensorToFloat32Data
     if( !tensor->attr.is_created_from_handle )
     {
-        if(tensor_data)free(tensor_data);
+        vsi_nn_safe_free( tensor_data );
     }
     return data;
 } /* vsi_nn_ConvertTensorToFloat32Data() */
@@ -1095,7 +1095,7 @@ void vsi_nn_SaveTensorToTextByFp32
     }
     fwrite( buf, count, 1, fp );
     fclose( fp );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToTextByFp32() */
@@ -1124,7 +1124,7 @@ void vsi_nn_SaveTensorToText
     sz = vsi_nn_GetElementNum( tensor );
     vsi_nn_SaveDataToText( filename, data, sz,
         tensor->attr.dtype.vx_type, seperator );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToText() */
@@ -1219,7 +1219,7 @@ void vsi_nn_SaveTensorToBinary
     }
     fwrite( data, sz, 1, fp );
     fclose( fp );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToBinary() */
@@ -1539,7 +1539,7 @@ void vsi_nn_TransposeTensor
         VSILOGE( "Copy transpose data fail with code %#x.", status );
     }

-    free( buf );
+    vsi_nn_safe_free( buf );
     free( dst );
 } /* vsi_nn_TransposeTensor() */
@@ -1588,7 +1588,7 @@ void vsi_nn_PermuteTensor
         if( perm[i] >= dim_num )
         {
             VSILOGW( "Incorrect perm %d", perm[i] );
-            if( buf ) { free(buf); buf = NULL; }
+            vsi_nn_safe_free( buf );
             if( dst ) { free(dst); dst = NULL; }
             return;
         }
@@ -1603,7 +1603,7 @@ void vsi_nn_PermuteTensor
         VSILOGE( "Copy permute data fail with code %#x.", status );
     }
-    if( buf ) { free(buf); buf = NULL; }
+    vsi_nn_safe_free( buf );
     if( dst ) { free(dst); dst = NULL; }
 } /* vsi_nn_PermuteTensor() */
@@ -2241,7 +2241,7 @@ void vsi_nn_reshuffle_weight_data
     }
     vsi_nn_CopyDataToTensor( graph, weights, weight_data );
     vsi_nn_Free( buffer );
-    vsi_nn_Free( weight_data );
+    vsi_nn_safe_free( weight_data );
 }

 vsi_nn_tensor_t* vsi_nn_ConcatTensor_impl