Update internal to REL/v1.1.30
Commit: 6ccb425e Signed-off-by: Jiang Bo <bo.jiang@verisilicon.com>
parent 62898a4419
commit def53f4b5c
@@ -0,0 +1,335 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore

# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates

*-[Dd]ebug/
*-[Dd]ebugPublic/
*-[Rr]elease/
*-[Rr]eleases/

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Build results
*.o
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
NNApi0.3/
NNApi0.4/
OpenVX1.2/
bazel-bin
bazel-genfiles
bazel-out
bazel-ovxlib
bazel-testlogs

# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

# VS code
.vscode

# Visual Studio 2017 auto generated files
Generated\ Files/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUNIT
*.VisualState.xml
TestResult.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# Benchmark Results
BenchmarkDotNet.Artifacts/

# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
**/Properties/launchSettings.json

# StyleCop
StyleCopReport.xml

# Files built by Visual Studio
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb

# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap

# Visual Studio Trace Files
*.e2e

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# JustCode is a .NET coding add-in
.JustCode

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json

# Visual Studio code coverage results
*.coverage
*.coveragexml

# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj

# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/

# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets

# Microsoft Azure Build Output
csx/
*.build.csdef

# Microsoft Azure Emulator
ecf/
rcf/

# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/

# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs

# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
*.mdf
*.ldf
*.ndf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/

# TypeScript v1 declaration files
typings/

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# JetBrains Rider
.idea/
*.sln.iml

# CodeRush
.cr/

# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc

# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config

# Tabs Studio
*.tss

# Telerik's JustMock configuration file
*.jmconfig

# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs

# OpenCover UI analysis results
OpenCover/

# Azure Stream Analytics local run output
ASALocalRun/

# IDE
.settings/
@@ -144,3 +144,5 @@ DEF_OP(PRE_PROCESS_YUV444)
 DEF_OP(PRE_PROCESS_NV12)
 DEF_OP(SCATTER_ND)
 DEF_OP(DECONVOLUTION1D)
+DEF_OP(INTERP)
+DEF_OP(RESIZE_1D)
@@ -14,3 +14,5 @@ DEF_OP(RESIZE_NEAREST_INTERNAL)
 DEF_OP(DEPTH2SPACE_INTERNAL)
 DEF_OP(GRUCELL_ACTIVATION_INTERNAL)
 DEF_OP(GRUCELL_ACTIVATION_INTERNAL_SMA)
+DEF_OP(RESIZE_1D_BILINEAR_INTERNAL)
+DEF_OP(RESIZE_1D_NEAREST_INTERNAL)
@@ -44,6 +44,7 @@ typedef struct _vsi_nn_elu_param
 {
     /* elu layer local data structure */
     vsi_nn_elu_lcl_data local;
+    float alpha;
 } vsi_nn_elu_param;

 #ifdef __cplusplus
@@ -0,0 +1,44 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_INTERP_H
#define _VSI_NN_OP_INTERP_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_interp_param
{
    struct _interp_local_data_t* local;
    int32_t height;        // height of output
    int32_t width;         // width of output
    int32_t zoom_factor;   // zoom factor
    int32_t shrink_factor; // shrink factor
    int32_t pad_beg;       // padding at begin of input
    int32_t pad_end;       // padding at end of input
} vsi_nn_interp_param;

#endif
@@ -0,0 +1,44 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_RESIZE_1D_H
#define _VSI_NN_OP_RESIZE_1D_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_resize_1d_param
{
    struct _resize_1d_local_data_t* local;
    vsi_enum type;
    float factor;
    int32_t size[2];
    vsi_bool align_corners;
    vsi_bool half_pixel_centers;
} vsi_nn_resize_1d_param;

_compiler_assert(offsetof(vsi_nn_resize_1d_param, local) == 0, \
    vsi_nn_resize_1d_h );

#endif
@@ -0,0 +1,42 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H
#define _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_resize_1d_bilinear_internal_param
{
    struct _resize_1d_bilinear_internal_local_data_t* local;
    vsi_bool align_corners;
    vsi_bool half_pixel_centers;
    float factor;
} vsi_nn_resize_1d_bilinear_internal_param;

_compiler_assert(offsetof(vsi_nn_resize_1d_bilinear_internal_param, local) == 0, \
    vsi_nn_resize_1d_bilinear_internal_h );

#endif
@@ -0,0 +1,42 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H
#define _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_resize_1d_nearest_internal_param
{
    struct _resize_1d_nearest_internal_local_data_t* local;
    vsi_bool align_corners;
    vsi_bool half_pixel_centers;
    float factor;
} vsi_nn_resize_1d_nearest_internal_param;

_compiler_assert(offsetof(vsi_nn_resize_1d_nearest_internal_param, local) == 0, \
    vsi_nn_resize_1d_nearest_internal_h );

#endif
@@ -195,13 +195,6 @@ OVXLIB_API vsi_bool vsi_nn_CheckFilePath
     const char *path
     );

-OVXLIB_API void vsi_nn_GetFP32MultiAndPostShift
-    (
-    vx_float32 mult,
-    vx_uint16 *M0,
-    vx_int8 *N
-    );
-
 /**
  * Malloc aligned buffer
  * Malloc address and size aligned buffer.
@@ -32,6 +32,13 @@
 extern "C" {
 #endif

+vx_tensor vsi_nn_CreateRawTensorFromData
+    (
+    vsi_nn_graph_t * graph,
+    uint8_t * data,
+    vsi_nn_tensor_attr_t * attr
+    );
+
 vsi_status vsi_nn_OptimizeGraph
     (
     vsi_nn_graph_t* graph,
@@ -158,6 +158,10 @@
 #include "ops/vsi_nn_op_squeeze.h"
 #include "ops/vsi_nn_op_expand_broadcast.h"
 #include "ops/vsi_nn_op_deconvolution1d.h"
+#include "ops/vsi_nn_op_interp.h"
+#include "ops/vsi_nn_op_resize_1d.h"
+#include "ops/vsi_nn_op_resize_1d_bilinear_internal.h"
+#include "ops/vsi_nn_op_resize_1d_nearest_internal.h"
 /* custom node head define */
 #include "custom/vsi_nn_custom_node_type.h"
@@ -302,6 +306,10 @@ typedef union _vsi_nn_nn_param
     vsi_nn_squeeze_param squeeze;
     vsi_nn_expand_broadcast_param expand_broadcast;
     vsi_nn_deconvolution1d_param deconvolution1d;
+    vsi_nn_interp_param interp;
+    vsi_nn_resize_1d_param resize_1d;
+    vsi_nn_resize_1d_bilinear_internal_param resize_1d_bilinear_internal;
+    vsi_nn_resize_1d_nearest_internal_param resize_1d_nearest_internal;
     uint8_t client_param[128];

     /* custom node data struct define */
@@ -33,7 +33,7 @@ extern "C"{

 #define VSI_NN_VERSION_MAJOR 1
 #define VSI_NN_VERSION_MINOR 1
-#define VSI_NN_VERSION_PATCH 28
+#define VSI_NN_VERSION_PATCH 30
 #define VSI_NN_VERSION \
     (VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)

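A worked example of this version encoding: for this release, 1 * 10000 + 1 * 100 + 30 = 10130, so downstream code can gate on the integer if needed. An illustrative sketch, not part of the commit:

    #if VSI_NN_VERSION >= 10130
        /* RESIZE_1D / INTERP support from REL/v1.1.30 is available. */
    #endif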
@@ -117,6 +117,7 @@ LOCAL_C_INCLUDES += \
 LOCAL_CFLAGS := \
     -DLINUX \
     -D'OVXLIB_API=__attribute__((visibility("default")))' \
+    -DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION)\
     -Wno-sign-compare \
     -Wno-implicit-function-declaration \
     -Wno-sometimes-uninitialized \
@@ -168,12 +168,14 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };

 #define SCALAR_INPUT_SCALE  (2)
 #define SCALAR_INPUT_TAIL   (3)
 #define SCALAR_OUTPUT_SCALE (4)
 #define SCALAR_OUTPUT_ZP    (5)
+#define SCALAR_ALPHA        (6)
 #define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)

 /*
@@ -293,6 +295,7 @@ static vsi_nn_kernel_node_t _setup
     float inputTail = (float)inputs[0]->attr.dtype.zero_point * inputScale;
     float outputScale = outputs[0]->attr.dtype.scale;
     float outputZP = (float)outputs[0]->attr.dtype.zero_point + 0.5f;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

     ret = vsi_nn_kernel_optimize_element_shape(
             (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
@@ -331,6 +334,8 @@ static vsi_nn_kernel_node_t _setup
                 graph, F32, &outputScale );
         node_params[SCALAR_OUTPUT_ZP] = vsi_nn_kernel_scalar_create(
                 graph, F32, &outputZP );
+        node_params[SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+                graph, F32, &alpha );

         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
@@ -369,6 +374,11 @@ OnError:
         vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_ZP] );
     }

+    if (node_params[SCALAR_ALPHA])
+    {
+        vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALPHA] );
+    }
+
     return node;
 } /* _setup() */

@@ -356,7 +356,8 @@ static vsi_nn_kernel_node_t _setup
     int32_t out_shape[VSI_NN_MAX_DIM_NUM] = {0};
     int32_t out_rs_flg = 0;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     int32_t keep_dim = vsi_nn_kernel_param_get_int32( params, "keep_dim" );
     int32_t first_axis = axis[0];
     int32_t i = 0;
@@ -369,6 +370,8 @@ static vsi_nn_kernel_node_t _setup
     float input_scale = inputs[0]->attr.dtype.scale;
     float dim_ratio = (float)1.0 / (float)(width * height);

+    axis_num = (int32_t)axis_num_temp;
+
     if(inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP)
     {
         if (inputs[0]->attr.dtype.fl > 0)
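The two hunks above fix a latent type-punning bug (the same change reappears in a CPU kernel later in this diff): vsi_nn_kernel_param_get_buffer reports the element count through a size_t pointer, so casting the address of a 32-bit int32_t to size_t* lets a 64-bit store overrun the variable on LP64 targets. A minimal sketch of the safe pattern, with hypothetical names:

    size_t   count_tmp = 0;   /* matches the callee's size_t out-parameter */
    int32_t *axis = (int32_t *)get_buffer( params, "axis", &count_tmp );
    int32_t  axis_num = (int32_t)count_tmp;   /* narrow explicitly after the call */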
@@ -0,0 +1,305 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

#define _RESIZE_1D_BILINEAR_KERNEL_SOURCE() "resize_1d_bilinear"

#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
        (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) )

#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
          CVIVANTE_NAMESPACE("cl.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
          _RESIZE_1D_BILINEAR_KERNEL_SOURCE() }

typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _resize_1d_bilinear_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP( F32, F32),
    PACK_KERNEL_MAP( U8,  U8),
};

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
{
    {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )

#define SCALAR_SCALE_X      (2)
#define SCALAR_HALF_PIXEL   (3)
#define SCALAR_INPUT_SCALE  (4)
#define SCALAR_INPUT_TAIL   (5)
#define SCALAR_OUTPUT_SCALE (6)
#define SCALAR_OUTPUT_TAIL  (7)

#define RESIZE_1D_BILINEAR_NUM       4
#define RESIZE_1D_BILINEAR_QUANT_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )

/*
 * Kernel initializer
 */
DEF_KERNEL_INITIALIZER(_resize_1d_bilinear_initializer)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    gpu_param_t gpu_param = {
        3,
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0}
        };
    vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
    vsi_int_array_t * out_shape = NULL;

    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

    out_shape = output_attr->shape;

    gpu_param.global_scale[0] = 1;
    gpu_param.global_scale[1] = 1;
    gpu_param.global_scale[2] = 1;

    gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
    gpu_param.global_size[0] = gpu_align_p2(
            (out_shape->data[0] + gpu_param.global_scale[0] - 1)
            / gpu_param.global_scale[0], 4);
    gpu_param.global_size[1] = (
            (out_shape->data[1] + gpu_param.global_scale[1] - 1)
            / gpu_param.global_scale[1]);
    gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
    status = vsi_nn_kernel_gpu_config( node, &gpu_param );

final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
    SAFE_FREE_TENSOR_ATTR(output_attr);
    return status;
} /* _resize_1d_bilinear_initializer() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs,
    vsi_bool *is_use_u8_kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_dtype_e in_dtype  = F16;
    vsi_nn_kernel_dtype_e out_dtype = F16;
    const _kernel_map_type * kernel_map = _resize_1d_bilinear_kernel_map;
    size_t kernel_map_size = _cnt_of_array( _resize_1d_bilinear_kernel_map );
    vx_param_description_t * param_def = _resize_1d_bilinear_kernel_param_def;
    size_t param_def_size = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
    vx_kernel_initialize_f initializer = _resize_1d_bilinear_initializer;

    uint32_t key = 0;
    uint32_t i = 0;

    in_dtype  = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
    out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

    if (F16 == in_dtype)
    {
        in_dtype = F32;
    }
    if (F16 == out_dtype)
    {
        out_dtype = F32;
    }

    if ((U8 == in_dtype) || (U8 == out_dtype))
    {
        param_def_size = RESIZE_1D_BILINEAR_QUANT_NUM;
        *is_use_u8_kernel = TRUE;
    }
    else
    {
        param_def_size = RESIZE_1D_BILINEAR_NUM;
        *is_use_u8_kernel = FALSE;
    }

    key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype );

    for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
    {
        if ( kernel_map[i].key == key )
        {
            break;
        }
    }
    if ( i < (uint32_t)kernel_map_size )
    {
        snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
        kernel->info.parameters  = param_def;
        kernel->info.numParams   = (uint32_t)param_def_size;
        kernel->info.initialize  = initializer;
        // Register code source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
                kernel_map[i].source_name );
        // Register binary source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
                kernel_map[i].source_name );
        status = VSI_SUCCESS;
    }
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[RESIZE_1D_BILINEAR_QUANT_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
    int32_t in_width  = inputs[0]->attr.size[0];
    int32_t out_width = outputs[0]->attr.size[0];
    float input_zp = (float)inputs[0]->attr.dtype.zero_point;
    float input_scale = inputs[0]->attr.dtype.scale;
    float input_tail = -(input_zp * input_scale);
    float output_zp = (float)outputs[0]->attr.dtype.zero_point;
    float output_scale = (0 == outputs[0]->attr.dtype.scale) ? 1.0f : 1.0f / outputs[0]->attr.dtype.scale;
    float half_pixel_value = 0.0f;
    float scale_factor_x = 0.0f;
    vsi_bool is_use_u8_kernel = FALSE;

    if (align_corners && out_width > 1)
    {
        scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1);
    }
    else
    {
        scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width;
    }

    if (half_pixel_centers)
    {
        half_pixel_value = 0.5f;
    }
    else
    {
        half_pixel_value = 0.0f;
    }

    status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            size_t node_params_num = RESIZE_1D_BILINEAR_NUM;
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, RESIZE_1D_BILINEAR_QUANT_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_SCALE_X]    = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x );
            node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value );
            if (is_use_u8_kernel)
            {
                node_params[SCALAR_INPUT_SCALE]  = vsi_nn_kernel_scalar_create( graph, F32, &input_scale );
                node_params[SCALAR_INPUT_TAIL]   = vsi_nn_kernel_scalar_create( graph, F32, &input_tail );
                node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
                node_params[SCALAR_OUTPUT_TAIL]  = vsi_nn_kernel_scalar_create( graph, F32, &output_zp );
                node_params_num = RESIZE_1D_BILINEAR_QUANT_NUM;
            }
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
            if (is_use_u8_kernel)
            {
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
            }
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CL( resize_1d_bilinear, _setup )
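For orientation, scale_factor_x in _setup above follows the usual align_corners convention for resize ops: with align_corners set (and out_width > 1) the endpoints map exactly via (in_width - 1) / (out_width - 1), otherwise the plain width ratio is used. With illustrative numbers, in_width = 4 and out_width = 8 give 3/7 ≈ 0.429 and 4/8 = 0.5 respectively; with half_pixel_centers a 0.5 offset is passed to the kernel via SCALAR_HALF_PIXEL, applied as (w + 0.5) * scale - 0.5 in the CPU reference implementation later in this diff.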
@@ -0,0 +1,312 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
typedef enum
{
    INTERNAL_KERNEL_RESIZE_1D_NEAREST,
} _internal_kernel_e;

#define _RESIZE_1D_NEAREST_KERNEL_SOURCE "resize_1d_nearest"

#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
        (( IN_DTYPE << 8 ) | ( OUT_DTYPE ))

#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
          CVIVANTE_NAMESPACE("cl.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
          _RESIZE_1D_NEAREST_KERNEL_SOURCE }

typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _resize_1d_nearest_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP( F32, F32),
    PACK_KERNEL_MAP( U8,  U8),
};

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
    {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM  5
#define _RESIZE_1D_NEAREST_QUANT_NUM  _cnt_of_array( _resize_1d_nearest_kernel_param_def )

#define SCALAR_SCALE_X      (2)
#define SCALAR_HALF_PIXEL   (3)
#define SCALAR_ROUND_VALUE  (4)
#define SCALAR_SCALE_VALUE  (5)
#define SCALAR_TAIL_VALUE   (6)

/*
 * Kernel initializer
 */
DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    gpu_param_t gpu_param = {
        3,
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0}
        };
    vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
    vsi_int_array_t * out_shape = NULL;

    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

    out_shape = output_attr->shape;

    gpu_param.global_scale[0] = 1;
    gpu_param.global_scale[1] = 1;
    gpu_param.global_scale[2] = 1;

    gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
    gpu_param.global_size[0] = gpu_align_p2(
            (out_shape->data[0] + gpu_param.global_scale[0] - 1)
            / gpu_param.global_scale[0], 4);
    gpu_param.global_size[1] = (
            (out_shape->data[1] + gpu_param.global_scale[1] - 1)
            / gpu_param.global_scale[1]);
    gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
    status = vsi_nn_kernel_gpu_config( node, &gpu_param );

final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
    SAFE_FREE_TENSOR_ATTR(output_attr);
    return status;
} /* _resize_1d_nearest_initializer() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs,
    vsi_bool *is_use_u8_kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_dtype_e in_dtype  = F16;
    vsi_nn_kernel_dtype_e out_dtype = F16;
    const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map;
    size_t kernel_map_size = _cnt_of_array( _resize_1d_nearest_kernel_map );
    vx_param_description_t * param_def = _resize_1d_nearest_kernel_param_def;
    size_t param_def_size = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
    vx_kernel_initialize_f initializer = _resize_1d_nearest_initializer;

    uint32_t key = 0;
    uint32_t i = 0;

    in_dtype  = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
    out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

    if (F16 == in_dtype)
    {
        in_dtype = F32;
    }
    if (F16 == out_dtype)
    {
        out_dtype = F32;
    }

    if ((U8 == in_dtype) || (U8 == out_dtype))
    {
        param_def_size = _RESIZE_1D_NEAREST_QUANT_NUM;
        *is_use_u8_kernel = TRUE;
    }
    else
    {
        param_def_size = _RESIZE_1D_NEAREST_PARAM_NUM;
        *is_use_u8_kernel = FALSE;
    }

    key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype );

    for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
    {
        if ( kernel_map[i].key == key )
        {
            break;
        }
    }
    if ( i < (uint32_t)kernel_map_size )
    {
        snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
        kernel->info.parameters  = param_def;
        kernel->info.numParams   = (uint32_t)param_def_size;
        kernel->info.initialize  = initializer;
        // Register code source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
                kernel_map[i].source_name );
        // Register binary source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
                kernel_map[i].source_name );
        status = VSI_SUCCESS;
    }
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_QUANT_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
    int32_t in_width  = inputs[0]->attr.size[0];
    int32_t out_width = outputs[0]->attr.size[0];
    float input_zp = (float)inputs[0]->attr.dtype.zero_point;
    float input_scale = inputs[0]->attr.dtype.scale;
    float output_scale = (0 == outputs[0]->attr.dtype.scale) ? \
            input_scale : input_scale / outputs[0]->attr.dtype.scale;
    float output_tail = (float)outputs[0]->attr.dtype.zero_point - input_zp * output_scale;
    float half_pixel_value = 0.0f;
    float round_value = 0.0f;
    float scale_factor_x = 0.0f;
    vsi_bool is_use_u8_kernel = FALSE;

    if (align_corners && out_width > 1)
    {
        scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1);
    }
    else
    {
        scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width;
    }

    if (align_corners)
    {
        round_value = 0.5f;
    }
    else
    {
        round_value = 0.0f;
    }

    if (half_pixel_centers)
    {
        half_pixel_value = 0.5f;
    }
    else
    {
        half_pixel_value = 0.0f;
    }

    status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            size_t node_params_num = _RESIZE_1D_NEAREST_PARAM_NUM;
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_SCALE_X]     = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x );
            node_params[SCALAR_HALF_PIXEL]  = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value );
            node_params[SCALAR_ROUND_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &round_value );
            if (is_use_u8_kernel)
            {
                node_params[SCALAR_SCALE_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
                node_params[SCALAR_TAIL_VALUE]  = vsi_nn_kernel_scalar_create( graph, F32, &output_tail );
                node_params_num = _RESIZE_1D_NEAREST_QUANT_NUM;
            }
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ROUND_VALUE] );
            if (is_use_u8_kernel)
            {
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_VALUE] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_TAIL_VALUE] );
            }
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CL( resize_1d_nearest, _setup )
@@ -49,7 +49,7 @@ typedef enum
 } unary_type_e;


-#define _CPU_ARG_NUM        (1)
+#define _CPU_ARG_NUM        (2)
 #define _CPU_INPUT_NUM      (1)
 #define _CPU_OUTPUT_NUM     (1)
 #define _CPU_IO_NUM         (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
@@ -71,9 +71,9 @@ static float log_eval(float data)
     return logf(data);
 }

-static float elu_eval(float data)
+static float elu_eval(float data, float alpha)
 {
-    return data >=0 ? data : expf(data) - 1;
+    return data >=0 ? data : expf(data) * alpha - alpha;
 }

 static float neg_eval(float data)
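The new body is the standard parameterized ELU, f(x) = x for x >= 0 and alpha * (exp(x) - 1) otherwise, with the product written out as expf(data) * alpha - alpha. A quick numeric check: for alpha = 2 and x = -1, the result is 2 * exp(-1) - 2 ≈ -1.264, while the old hard-coded body gave exp(-1) - 1 ≈ -0.632, i.e. only the alpha = 1 case.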
@@ -114,6 +114,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
     size_t out_elements = 0;
     vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
     int32_t i;
+    float alpha = 0;
     int32_t unary_type = 0;

     tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
@@ -126,6 +127,8 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)

     status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &unary_type);
     CHECK_STATUS_FAIL_GOTO(status, final );
+    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &alpha);
+    CHECK_STATUS_FAIL_GOTO(status, final );

     buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
     CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
@@ -151,7 +154,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
             data = log_eval(data);
             break;
         case UNARY_ELU:
-            data = elu_eval(data);
+            data = elu_eval(data, alpha);
             break;
         case UNARY_NEG:
             data = neg_eval(data);
@@ -193,9 +196,11 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };

 #define INPUT_FUNC_TYPE     (2)
+#define INPUT_SCALAR_ALPHA  (3)

 static const vx_kernel_description_t _kernel_info =
 {
@@ -237,6 +242,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_status status = VSI_SUCCESS;
     vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

     status = _query_kernel( inputs, outputs, kernel );
     if( VSI_SUCCESS == status)
@@ -249,10 +255,13 @@ static vsi_nn_kernel_node_t _setup
                 inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
         backend_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
                 graph, I32, &unary_type );
+        backend_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+                graph, F32, &alpha );
         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );

         vsi_nn_kernel_scalar_release( &backend_params[INPUT_FUNC_TYPE] );
+        vsi_nn_kernel_scalar_release( &backend_params[INPUT_SCALAR_ALPHA] );
     }
     else
     {
@@ -258,11 +258,14 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     vsi_bool is_continue_axis = TRUE;
     uint32_t mask = 0;
     int32_t i = 0;

+    axis_num = (int32_t)axis_num_temp;
+
     for ( i = 1; i < axis_num; i++)
     {
         if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)
@@ -0,0 +1,271 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)
#define _KERNEL_NAME        CVIVANTE_NAMESPACE("cpu.resize_1d_bilinear")

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
{
    {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )

#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL    (3)

/*
 * Kernel function
 */
DEF_KERNEL_EXECUTOR(_compute)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
    float *f32_in_buffer[_INPUT_NUM] = {NULL};
    float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
    vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
    size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
    size_t out_elements[_OUTPUT_NUM] = {0};
    size_t out_bytes[_OUTPUT_NUM] = {0};
    uint32_t i = 0;
    int32_t align_corners = 0;
    int32_t half_pixel_centers = 0;
    float width_scale = 1.0f;
    uint32_t input_width = 0, output_width = 0;
    uint32_t w = 0, out = 0;
    uint32_t output_dims = 0;
    float data00 = .0f, data01 = .0f, interpolation = .0f;
    uint32_t index = 0;
    uint32_t outer = 0;

    /* prepare data */
    for (i = 0; i < _INPUT_NUM; i ++)
    {
        input[i] = (vsi_nn_kernel_tensor_t)param[i];
        in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
        f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
    }
    for (i = 0; i < _OUTPUT_NUM; i ++)
    {
        output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
        out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
        vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
        out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
        out_bytes[i] = out_elements[i] * sizeof(float);
        f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
        CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
        memset( f32_out_buffer[i], 0, out_bytes[i] );
    }

    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
    input_width = in_attr[0]->shape->data[0];
    output_width = out_attr[0]->shape->data[0];
    output_dims = (uint32_t)out_attr[0]->shape->size;

    if (align_corners && output_width > 1)
    {
        width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
    }
    else
    {
        width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
    }

    outer = 1;

    for (i = 1; i < output_dims; i++)
    {
        outer = outer * out_attr[0]->shape->data[i];
    }

    for (out = 0; out < outer; out++)
    {
        vx_int32 input_base = out * input_width;
        vx_int32 output_base = out * output_width;
        for (w = 0; w < output_width; w ++)
        {
            vx_float32 input_w;
            vx_int32 w0;
            vx_int32 w1;
            if (half_pixel_centers)
            {
                input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f;
            }
            else
            {
                input_w = w * width_scale;
            }
            w0 = (vx_int32)input_w;
            w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vx_int32)(input_width - 1));
            index = input_base + w0;
            data00 = f32_in_buffer[0][index];
            index = input_base + w1;
            data01 = f32_in_buffer[0][index];

            interpolation = data00 * (1 - (input_w - w0)) +
                            data01 * (input_w - w0);
            index = output_base + w;
            f32_out_buffer[0][index] = interpolation;
        }
    }

    /* save data */
    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
                f32_out_buffer[i], out_elements[i] );
        CHECK_STATUS_FAIL_GOTO( status, final );
    }

final:
    for (i = 0; i < _INPUT_NUM; i++)
    {
        if (f32_in_buffer[i])
        {
            free(f32_in_buffer[i]);
            f32_in_buffer[i] = NULL;
        }
        if (in_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
        }
    }
    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        if (f32_out_buffer[i])
        {
            free(f32_out_buffer[i]);
            f32_out_buffer[i] = NULL;
        }
        if (out_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
        }
    }

    return status;
} /* _compute() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs
    /* Add extra params */
    )
{
    vsi_status status = VSI_FAILURE;
    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
    kernel->info.function    = _compute;
    kernel->info.parameters  = _resize_1d_bilinear_kernel_param_def;
    kernel->info.numParams   = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
    status = VSI_SUCCESS;
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_BILINEAR_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );

    status = _query_kernel( kernel, inputs, outputs );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_BILINEAR_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
            node_params[SCALAR_HALF_PIXEL]    = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_BILINEAR_PARAM_NUM );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CPU( resize_1d_bilinear, _setup )
@ -0,0 +1,271 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)
#define _KERNEL_NAME        CVIVANTE_NAMESPACE("cpu.resize_1d_nearest")

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM  _cnt_of_array( _resize_1d_nearest_kernel_param_def )

#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL    (3)
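/* The two scalar parameters occupy slots 2 and 3, after the input and output tensors. */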

/*
 * Kernel function
 */
DEF_KERNEL_EXECUTOR(_compute)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
    float *f32_in_buffer[_INPUT_NUM] = {NULL};
    float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
    vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
    size_t   out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
    size_t   out_elements[_OUTPUT_NUM] = {0};
    size_t   out_bytes[_OUTPUT_NUM] = {0};
    uint32_t i = 0;
    int32_t align_corners = 0;
    int32_t half_pixel_centers = 0;
    float   width_scale = 1.0f;
    uint32_t input_width = 0, output_width = 0;
    uint32_t w = 0, out = 0;
    uint32_t output_dims = 0;
    uint32_t outer = 0;
    /* prepare data */
    for (i = 0; i < _INPUT_NUM; i ++)
    {
        input[i] = (vsi_nn_kernel_tensor_t)param[i];
        in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
        f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );

    }
    for (i = 0; i < _OUTPUT_NUM; i ++)
    {
        output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
        out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
        vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
        out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
        out_bytes[i] = out_elements[i] * sizeof(float);
        f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
        CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
        memset( f32_out_buffer[i], 0, out_bytes[i] );
    }

    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
    input_width = in_attr[0]->shape->data[0];
    output_width = out_attr[0]->shape->data[0];
    output_dims = (uint32_t)out_attr[0]->shape->size;

    if (align_corners && output_width > 1)
    {
        width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
    }
    else
    {
        width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
    }

    outer = 1;

    for (i = 1; i < output_dims; i++)
    {
        outer = outer * out_attr[0]->shape->data[i];
    }

    for (out = 0; out < outer; out++)
    {
        vx_int32 input_base = out * input_width;
        vx_int32 output_base = out * output_width;

        for (w = 0; w < output_width; w ++)
        {
            float input_w;
            uint32_t in_x;
            int32_t in_index;
            int32_t out_index;

            if (half_pixel_centers)
            {
                input_w = ((float)w + 0.5f) * width_scale;
            }
            else
            {
                input_w = w * width_scale;
            }
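            /* align_corners snaps to the nearest source index via rounding;
             * otherwise floor() selects the left sample. Both are clamped to the row. */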
            if (align_corners)
            {
                in_x = vsi_nn_min((uint32_t)simple_round(input_w), input_width - 1);
            }
            else
            {
                in_x = vsi_nn_min((uint32_t)floorf(input_w), input_width - 1);
            }
            in_index = in_x + input_base;
            out_index = w + output_base;
            f32_out_buffer[0][out_index] = f32_in_buffer[0][in_index];
        }
    }

    /* save data */
    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
                f32_out_buffer[i], out_elements[i] );
        CHECK_STATUS_FAIL_GOTO( status, final );
    }

final:
    for (i = 0; i < _INPUT_NUM; i++)
    {
        if (f32_in_buffer[i])
        {
            free(f32_in_buffer[i]);
            f32_in_buffer[i] = NULL;
        }
        if (in_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
        }
    }

    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        if (f32_out_buffer[i])
        {
            free(f32_out_buffer[i]);
            f32_out_buffer[i] = NULL;
        }
        if (out_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
        }
    }

    return status;
} /* _compute() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs
    /* Add extra params */
    )
{
    vsi_status status = VSI_FAILURE;
    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
    kernel->info.function    = _compute;
    kernel->info.parameters  = _resize_1d_nearest_kernel_param_def;
    kernel->info.numParams   = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
    status = VSI_SUCCESS;
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM];
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );

    status = _query_kernel( kernel, inputs, outputs );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
            node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CPU( resize_1d_nearest, _setup )

@ -279,10 +279,10 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)

    if( U8 == input_dtype && F16 == output_dtype )
    {
        vx_uint16 M0 = 0;
        vx_int8 postShift = 0;
        vx_uint32 multAndoutZP0[2] = {0};
        vx_uint32 multAndoutZP1[2] = {0};
        uint16_t M0 = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP0[2] = {0};
        uint32_t multAndoutZP1[2] = {0};

        gpu_dp_inst_t uniU8MulAndPostShift_0_Lo_2x8 = {{
            0xdddddddd, // TCfg

@ -305,12 +305,12 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift);
        gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift);
        multAndoutZP0[0] = (vx_uint32)(M0);
        multAndoutZP0[1] = (vx_uint32)((output_ZP << postShift) - input_ZP * M0);
        uniU8MulAndPostShift_0_Lo_2x8.data[7] |= (postShift & 0x1F);

        vsi_nn_GetFP32MultiAndPostShift(scaleIn1 / scaleOut, &M0, &postShift);
        gpu_quantize_multiplier_16bit(scaleIn1 / scaleOut, &M0, &postShift);
        multAndoutZP1[0] = (vx_uint32)(M0);
        multAndoutZP1[1] = (vx_uint32)((output_ZP << postShift) - input_ZP1 * M0);
        uniU8MulAndPostShift_1_Lo_2x8.data[7] |= (postShift & 0x1F);

@ -268,7 +268,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
    {
        uint32_t multAndoutZP[2] = {0};
        uint16_t M0 = 0;
        int8_t postShift = 0;
        int32_t postShift = 0;
        gpu_dp_inst_t uniDataMulAndPostShift_2x8 = {{
            0xdddddddd, // TCfg
            0x44444444, // ASelt

@ -279,7 +279,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};
        vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift);
        gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift);

        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)(output_ZP << postShift );

@ -434,7 +434,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
        int32_t packedMaxData[4];
        float uint8Scale = scaleIn / scaleOut;
        uint16_t M0 = 0;
        int8_t postShift = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP[2] = {0};
        gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{
            0xdddddddd, // TCfg

@ -457,7 +457,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift);
        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0);

@ -702,27 +702,66 @@ static vsi_nn_kernel_node_t _setup
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM];
    vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t weight_pad_front[VSI_NN_MAX_DIM_NUM] = {0};
    int32_t weight_pad_end[VSI_NN_MAX_DIM_NUM] = {0};
    vsi_nn_tensor_t * weights = NULL;
    vsi_nn_tensor_t * biases = NULL;
    vsi_nn_tensor_t *temp_tensor[3] = {NULL};
    vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
    int32_t new_rank = 2;
    uint32_t i = 0;
    int32_t stride = vsi_nn_kernel_param_get_int32( params, "stride" );
    int32_t pad_front = vsi_nn_kernel_param_get_int32( params, "pad_front" );
    int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" );
    int32_t dilation = vsi_nn_kernel_param_get_int32( params, "dilation" );
    _internal_kernel_size_e ks = KN;

    weight_pad_end[0] = gpu_align_np2_safe(inputs[1]->attr.size[0], 8) - inputs[1]->attr.size[0];
    if (!((VSI_NN_TYPE_UINT8 == inputs[0]->attr.dtype.vx_type)
        && (VSI_NN_TYPE_UINT8 == inputs[1]->attr.dtype.vx_type)
        && (NULL == inputs[2] || VSI_NN_TYPE_INT32 == inputs[2]->attr.dtype.vx_type)
        && (VSI_NN_TYPE_UINT8 == outputs[0]->attr.dtype.vx_type)))
    {
        return NULL;
    }

    weights = vsi_nn_pad_tensor(graph, inputs[1], weight_pad_front, weight_pad_end, inputs[1]->attr.dim_num,
        VSI_NN_PAD_MODE_CONSTANT, 0);
    reshape_tensors[0] = inputs[0];

    biases = vsi_nn_merge_input_zeropoint_to_bias(graph, inputs[0], inputs[1], inputs[2]);
    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        shape[0] = inputs[1]->attr.size[0];
        shape[1] = 1;
        for (i = 1; i < inputs[1]->attr.dim_num; i++)
        {
            shape[1] *= inputs[1]->attr.size[i];
        }
        reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
            inputs[1], (uint32_t*)shape, new_rank );
    }
    else
    {
        reshape_tensors[1] = inputs[1];
    }

    temp_tensor[0] = inputs[0];
    if (inputs[2] && inputs[2]->attr.dim_num == 1)
    {
        shape[0] = inputs[2]->attr.size[0];
        shape[1] = 1;
        new_rank = 2;
        reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
            inputs[2], (uint32_t*)shape, new_rank );
    }

    weight_pad_end[0] = gpu_align_np2_safe(reshape_tensors[1]->attr.size[0], 8) - reshape_tensors[1]->attr.size[0];

    weights = vsi_nn_pad_tensor(graph, reshape_tensors[1], weight_pad_front, weight_pad_end,
        reshape_tensors[1]->attr.dim_num, VSI_NN_PAD_MODE_CONSTANT, 0);

    biases = vsi_nn_merge_input_zeropoint_to_bias(graph, reshape_tensors[0], reshape_tensors[1], reshape_tensors[2]);

    temp_tensor[0] = reshape_tensors[0];
    temp_tensor[1] = weights;
    temp_tensor[2] = biases;

@ -760,6 +799,16 @@ static vsi_nn_kernel_node_t _setup
        }
    }

    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        vsi_nn_ReleaseTensor( &reshape_tensors[1] );
    }

    if (inputs[2] && inputs[2]->attr.dim_num == 1)
    {
        vsi_nn_ReleaseTensor( &reshape_tensors[2] );
    }

    if (weights)
    {
        vsi_nn_ReleaseTensor(&weights);

@ -164,9 +164,9 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
    else if ((U8 == input_attr->dtype) || (U8 == input1_attr->dtype))
    {
        uint16_t M0 = 0;
        int8_t postShift0 = 0;
        int32_t postShift0 = 0;
        uint16_t M1 = 0;
        int8_t postShift1 = 0;
        int32_t postShift1 = 0;
        uint32_t i = 0;
        gpu_dp_inst_t uniU8SubZptoF32Conv0_4x4 = {{
            0x09090909, // TCfg

@ -188,8 +188,8 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
            0x00010001, 0x00000000, 0x00010001, 0x00000000,
            0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant
        }, GPU_DP_TYPE_16 };
        vsi_nn_GetFP32MultiAndPostShift(scaleIn0, &M0, &postShift0);
        vsi_nn_GetFP32MultiAndPostShift(scaleIn1, &M1, &postShift1);
        gpu_quantize_multiplier_16bit(scaleIn0, &M0, &postShift0);
        gpu_quantize_multiplier_16bit(scaleIn1, &M1, &postShift1);
        uniU8SubZptoF32Conv0_4x4.data[7] |= (postShift0 & 0x1F);
        uniU8SubZptoF32Conv1_4x4.data[7] |= (postShift1 & 0x1F);
        for ( i = 0; i < 8; i++ )

@ -266,9 +266,11 @@ static vx_param_description_t kernel_param_def[] =
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};

#define INPUT_FUNC_TYPE    (2)
#define INPUT_SCALAR_ALPHA (3)
#define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)

/*

@ -296,6 +298,7 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
    float    inputTail = 0;
    float    outputScale = 1.0f;
    float    outputZP = 0;
    float    alpha = 0;
    uint32_t pack_key;

    attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );

@ -303,7 +306,9 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
    attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );

    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &type);
    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[INPUT_FUNC_TYPE], &type);
    CHECK_STATUS_FAIL_GOTO(status, final );
    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[INPUT_SCALAR_ALPHA], &alpha);
    CHECK_STATUS_FAIL_GOTO(status, final );

    out_shape = attr[1]->shape;

@ -408,6 +413,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
                "uniConvBF16toF32_Part1_2x8", &uniConvBF16toF32_Part1_2x8 );
            status |= vsi_nn_kernel_gpu_add_param( node,
                "uniExtractOddData_2x8", &uniExtractOddData_2x8 );
            status |= vsi_nn_kernel_gpu_add_param( node,
                "alpha", &alpha );
            CHECK_STATUS_FAIL_GOTO(status, final );
        }
        break;

@ -466,6 +473,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
            "outputScale", &outputScale );
        status |= vsi_nn_kernel_gpu_add_param( node,
            "outputZP", &outputZP );
        status |= vsi_nn_kernel_gpu_add_param( node,
            "alpha", &alpha );

        if (attr[1]->dtype == F16)
        {

@ -555,7 +564,8 @@ static vsi_nn_kernel_node_t _setup
    vsi_nn_tensor_t* rs_tensors[2] = { NULL };
    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
    int32_t new_rank = 0;
    vsi_bool ret;
    vsi_bool ret = FALSE;
    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

    ret = vsi_nn_kernel_optimize_element_shape(
            (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,

@ -586,6 +596,8 @@ static vsi_nn_kernel_node_t _setup
                rs_tensors, 1, &rs_tensors[1], 1 );
        node_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
                graph, I32, &unary_type );
        node_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
                graph, F32, &alpha );

        /* Pass parameters to node. */
        status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );

@ -609,6 +621,11 @@ OnError:
        vsi_nn_kernel_scalar_release( &node_params[INPUT_FUNC_TYPE] );
    }

    if (node_params[INPUT_SCALAR_ALPHA])
    {
        vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_ALPHA] );
    }

    return node;
} /* _setup() */

@ -181,8 +181,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer)
    float    dstScale = 0;
    uint16_t M0 = 0;
    uint16_t M1 = 0;
    int8_t  postShift0 = 0;
    int8_t  postShift1 = 0;
    int32_t postShift0 = 0;
    int32_t postShift1 = 0;

    uint32_t pack_key = 0;
    int32_t ac2zero = 0;

@ -279,8 +279,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer)
        dstScale = 1;
        dstZP = 0.0f;
    }
    vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postShift0);
    vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postShift1);
    gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postShift0);
    gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postShift1);

    mulKIn0In1Zp = (float)((int)(K + 3) / 4 * 4 * src1ZP * src0ZP);
    inOutScale = src0Scale * src1Scale / dstScale;

@ -588,7 +588,8 @@ static vsi_nn_kernel_node_t _setup
    vsi_nn_kernel_node_param_t node_params[_MOMENTS_PARAM_NUM] = { NULL };
    vsi_nn_kernel_node_t node = NULL;
    int32_t axis_num = 0;
    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
    size_t axis_num_temp = 0;
    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
    int32_t axis_first = axis[0];
    int32_t shapes[2][VSI_NN_MAX_DIM_NUM] = { { 1, 1, 1, 1 } };
    vsi_nn_tensor_t* reshape_tensors[3] = { NULL };

@ -602,6 +603,8 @@ static vsi_nn_kernel_node_t _setup
    vsi_bool image_2d = FALSE;
    vsi_bool is_continue_axis = TRUE;

    axis_num = (int32_t)axis_num_temp;

    for ( i = 1; i < axis_num; i++)
    {
        if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)

@ -139,7 +139,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
    int32_t input_fl = 0;
    int32_t output_fl = 0;
    uint16_t M0 = 0;
    int8_t postShift = 0;
    int32_t postShift = 0;
    float inputScale = 1.0f;
    int32_t input_ZP = 0;
    float outputScale = 1.0f;

@ -193,7 +193,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
    if ( ( input_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM )
        && ( output_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM ) )
    {
        vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift);
    }

    image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]);

@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
    float   dstZP = 0;
    float   dstScale = 1.0f;

    int8_t  postshift0 = 0;
    int8_t  postshift1 = 0;
    int32_t postshift0 = 0;
    int32_t postshift1 = 0;
    float   outScale_fl = 1;

    uint16_t M0 = 0;

@ -229,7 +229,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
        src0ZP = attr[0]->asymm.zero_point;
        src0Scale = attr[0]->asymm.scale;

        vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postshift0);
        gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postshift0);
    }

    if ( attr[1]->quant == VSI_NN_KERNEL_QUANT_DFP )

@ -243,7 +243,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
        src1ZP = attr[1]->asymm.zero_point;
        src1Scale = attr[1]->asymm.scale;

        vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postshift1);
        gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postshift1);
    }

    if ( attr[2]->quant == VSI_NN_KERNEL_QUANT_DFP )

File diff suppressed because it is too large

@ -0,0 +1,533 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
typedef enum
{
    LARGE = 0,
    SMALL
} _internal_nearest_e;

#define _RESIZE_1D_NEAREST_KERNEL_SOURCE      "resize_1d_nearest"

#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, mode ) \
        (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) | (mode))
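/* The hash key packs the input dtype, output dtype and the LARGE/SMALL variant
 * into one 32-bit value used to look up the shader in the kernel map below. */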

#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, LARGE ), \
          CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
          _RESIZE_1D_NEAREST_KERNEL_SOURCE }

#define PACK_KERNEL_MAP_OPT( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, SMALL ), \
          CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_op"), \
          _RESIZE_1D_NEAREST_KERNEL_SOURCE }

typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _resize_1d_nearest_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP(F16, F16),
    PACK_KERNEL_MAP(I16, I16),
    PACK_KERNEL_MAP(I8, I8),
    PACK_KERNEL_MAP(U8, U8),
    PACK_KERNEL_MAP_OPT(F16, F16),
    PACK_KERNEL_MAP_OPT(I16, I16),
    PACK_KERNEL_MAP_OPT(I8, I8),
    PACK_KERNEL_MAP_OPT(U8, U8),
};

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM  _cnt_of_array( _resize_1d_nearest_kernel_param_def )

#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL    (3)

/*
 * Kernel initializer
 */
DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
#define MAX_POST_SHIFT_BITS     (31)
#define MAX_MULTIPLIER_NUM      (65535)
    vsi_status status = VSI_FAILURE;
    gpu_param_t gpu_param = {
        3,
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0}
        };
    vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
    vsi_nn_kernel_tensor_attr_t * input_attr  = NULL;
    vsi_int_array_t             * out_shape   = NULL;
    vsi_int_array_t             * in_shape    = NULL;
    vsi_nn_kernel_dtype_e         input_dtype = F16;
    vsi_nn_kernel_dtype_e         output_dtype = F16;
    int32_t  align_corners = 0;
    int32_t  half_pixel_centers = 0;
    uint32_t depth = 0;
    int32_t  srcFixPointPos = 0;
    int32_t  dstFixPointPos = 0;
    float    input_scale = 1.0;
    int32_t  inputZP = 0;
    float    output_scale = 1.0;
    int32_t  outputZP = 0;
    float    scale_factor = 1.0f;
    uint32_t in_width = 0;
    uint32_t out_width = 0;
    uint32_t out_height = 0;
    float    half_pixel_value = 0.0f;
    float    round_value = 0.0f;

    input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
    CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );
    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &align_corners);
    CHECK_STATUS_FAIL_GOTO(status, final );
    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &half_pixel_centers);
    CHECK_STATUS_FAIL_GOTO(status, final );
    out_shape    = output_attr->shape;
    in_shape     = input_attr->shape;
    input_dtype  = input_attr->dtype;
    output_dtype = output_attr->dtype;

    in_width   = in_shape->data[0];
    depth      = in_shape->data[2];
    out_width  = out_shape->data[0];
    out_height = out_shape->data[1];

    if (BF16 == input_dtype && output_dtype == BF16)
    {
        input_dtype  = F16;
        output_dtype = F16;
    }
    if (align_corners && out_width > 1)
    {
        scale_factor = ((float)(in_width - 1) * 1.0f) / (float)(out_width - 1);
    }
    else
    {
        scale_factor = ((float)in_width * 1.0f) / (float)out_width;
    }

    if (align_corners)
    {
        round_value = 0.5f;
    }
    else
    {
        round_value = 0.0f;
    }

    if (half_pixel_centers)
    {
        half_pixel_value = 0.5f;
    }
    else
    {
        half_pixel_value = 0.0f;
    }

    if (VSI_NN_KERNEL_QUANT_ASYMM == input_attr->quant )
    {
        input_scale = input_attr->asymm.scale;
        inputZP     = input_attr->asymm.zero_point;
    }
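    /* DFP tensors encode a fixed-point position fl: the effective scale is
     * 2^-fl when fl >= 0 and 2^|fl| when fl < 0. */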
    else if (VSI_NN_KERNEL_QUANT_DFP == input_attr->quant)
    {
        srcFixPointPos = input_attr->dfp.fl;
        if (srcFixPointPos >= 0)
        {
            input_scale = 1.0f / (float) ((int64_t)1 << srcFixPointPos);
        }
        else if (srcFixPointPos < 0)
        {
            input_scale = (float)((int64_t)1 << -srcFixPointPos);
        }
        inputZP = 0;
    }
    else
    {
        input_scale = 1.0f;
        inputZP = 0;
    }

    if (VSI_NN_KERNEL_QUANT_ASYMM == output_attr->quant )
    {
        output_scale = 1.0f / output_attr->asymm.scale;
        outputZP     = output_attr->asymm.zero_point;
    }
    else if (VSI_NN_KERNEL_QUANT_DFP == output_attr->quant)
    {
        dstFixPointPos = output_attr->dfp.fl;
        if (dstFixPointPos >= 0)
        {
            output_scale = (float) ((int64_t)1 << dstFixPointPos);
        }
        else if (dstFixPointPos < 0)
        {
            output_scale = 1.0f / (float) ((int64_t)1 << -dstFixPointPos);
        }
        outputZP = 0;
    }
    else
    {
        output_scale = 1.0;
        outputZP = 0;
    }

    if (F16 == input_dtype && F16 == output_dtype)
    {
        gpu_dp_inst_t uniGetExtractData_2x8 = {{
            0x00009999, // TCfg
            0x00000000, // ASelt
            0x06040200, 0x00000000, // ABin
            0x0000aaaa, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000300, // AccumType, ConstantType, and PostShift
            0x00100010, 0x00100010, 0x00100010, 0x00100010,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        if (scale_factor < 4.0f)
        {
            status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
            CHECK_STATUS_FAIL_GOTO(status, final );
        }

        gpu_param.global_scale[0] = 4;
        gpu_param.global_scale[1] = 1;
        gpu_param.global_scale[2] = 1;
        status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
        CHECK_STATUS_FAIL_GOTO(status, final );
    }
    else if ( input_dtype == output_dtype && (I8 == input_dtype || I16 == input_dtype))
    {
        gpu_dp_inst_t uniGetExtractData_2x8 = {{
            0x00009999, // TCfg
            0x00000000, // ASelt
            0x06040200, 0x00000000, // ABin
            0x0000aaaa, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000300, // AccumType, ConstantType, and PostShift
            0x00080008, 0x00080008, 0x00080008, 0x00080008,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};
        gpu_dp_inst_t uniConvertI8toI8_2x8 = {{
            0x11111111, // TCfg
            0x00000000, // ASelt
            0x03020100, 0x07060504, // ABin
            0x22222222, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000600, // AccumType, ConstantType, and PostShift
            0x00000001, 0x00000001, 0x00000001, 0x00000001,
            0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
        }, GPU_DP_TYPE_16};

        if (I16 == input_dtype)
        {
            uniGetExtractData_2x8.data[8]  = 0x00100010;
            uniGetExtractData_2x8.data[9]  = 0x00100010;
            uniGetExtractData_2x8.data[10] = 0x00100010;
            uniGetExtractData_2x8.data[11] = 0x00100010;
            uniGetExtractData_2x8.data[12] = 0x00100010;
            uniGetExtractData_2x8.data[13] = 0x00100010;
            uniGetExtractData_2x8.data[14] = 0x00100010;
            uniGetExtractData_2x8.data[15] = 0x00100010;
        }

        if (srcFixPointPos > dstFixPointPos)
        {
            int32_t postshift = vsi_nn_min(srcFixPointPos - dstFixPointPos, MAX_POST_SHIFT_BITS);

            uniConvertI8toI8_2x8.data[7] |= (postshift & 0x1F);
        }
        else
        {
            uint32_t multiplier = vsi_nn_min((int64_t)1 << (dstFixPointPos - srcFixPointPos), MAX_MULTIPLIER_NUM);
            uint32_t i = 0;

            for (i = 0; i < 8; i++)
            {
                uniConvertI8toI8_2x8.data[i + 8] = multiplier;
            }
        }

        if (scale_factor < 4.0f)
        {
            status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
            CHECK_STATUS_FAIL_GOTO(status, final );
        }

        gpu_param.global_scale[0] = 4;
        gpu_param.global_scale[1] = 1;
        gpu_param.global_scale[2] = 1;

        status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
        status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertI8toI8_2x8", &uniConvertI8toI8_2x8);
        CHECK_STATUS_FAIL_GOTO(status, final );
    }
    else if (U8 == input_dtype && U8 == output_dtype)
    {
        uint16_t M0 = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP[2] = {0};
        gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{
            0xdddddddd, // TCfg
            0x44444444, // ASelt
            0x13121110, 0x17161514, // ABin
            0x11111111, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00002400, // AccumType, ConstantType, and PostShift
            0x00000000, 0x00000000, 0x00000000, 0x00000000,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};
        gpu_dp_inst_t uniGetExtractData_2x8 = {{
            0x00009999, // TCfg
            0x00000000, // ASelt
            0x06040200, 0x00000000, // ABin
            0x0000aaaa, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000300, // AccumType, ConstantType, and PostShift
            0x00080008, 0x00080008, 0x00080008, 0x00080008,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift);

        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0);

        uniMultiplyAndPostShift_2x8.data[7] |= (postShift & 0x1F);

        if (scale_factor < 4.0f)
        {
            status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
            CHECK_STATUS_FAIL_GOTO(status, final );
        }

        gpu_param.global_scale[0] = 4;
        gpu_param.global_scale[1] = 1;
        gpu_param.global_scale[2] = 1;

        status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
        status |= vsi_nn_kernel_gpu_add_param( node, "multAndoutZP", multAndoutZP);
        status |= vsi_nn_kernel_gpu_add_param( node, "uniMultiplyAndPostShift_2x8", &uniMultiplyAndPostShift_2x8);
        CHECK_STATUS_FAIL_GOTO(status, final );
    }
    status = vsi_nn_kernel_gpu_add_param( node, "half_pixel_value", &half_pixel_value);
    status |= vsi_nn_kernel_gpu_add_param( node, "round_value", &round_value);
    CHECK_STATUS_FAIL_GOTO(status, final );

    gpu_param.global_size[0] = gpu_align_p2((out_width + gpu_param.global_scale[0] - 1)\
            / gpu_param.global_scale[0], 4);
    gpu_param.global_size[1] = (out_height + gpu_param.global_scale[1] - 1) / gpu_param.global_scale[1];
    gpu_param.global_size[2] = depth;

    status = vsi_nn_kernel_gpu_config( node, &gpu_param );
#undef MAX_MULTIPLIER_NUM
#undef MAX_POST_SHIFT_BITS
final:
    if (input_attr) vsi_nn_kernel_tensor_attr_release( &input_attr );
    if (output_attr) vsi_nn_kernel_tensor_attr_release( &output_attr );
    return status;
} /* _resize_1d_nearest_initializer() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs,
    int32_t align_corners
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_dtype_e in_dtype = F16;
    vsi_nn_kernel_dtype_e out_dtype = F16;
    const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map;
    size_t kernel_map_size              = _cnt_of_array( _resize_1d_nearest_kernel_map );
    vx_param_description_t * param_def  = _resize_1d_nearest_kernel_param_def;
    size_t param_def_size               = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
    vx_kernel_initialize_f  initializer = _resize_1d_nearest_initializer;

    uint32_t key = 0;
    uint32_t i = 0;
    uint32_t inputWidth  = inputs[0]->attr.size[0];
    uint32_t outputWidth = outputs[0]->attr.size[0];
    float scale_factor;
    _internal_nearest_e resize_mode = LARGE;

    if (align_corners && outputWidth > 1)
    {
        scale_factor = (vx_float32)(inputWidth - 1) / (vx_float32)(outputWidth - 1);
    }
    else
    {
        scale_factor = (vx_float32)inputWidth / (vx_float32)outputWidth;
    }

    if (scale_factor < 4.0f)
    {
        resize_mode = SMALL;
    }
    else
    {
        resize_mode = LARGE;
    }
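    /* Scale factors below 4x select the "_op" shader variant (SMALL) registered
     * with PACK_KERNEL_MAP_OPT; larger downscales use the generic kernel (LARGE). */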

    in_dtype  = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
    out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

    if (BF16 == in_dtype && BF16 == out_dtype)
    {
        in_dtype  = F16;
        out_dtype = F16;
    }

    key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype, resize_mode );

    for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
    {
        if( kernel_map[i].key == key )
        {
            break;
        }
    }

    if ( i < (uint32_t)kernel_map_size )
    {
        snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
        kernel->info.parameters  = param_def;
        kernel->info.numParams   = (uint32_t)param_def_size;
        kernel->info.initialize  = initializer;
        // Register code source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
                "vsi_nn_kernel_header",
                kernel_map[i].source_name );
        // Register binary source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
                kernel_map[i].source_name );
        status = VSI_SUCCESS;
    }
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );

    status = _query_kernel( kernel, inputs, outputs, align_corners );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
            node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_EVIS( resize_1d_nearest, _setup )

@ -850,7 +850,7 @@ static vsi_status _query_kernel
        }
    }

    if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size))
    if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size) && is_same_type && is_evis2)
    {
        scale_flag = UP_OPT;
        key = RESIZE_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag );

@ -348,7 +348,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
    else if (U8 == input_dtype && U8 == output_dtype)
    {
        uint16_t M0 = 0;
        vx_int8 postShift = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP[2] = {0};
        gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{
            0xdddddddd, // TCfg

@ -371,7 +371,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(input_scale * output_scale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift);

        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0);

@ -131,9 +131,9 @@ DEF_KERNEL_INITIALIZER(_select_initializer)
    float     outputScale = 1.0f;
    int32_t   outputZP = 0;
    uint16_t  in0_M0 = 0;
    int8_t    in0_postShift = 0;
    int32_t   in0_postShift = 0;
    uint16_t  in1_M0 = 0;
    int8_t    in1_postShift = 0;
    int32_t   in1_postShift = 0;
    uint32_t  pack_key = 0;
    input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
    CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_select_initializer)
        outputZP    = output_attr->asymm.zero_point;
    }

    vsi_nn_GetFP32MultiAndPostShift(input0Scale / outputScale, &in0_M0, &in0_postShift);
    vsi_nn_GetFP32MultiAndPostShift(input1Scale / outputScale, &in1_M0, &in1_postShift);
    gpu_quantize_multiplier_16bit(input0Scale / outputScale, &in0_M0, &in0_postShift);
    gpu_quantize_multiplier_16bit(input1Scale / outputScale, &in1_M0, &in1_postShift);

    pack_key = _PACK_SELECT_KEY( input0_attr->dtype, input1_attr->dtype, output_attr->dtype );

@ -309,7 +309,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)
    {
        float    uint8Scale = scaleIn / scaleOut;
        uint16_t M0 = 0;
        int8_t   postShift = 0;
        int32_t  postShift = 0;
        uint32_t multAndoutZP[2] = {0};

        gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{

@ -323,7 +323,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift);
        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0);

@ -154,7 +154,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
    int32_t input_fl = 0;
    int32_t output_fl = 0;
    uint16_t M0 = 0;
    int8_t postShift = 0;
    int32_t postShift = 0;
    float inputScale = 1.0f;
    int32_t input_ZP = 0;
    float outputScale = 1.0f;

@ -212,7 +212,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
    factorOut = 1.0f / outputScale;

    vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift);
    gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift);

    image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]);

@ -84,7 +84,7 @@ void gpu_quantize_multiplier_32bit
    double q;
    int64_t q_fixed;
    const int32_t bit = 32;
    if( vsi_abs(double_multiplier - 0.0) < 1e-5 )
    if( vsi_abs(double_multiplier - 0.0) < 1e-8 )
    {
        *quantize_multiplier = 0;
        *shift = bit - 0;

@ -116,6 +116,25 @@ void gpu_quantize_multiplier_32bit
    }
} /* gpu_quantize_multiplier_32_bit() */

void _modify_multiplier_postshift
    (
    uint16_t * quantize_multiplier,
    int32_t  * shift
    )
{
    uint16_t multiplier = *quantize_multiplier;
    int32_t postshift = *shift;

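    /* Halve the multiplier (rounding up) while stepping the shift down, keeping
     * multiplier * 2^-postshift roughly constant until the shift fits the
     * hardware's post-shift range. */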
    while (postshift > GPU_MAX_POST_SHIFT_BITS)
    {
        multiplier = (multiplier + 1) >> 1;
        postshift --;
    }

    *quantize_multiplier = multiplier;
    *shift = postshift;
}

void gpu_quantize_multiplier_16bit
    (
    double double_multiplier,

@ -135,5 +154,8 @@ void gpu_quantize_multiplier_16bit
    {
        *shift -= bit;
    }

    _modify_multiplier_postshift(quantize_multiplier, shift);

} /* gpu_quantize_multiplier_16bit() */

@ -1145,10 +1145,10 @@ vsi_nn_kernel_tensor_attr_t * vsi_nn_kernel_tensor_attr_create
            status = vxQueryTensor( (vx_tensor)tensor, VX_TENSOR_SCALE,
                &(attr->asymm.scale), sizeof(float));
            CHECK_STATUS( status );
            // Reset scale to 1
            if( (attr->asymm.scale - 0.f) < 1e-5 )
            // Reset scale to 1e-8
            if( (attr->asymm.scale - 0.f) < 1e-8 )
            {
                attr->asymm.scale = 1.0f;
                attr->asymm.scale = (float)1e-8;
                attr->asymm.zero_point = 0;
            }
        }

@ -1225,13 +1225,17 @@ vsi_status vsi_nn_kernel_pirority_set
static vsi_bool _check_shader_support(vsi_nn_graph_t* graph)
{
    char *envctrl;
    int32_t enableShader = 1;
    static int32_t enableShader = -1;

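    /* The VIV_VX_ENABLE_SHADER environment variable is read once and cached in
     * the static flag, so later graphs skip the getenv() call. */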
    if (enableShader == -1)
    {
        enableShader = 1;
        envctrl = getenv("VIV_VX_ENABLE_SHADER");
        if (envctrl)
        {
            enableShader = atoi(envctrl);
        }
    }

#if VX_HARDWARE_CAPS_PARAMS_EXT_SUPPORT
    if ( graph->ctx->config.subGroupSize == 0 )

@ -1240,7 +1244,7 @@ static vsi_bool _check_shader_support(vsi_nn_graph_t* graph)
    }
#endif

    if(enableShader == 1)
    if (enableShader >= 1)
    {
        return TRUE;
    }

@ -459,7 +459,7 @@ vsi_bool vsi_nn_kernel_optimize_broadcast_shape
        k = 0;
        for (j = 0; j < (size_t)input_num; j++)
        {
            if (size_in[k] > 1)
            if (size_in[j] > 1)
            {
                k = j;
                break;

@ -605,8 +605,12 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias
{
    vsi_nn_tensor_t * new_bias = NULL;
    vsi_nn_tensor_attr_t attr;

    int32_t *new_bias_data_ptr = NULL;
    uint8_t *weight_data = NULL;
    int32_t *bias_data = NULL;
    uint32_t i, j;
    memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
    weight_data = vsi_nn_ConvertTensorToData(graph, weight);

    if (bias == NULL)
    {

@ -620,26 +624,47 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias
        attr.dtype.zero_point = 0;
        attr.dtype.vx_type = VSI_NN_TYPE_INT32;
        }
        else
        {
            VSILOGE("need to add ...");
        }
    }
    else
    {
        memcpy(&attr, &bias->attr, sizeof(vsi_nn_tensor_attr_t));
        if (attr.dim_num == 1)
        {
            attr.size[1] = 1;
            attr.dim_num = 2;
        }
        bias_data = (int32_t *)vsi_nn_ConvertTensorToData(graph, bias);
    }

    new_bias = vsi_nn_CreateTensorWithDefault(graph, &attr, 0.0);
    new_bias_data_ptr = (int32_t *)malloc(attr.size[0] * sizeof(int32_t));
    memset((void *)new_bias_data_ptr, 0, sizeof(int32_t) * attr.size[0]);

    if (input->attr.dtype.zero_point == 0)
    if (input->attr.dtype.zero_point != 0)
    {
        return new_bias;
    }
    else
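    /* Fold the input zero point into the bias:
     * new_bias[i] = bias[i] - input_zp * sum_j (w[i][j] - w_zp). */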
        for (i = 0; i < weight->attr.size[1]; i++)
        {
        VSILOGE("need to process bias - (input_zp * (w - w_zp)) ...");
            uint8_t *weight_ptr = weight_data + i * weight->attr.size[0];
            for (j = 0; j < weight->attr.size[0]; j++)
            {
                new_bias_data_ptr[i] += -((int32_t)weight_ptr[j] - weight->attr.dtype.zero_point) \
                    * input->attr.dtype.zero_point;
            }
        }
    }

    if (bias_data != NULL)
    {
        for (i = 0; i < weight->attr.size[1]; i++)
        {
            new_bias_data_ptr[i] += bias_data[i];
        }
    }

    new_bias = vsi_nn_CreateTensorFromData(graph, (uint8_t *)new_bias_data_ptr, &attr);

    vsi_nn_safe_free( new_bias_data_ptr );
    vsi_nn_safe_free( bias_data );
    vsi_nn_safe_free( weight_data );

    return new_bias;
}

@ -31,6 +31,8 @@
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_node.h"
#include "vsi_nn_feature.h"
#include "vsi_nn_tensor_util.h"
#include "vsi_nn_graph_optimization.h"

static vsi_bool _build_vx_conv2d_param
    (

@ -173,6 +175,7 @@ static vx_tensor _expand_tensor_dim
    vsi_nn_kernel_t             * kernel \
    )

REGISTER_CONV_OPENVX_KERNEL( conv1d )
{
    vx_node node = NULL;

@ -196,10 +199,34 @@ REGISTER_CONV_OPENVX_KERNEL( conv1d )
    temp_tensors[0] = _expand_tensor_dim( inputs[0]->t,
            (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 );
    CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final );

    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
                (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
        CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
    }
    else
    {
        uint8_t * data = NULL;
        vsi_nn_tensor_attr_t attr;
        uint32_t i;

        data = vsi_nn_ConvertTensorToData( graph, inputs[1] );
        CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final );

        memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t));

        attr.size[0] = 1;
        for (i = 1; i <= inputs[1]->attr.dim_num; i++)
        {
            attr.size[i] = inputs[1]->attr.size[i - 1];
        }
        attr.dim_num = inputs[1]->attr.dim_num + 1;
        attr.dtype.channel_dim = inputs[1]->attr.dtype.channel_dim + 1;

        temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr);
        vsi_nn_safe_free( data );
    }

    temp_tensors[2] = _expand_tensor_dim( outputs[0]->t,
            (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 );

@ -248,9 +275,38 @@ REGISTER_CONV_OPENVX_KERNEL( depthwise_conv1d )
            (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 );
    CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final );

    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
                (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
        CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
    }
    else
    {
        uint8_t * data = NULL;
        vsi_nn_tensor_attr_t attr;
        uint32_t i;

        data = vsi_nn_ConvertTensorToData( graph, inputs[1] );
        CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final );

        memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t));

        attr.size[0] = 1;
        attr.size[1] = inputs[1]->attr.size[0];
        attr.size[2] = 1;
        for (i = 1; i < inputs[1]->attr.dim_num; i++)
        {
            attr.size[2] *= inputs[1]->attr.size[i];
        }
        attr.size[3] = 1;
        attr.dim_num = 4;
        attr.dtype.channel_dim = 2;
|
||||
temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr);
|
||||
|
||||
vsi_nn_safe_free( data );
|
||||
}
|
||||
|
||||
temp_tensors[2] = _expand_tensor_dim( outputs[0]->t,
|
||||
(int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 );
|
||||
|
|
|
|||
|
|
@@ -38,27 +38,27 @@ typedef struct _sort_lut_s
    float val;
} sort_lut;

-static float exp_eval(float val)
+static float exp_eval(float val, float alpha)
{
    return expf(val);
}

-static float log_eval(float data)
+static float log_eval(float data, float alpha)
{
    return logf(data);
}

-static float elu_eval(float data)
+static float elu_eval(float data, float alpha)
{
-    return data >=0 ? data : expf(data) - 1;
+    return data >=0 ? data : expf(data) * alpha - alpha;
}

-static float neg_eval(float data)
+static float neg_eval(float data, float alpha)
{
    return data * -1.0f;
}

-static float hsigmoid_eval(float data)
+static float hsigmoid_eval(float data, float alpha)
{
    data = (float)(0.2 * data + 0.5);
    data = vsi_nn_clamp(data, 0, 1);
@@ -66,14 +66,14 @@ static float hsigmoid_eval(float data)
    return data;
}

-static float soft_plus_eval(float data)
+static float soft_plus_eval(float data, float alpha)
{
-    return log_eval(exp_eval(data) + 1);
+    return log_eval(exp_eval(data, alpha) + 1, alpha);
}

-static float mish_eval(float data)
+static float mish_eval(float data, float alpha)
{
-    data = (float)(data * tanh(soft_plus_eval(data)));
+    data = (float)(data * tanh(soft_plus_eval(data, alpha)));

    return data;
}
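Of the rewritten evaluators, only ELU actually uses the new parameter: elu(x) = x for x >= 0 and alpha * (exp(x) - 1) otherwise, which the new return expresses as expf(data) * alpha - alpha. A quick standalone sanity check (not part of the commit):

    #include <assert.h>
    #include <math.h>

    static float elu_ref(float x, float alpha)
    {
        return x >= 0 ? x : alpha * (expf(x) - 1.0f);
    }

    int main(void)
    {
        /* alpha == 1 reproduces the old fixed behaviour */
        assert(fabsf(elu_ref(-2.0f, 1.0f) - (expf(-2.0f) - 1.0f)) < 1e-6f);
        /* alpha scales only the negative branch */
        assert(elu_ref(3.0f, 0.5f) == 3.0f);
        return 0;
    }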
@@ -96,7 +96,7 @@ static int32_t _lut_comparator(const void *pa, const void *pb)
    return 0;
}

-static void _set_unary_table_lookup(float func(float), float *index, float *value)
+static void _set_unary_table_lookup(float func(float, float), float *index, float *value, float alpha)
{
#define VSI_NN_MAX_LUT_SIZE (1024)
#define FLT16_MAX (57344)
@@ -108,25 +108,25 @@ static void _set_unary_table_lookup(float func(float), float *index, float *valu
    {
        int16_t val = (int16_t)(i << 6);
        lut[i].index = fp16_to_fp32(val);
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    for (i = 0x0; i < 0x10; i++)
    {
        lut[i].index = 0;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    for (i = 0x1F0; i < 0x200; i++)
    {
        lut[i].index = FLT16_MAX;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    for (i = 0x3F0; i < 0x400; i++)
    {
        lut[i].index = FLT16_MIN;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    qsort(lut, VSI_NN_MAX_LUT_SIZE, sizeof(sort_lut), _lut_comparator);
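Context for the table construction: i << 6 puts the 10-bit loop index into the high bits of an fp16 pattern (sign, 5-bit exponent, top 4 mantissa bits), so the 1024 entries sample the full fp16 range; the follow-up loops then pin the buckets around zero and beyond +/-FLT16_MAX. A self-contained conversion matching how fp16_to_fp32 is used here (assumption: the library helper is a plain IEEE half-to-single conversion):

    #include <stdint.h>
    #include <string.h>

    static float fp16_bits_to_fp32(uint16_t h)
    {
        uint32_t sign = (uint32_t)(h & 0x8000) << 16;
        uint32_t exp  = (h >> 10) & 0x1F;
        uint32_t man  = h & 0x3FF;
        uint32_t bits;
        float out;

        if (exp == 0x1F)                /* inf / NaN */
        {
            bits = sign | 0x7F800000 | (man << 13);
        }
        else if (exp == 0)              /* zero / subnormal */
        {
            if (man == 0)
            {
                bits = sign;
            }
            else
            {
                exp = 127 - 15 + 1;
                while ((man & 0x400) == 0) { man <<= 1; exp--; }
                man &= 0x3FF;
                bits = sign | (exp << 23) | (man << 13);
            }
        }
        else                            /* normal number */
        {
            bits = sign | ((exp - 15 + 127) << 23) | (man << 13);
        }
        memcpy(&out, &bits, sizeof(out));
        return out;
    }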
@@ -154,13 +154,14 @@ static vsi_nn_kernel_node_t _setup
    size_t output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t * kernel,
-    float func(float)
+    float func(float, float)
    )
{
#ifdef VX_USER_LOOKUP_TABLE_SUPPORT
    vx_lut lut1 = NULL;
    vx_lut lut2 = NULL;
    vx_node node = NULL;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
    float index[1024] = {0};
    float value[1024] = {0};

@@ -172,7 +173,7 @@ static vsi_nn_kernel_node_t _setup
        return NULL;
    }

-    _set_unary_table_lookup(func, index, value);
+    _set_unary_table_lookup(func, index, value, alpha);

    lut1 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024);
    lut2 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024);
@@ -1,12 +1,12 @@

-float4 eltwise_unary_sin(float4 x)
+float4 eltwise_unary_sin(float4 x, float alpha)
{
    return native_sin(x);
}

#define logE (1.44269502f)
#define twoLogE (logE * 2.0f)
-float4 eltwise_unary_exp(float4 x)
+float4 eltwise_unary_exp(float4 x, float alpha)
{
    x *= logE;
    x = exp2(x);
@@ -14,33 +14,33 @@ float4 eltwise_unary_exp(float4 x)
}

#define rlogE (0.693147182f)
-float4 eltwise_unary_log(float4 x)
+float4 eltwise_unary_log(float4 x, float alpha)
{
    x = log2(x);
    return x * rlogE;
}

-float4 eltwise_unary_elu(float4 val)
+float4 eltwise_unary_elu(float4 val, float alpha)
{
    float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;

    return val < 0 ? x : val;
}

-float4 eltwise_unary_neg(float4 x)
+float4 eltwise_unary_neg(float4 x, float alpha)
{
    return x * -1;
}

-float4 eltwise_unary_hard_sigmoid(float4 x)
+float4 eltwise_unary_hard_sigmoid(float4 x, float alpha)
{
    x = 0.2 * x + 0.5;
    x = clamp(x, 0, 1);
    return x;
}

-float4 _softrelu(float4 x)
+float4 _softrelu(float4 x, float alpha)
{
    x *= logE;
    x = exp2(x);
@@ -49,7 +49,7 @@ float4 _softrelu(float4 x)
    return x * rlogE;
}

-float4 _tanh(float4 x)
+float4 _tanh(float4 x, float alpha)
{
    x *= -twoLogE;
    x = 1 + exp2(x);
@@ -57,10 +57,10 @@ float4 _tanh(float4 x)
    return (2 * x - 1);
}

-float4 eltwise_unary_mish(float4 x)
+float4 eltwise_unary_mish(float4 x, float alpha)
{
-    float4 y = _softrelu(x);
-    x = x * _tanh(y);
+    float4 y = _softrelu(x, alpha);
+    x = x * _tanh(y, alpha);
    return x;
}

@@ -72,14 +72,15 @@ __kernel void func_name##_F32toF32 \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
    \
    float4 src = read_imagef(input, coord); \
    \
-    float4 dst = eltwise_unary_##func_name(src); \
+    float4 dst = eltwise_unary_##func_name(src, alpha); \
    \
    write_imagef(output, coord, dst); \
}
@@ -99,14 +100,15 @@ __kernel void func_name##_F32toF32_2D \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
    \
    float4 src = read_imagef(input, coord); \
    \
-    float4 dst = eltwise_unary_##func_name(src); \
+    float4 dst = eltwise_unary_##func_name(src, alpha); \
    \
    write_imagef(output, coord, dst); \
}
@@ -126,7 +128,8 @@ __kernel void func_name##_U8toU8 \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -134,7 +137,7 @@ __kernel void func_name##_U8toU8 \
    uint4 src = read_imageui(input, coord); \
    float4 data = convert_float4(src) * inputScale - inputTail; \
    \
-    data = eltwise_unary_##func_name(data); \
+    data = eltwise_unary_##func_name(data, alpha); \
    uint4 dst = convert_uint4(data * outputScale + outputZP); \
    \
    write_imageui(output, coord, dst); \
@@ -155,7 +158,8 @@ __kernel void func_name##_U8toU8_2D \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -163,7 +167,7 @@ __kernel void func_name##_U8toU8_2D \
    uint4 src = read_imageui(input, coord); \
    float4 data = convert_float4(src) * inputScale - inputTail; \
    \
-    data = eltwise_unary_##func_name(data); \
+    data = eltwise_unary_##func_name(data, alpha); \
    uint4 dst = convert_uint4(data * outputScale + outputZP); \
    \
    write_imageui(output, coord, dst); \
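The U8 variants wrap the same evaluator in a dequantize/requantize pair. A scalar model of that path (assumptions: inputTail is the pre-folded zero-point term inputZP * inputScale, and outputZP is the output zero point):

    /* Scalar model of the U8toU8 kernel body above. */
    static unsigned char eval_u8(unsigned char q,
                                 float inputScale, float inputTail,
                                 float outputScale, float outputZP,
                                 float (*func)(float, float), float alpha)
    {
        float x = (float)q * inputScale - inputTail; /* dequantize */
        float y = func(x, alpha);                    /* e.g. elu_eval */
        float r = y * outputScale + outputZP;        /* requantize */
        return (unsigned char)r;                     /* kernel uses convert_uint4 */
    }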
@@ -184,7 +188,8 @@ __kernel void neg_I32toI32
    float inputScale,
    float inputTail,
    float outputScale,
-    float outputZP
+    float outputZP,
+    float alpha
    )
{
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
@@ -202,7 +207,8 @@ __kernel void neg_I32toI32_2D
    float inputScale,
    float inputTail,
    float outputScale,
-    float outputZP
+    float outputZP,
+    float alpha
    )
{
    int2 coord = (int2)(get_global_id(0), get_global_id(1));
@@ -0,0 +1,57 @@
__kernel void resize_1d_bilinear_F32toF32(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;
    float left_x_f = floor(in_x);
    float x_lerp = in_x - left_x_f;
    int left_x_idx = convert_int(left_x_f);
    int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);
    float4 top_l, top_r, top, bottom, dst;

    top_l = read_imagef(input, coord_in);
    coord_in.x++;
    top_r = read_imagef(input, coord_in);

    top_r = top_r - top_l;
    dst = top_l + x_lerp * top_r;

    write_imagef(output, coord_out, dst);

}


__kernel void resize_1d_bilinear_U8toU8(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value,
    float in_scale,
    float in_tail,
    float out_scale,
    float out_tail
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;
    float left_x_f = floor(in_x);
    float x_lerp = in_x - left_x_f;
    int left_x_idx = convert_int(left_x_f);
    int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);
    float4 top_l, top_r, top;
    uint4 dst;

    top_l = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;
    coord_in.x++;
    top_r = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;

    top_r = top_r - top_l;
    top = top_l + x_lerp * top_r;
    dst = convert_uint4(top * out_scale + out_tail);

    write_imageui(output, coord_out, dst);
}
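Both kernels compute the same source coordinate, in_x = (x_out + h) * scale_x - h, where h is presumably 0.5 when half-pixel centers are enabled and 0 otherwise (an assumption about how the host sets half_pixel_value). A scalar reference of the interpolation:

    #include <math.h>

    /* Scalar reference for the 1-D bilinear kernels above; the explicit
     * clamps stand in for the image addressing the GPU version relies on. */
    static float resize_1d_bilinear_ref(const float * src, int src_w,
                                        float scale_x, float half_pixel_value,
                                        int x_out)
    {
        float in_x   = ((float)x_out + half_pixel_value) * scale_x - half_pixel_value;
        float left_f = floorf(in_x);
        float lerp   = in_x - left_f;
        int   left   = (int)left_f;
        int   right  = left + 1;
        if (left < 0) left = 0;
        if (right > src_w - 1) right = src_w - 1;
        return src[left] + lerp * (src[right] - src[left]);
    }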
@@ -0,0 +1,36 @@

#define NEAREST_INDEX_PROCESS() \
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
    float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x + round_value; \
    int in_x_idx = convert_int(in_x); \

__kernel void resize_1d_nearest_F32toF32(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value,
    float round_value)
{
    NEAREST_INDEX_PROCESS()
    int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);
    float4 dst;
    dst = read_imagef(input, coord_in);
    write_imagef(output, coord_out, dst);
}


__kernel void resize_1d_nearest_U8toU8(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value,
    float round_value,
    float output_scale,
    float output_tail)
{
    NEAREST_INDEX_PROCESS()
    int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);
    uint4 dst;
    dst = convert_uint4(convert_float4(read_imageui(input, coord_in)) * output_scale + output_tail);
    write_imageui(output, coord_out, dst);
}
@@ -1,5 +1,7 @@
#include "cl_viv_vx_ext.h"

+_viv_uniform float alpha;
+
float4 eltwise_unary_sin(float4 x)
{
    return native_sin(x);
@@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x)
float4 eltwise_unary_elu(float4 val)
{
    float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;

    return val < 0 ? x : val;
}
@@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;
__kernel void func_name##_##src_type_name##to##dst_type_name##_2D( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -194,7 +197,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;
__kernel void func_name##_BF16toBF16_2D( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -1,5 +1,7 @@
#include "cl_viv_vx_ext.h"

+_viv_uniform float alpha;
+
float4 eltwise_unary_sin(float4 x)
{
    return native_sin(x);
@@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x)
float4 eltwise_unary_elu(float4 val)
{
    float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;

    return val < 0 ? x : val;
}
@@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;
__kernel void func_name##_##src_type_name##to##dst_type_name( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -192,7 +195,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;
__kernel void func_name##_BF16toBF16( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -0,0 +1,148 @@
#include "cl_viv_vx_ext.h"

_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_odd_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_even_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_Part0_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_Part1_2x8;
_viv_uniform float half_pixel_value;

__kernel void resize_1d_bilinear_BF16toBF16_DOWN
    (
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);

    vxc_short8 top;
    vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        vxc_ushort8 src;
        float4 left4;
        float4 right4;
        float4 dst4;

        VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_even_2x8);
        _viv_asm(COPY, right4, src, 16);
        VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_odd_2x8);
        _viv_asm(COPY, left4, src, 16);
        right4 -= left4;
        dst4 = right4 * x_lerp + left4;
        vxc_ushort8 tmp, dst;
        _viv_asm(COPY, tmp, dst4, 16);
        dst.s0123 = tmp.s1357;
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_BF16toBF16_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_ushort8 src0, src1, dst0;
    vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 16;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        coord_in.y ++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        vxc_ushort8 dst_tmp;
        float4 left4;
        float4 right4;

        VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part0_2x8);
        _viv_asm(COPY, left4, dst_tmp, 16);
        VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part1_2x8);
        _viv_asm(COPY, right4, dst_tmp, 16);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;

        vxc_ushort8 tmp, dst;
        _viv_asm(COPY, tmp, dst4, 16);
        dst.s0123 = tmp.s1357;

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_out.y++;
    } while (coord_out.y < out_height);
}
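The uniforms above rely on the bfloat16 layout: a bf16 value is the high 16 bits of an IEEE float, so widening is a shift and narrowing (dst.s0123 = tmp.s1357) just keeps the odd half-words of the float lanes. A standalone sketch of both directions:

    #include <stdint.h>
    #include <string.h>

    static float bf16_to_f32(uint16_t b)
    {
        uint32_t bits = (uint32_t)b << 16;
        float f;
        memcpy(&f, &bits, sizeof(f));
        return f;
    }

    static uint16_t f32_to_bf16_truncate(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof(bits));
        return (uint16_t)(bits >> 16);  /* truncation, as in the kernel */
    }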
@@ -0,0 +1,136 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniResizeNxDown_2x8;
_viv_uniform int out_height;

#define RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
    read_type read_data, save_data; \
    data_type in0, result; \
    \
    int8 input_desc; \
    _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
    _viv_asm(MOV, coord_in.w, baseAddr); \
    \
    int8 output_desc; \
    _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
    _viv_asm(MOV, coord_out.w, baseAddr); \
    \
    while (coord_out.y < out_height) \
    { \
        VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
            VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in0, read_data, 16); \
        VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \
        _viv_asm(COPY, save_data, result, 16); \
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
            VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
        coord_in.y++; \
        coord_out.y++; \
    } \

#define RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(I8, I8, vxc_char16, vxc_char16)



#define RESIZE_1D_2X_DOWN_8BIT_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_8BIT_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_DOWN_8BIT_SAME(I8, I8, vxc_char16, vxc_char16)


#define RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
    read_type read_data, read_data1, save_data; \
    data_type in0, in1, result; \
    \
    int8 input_desc; \
    _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
    _viv_asm(MOV, coord_in.w, baseAddr); \
    \
    int8 output_desc; \
    _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
    _viv_asm(MOV, coord_out.w, baseAddr); \
    \
    while (coord_out.y < out_height) \
    { \
        VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in0, read_data, 16); \
        VXC_OP4(img_load_3d, read_data1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in1, read_data1, 16); \
        VXC_DP2x8(result, in0, in1, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \
        _viv_asm(COPY, save_data, result, 16); \
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
            VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
        coord_in.y++; \
        coord_out.y++; \
    } \

#define RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(F16, F16, vxc_short8, vxc_half8)


#define RESIZE_1D_2X_DOWN_16BIT_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_16BIT_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_DOWN_16BIT_SAME(F16, F16, vxc_short8, vxc_half8)

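These DOWN_2X kernels exploit the fact that a 2x reduction with half-pixel centers lands exactly between adjacent pixels, so the lerp degenerates to a fixed average (assumption: uniResizeNxDown_2x8 holds the 1/2, 1/2 weights). A scalar model, rounding half up where the hardware rounds to nearest even:

    /* One output pixel of the 2x HALF_SAME downsample. */
    static unsigned char resize_down2x_half_same_ref(const unsigned char * row,
                                                     int x_out)
    {
        return (unsigned char)(((int)row[2 * x_out] + row[2 * x_out + 1] + 1) >> 1);
    }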
@@ -0,0 +1,216 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform VXC_512Bits uniFp16toFp32_4x4;
_viv_uniform VXC_512Bits uniRightSubLeft_4x4;
_viv_uniform VXC_512Bits uniExtactHalf8_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform float uint8Scale;
_viv_uniform float output_ZP;
_viv_uniform VXC_512Bits uniFp16toFp32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertFp2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertFp2FP32_right_4x4;

__kernel void resize_1d_bilinear_F16toF16_DOWN
    (
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 left4;
    float4 right4;

    vxc_ushort8 src, result;
    vxc_half8 src_half, dst;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        _viv_asm(COPY, src_half, src, 16);

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;

        half4 tmp;
        _viv_asm(CONV, tmp, dst4);
        VXC_DP2x8(dst, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);
        _viv_asm(COPY, result, dst, 16);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);

}

__kernel void resize_1d_bilinear_F16toU8_DOWN
    (
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 left4;
    float4 right4;

    vxc_ushort8 src;
    vxc_uchar8 result;
    vxc_half8 src_half, dst;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        _viv_asm(COPY, src_half, src, 16);

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;

        dst4 = dst4 * uint8Scale + output_ZP;
        int4 dst = convert_int4_rte(dst4);
        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_F16toF16_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_ushort8 src0, src1, dst0;
    vxc_half8 top;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 16;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        _viv_asm(COPY, top, dst0, 16);

        coord_in.y ++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        float4 left4;
        float4 right4;

        VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_4x4);
        VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_part1_4x4);
        float4 dst4 = right4 * x_lerp + left4;

        half4 tmp;
        _viv_asm(CONV, tmp, dst4);
        VXC_DP2x8(top, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);
        _viv_asm(COPY, dst0, top, 16);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst0,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_out.y++;
    } while (coord_out.y < out_height);
}
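The _UP variants avoid per-pixel reloads: when upscaling, all four left/right source indices fall inside the 16-element window loaded up front, so VXC_BitExtract with the precomputed maskShift acts as a lane gather. A scalar model of that gather (hypothetical helper, indices assumed in-window):

    static void gather_window(const short * window,   /* the 16 loaded pixels */
                              const int * left_idx, const int * right_idx,
                              short * left4, short * right4)
    {
        int i;
        for (i = 0; i < 4; i++)
        {
            left4[i]  = window[left_idx[i]];
            right4[i] = window[right_idx[i]];
        }
    }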
@@ -0,0 +1,147 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;
_viv_uniform float dfpScale;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;

__kernel void resize_1d_bilinear_I16toI16_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);

    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;

    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_ushort8 src0, src1, dst0;

    vxc_short8 top;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 16;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        _viv_asm(COPY, top, dst0, 16);

        float4 left4;
        float4 right4;

        coord_in.y ++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);
        VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * dfpScale;
        int4 dst = convert_int4_rte(dst4);
        VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_I16toI16_DOWN
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    vxc_short8 src;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    float4 left4;
    float4 right4;
    vxc_short8 result;
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);


    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * dfpScale;

        int4 dst = convert_int4_rte(dst4);

        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);
}
@@ -0,0 +1,148 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;
_viv_uniform float dfpScale;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;

__kernel void resize_1d_bilinear_I8toI8_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);

    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;

    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_uchar16 src0, dst0;

    vxc_char16 top;

    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 8;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        _viv_asm(COPY, top, dst0, 16);

        coord_in.y++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

        float4 left4;
        float4 right4;

        VXC_DP4x4(left4, top, top, \
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);
        VXC_DP4x4(right4, top, top, \
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);

        float4 dst4 = right4 * x_lerp + left4;

        dst4 = dst4 * dfpScale;
        int4 dst = convert_int4_rte(dst4);
        VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));

        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_I8toI8_DOWN
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    vxc_char16 src;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    float4 left4;
    float4 right4;
    vxc_char16 result;
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);


    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * dfpScale;

        int4 dst = convert_int4_rte(dst4);

        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);

}
@ -0,0 +1,212 @@
|
|||
#include "cl_viv_vx_ext.h"
|
||||
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_4x4;
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_left_4x4;
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_right_4x4;
|
||||
_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
|
||||
_viv_uniform float scale_x;
|
||||
_viv_uniform int out_height;
|
||||
_viv_uniform int input_ZP;
|
||||
_viv_uniform float uint8Scale;
|
||||
_viv_uniform float output_ZP;
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_part1_4x4;
|
||||
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
|
||||
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
|
||||
_viv_uniform float half_pixel_value;
|
||||
|
||||
__kernel void resize_1d_bilinear_U8toF16_DOWN
|
||||
(
|
||||
__read_only image2d_array_t input,
|
||||
__write_only image2d_array_t output,
|
||||
int align_corners,
|
||||
int half_pixel_centers
|
||||
)
|
||||
{
|
||||
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
|
||||
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
|
||||
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
|
||||
float4 left_x_f = floor(in_x);
|
||||
float4 x_lerp = in_x - left_x_f;
|
||||
int4 left_x_idx = convert_int4(left_x_f);
|
||||
vxc_uchar16 src;
|
||||
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
|
||||
float4 left4;
|
||||
float4 right4;
|
||||
|
||||
int8 input_desc;
|
||||
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
|
||||
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
|
||||
_viv_asm(MOV, coord_in.w, baseAddr);
|
||||
|
||||
unsigned char inputZP;
|
||||
_viv_asm(COPY, inputZP, input_ZP, 4);
|
||||
int8 output_desc;
|
||||
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
|
||||
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
|
||||
_viv_asm(MOV, coord_out.w, baseAddr);
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.x = left_x_idx.y;
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.x = left_x_idx.z;
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.x = left_x_idx.w;
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
|
||||
|
||||
VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);
|
||||
VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);
|
||||
right4 -= left4;
|
||||
float4 dst4 = right4 * x_lerp + left4;
|
||||
dst4 *= uint8Scale;
|
||||
half4 dst;
|
||||
_viv_asm(CONV, dst, dst4);
|
||||
vxc_short8 dst_short;
|
||||
_viv_asm(COPY, dst_short, dst, 16);
|
||||
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst_short,
|
||||
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
|
||||
coord_in.y++;
|
||||
coord_out.y ++;
|
||||
} while (coord_out.y < out_height);
|
||||
}
|
||||
|
||||
__kernel void resize_1d_bilinear_U8toU8_UP
|
||||
(
|
||||
image2d_array_t input,
|
||||
image2d_array_t output,
|
||||
int align_corners,
|
||||
int half_pixel_centers
|
||||
)
|
||||
{
|
||||
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
|
||||
|
||||
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
|
||||
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
|
||||
|
||||
float4 left_x_f = floor(in_x);
|
||||
float4 x_lerp = in_x - left_x_f;
|
||||
int4 left_x_idx = convert_int4(left_x_f);
|
||||
float4 right_x_f = ceil(in_x);
|
||||
int4 right_x_idx = convert_int4(right_x_f);
|
||||
|
||||
|
||||
vxc_uchar16 src0, src1;
|
||||
|
||||
vxc_uchar16 top;
|
||||
|
||||
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
|
||||
|
||||
int8 input_desc;
|
||||
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
|
||||
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
|
||||
_viv_asm(MOV, coord_in.w, baseAddr);
|
||||
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
|
||||
|
||||
vxc_ushort8 bitextract_p0;
|
||||
vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
|
||||
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
|
||||
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
|
||||
vxc_ushort8 constData = 8;
|
||||
VXC_DP2x8(maskShift, bitextract_p0, constData, \
|
||||
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
|
||||
|
||||
int8 output_desc;
|
||||
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
|
||||
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
|
||||
_viv_asm(MOV, coord_out.w, baseAddr);
|
||||
|
||||
do
|
||||
{
|
||||
VXC_BitExtract(top, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.y++;
|
||||
VXC_OP4(img_load_3d, src0, input, coord_in.xyww,
|
||||
VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
|
||||
|
||||
float4 left4;
|
||||
float4 right4;
|
||||
|
||||
unsigned char inputZP;
|
||||
_viv_asm(COPY, inputZP, input_ZP, 4);
|
||||
VXC_DP4x4(left4, top, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_4x4);
|
||||
VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_part1_4x4);
|
||||
|
||||
float4 dst4 = right4 * x_lerp + left4;
|
||||
dst4 = dst4 * uint8Scale + output_ZP;
|
||||
int4 dst = convert_int4_rte(dst4);
|
||||
VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
|
||||
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
|
||||
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
|
||||
|
||||
coord_out.y ++;
|
||||
} while (coord_out.y < out_height);
|
||||
}
|
||||
|
||||
__kernel void resize_1d_bilinear_U8toU8_DOWN
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    vxc_uchar16 src;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    float4 left4;
    float4 right4;
    vxc_uchar16 result;
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    unsigned char inputZP;
    _viv_asm(COPY, inputZP, input_ZP, 4);
    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);
        VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * uint8Scale + output_ZP;
        int4 dst = convert_int4_rte(dst4);

        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y++;
    } while (coord_out.y < out_height);

}

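The DOWN variant issues four independent two-pixel loads because, when downscaling, adjacent output pixels can map to input pixels too far apart for a single contiguous fetch; the blend and requantization math is otherwise the same as in the UP path. A sketch of how the folded uniforms could be precomputed on the host, assuming TF-style asymmetric U8 quantization (the helper names here are illustrative, not the library's API):

/* Illustrative host-side folding of quantization parameters for the
 * U8->U8 kernels, so the shader only needs dst4 * uint8Scale + output_ZP. */
typedef struct { float scale; int zero_point; } quant_u8_t;

static void fold_u8_resize_params(quant_u8_t in, quant_u8_t out,
                                  float *uint8_scale, float *output_zp)
{
    /* real = in.scale * (q_in - in.zp); q_out = real / out.scale + out.zp.
     * The kernel already subtracts in.zp during its DP4x4 dequant step,
     * so only the scale ratio and the output zero point remain. */
    *uint8_scale = in.scale / out.scale;
    *output_zp   = (float)out.zero_point;
}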
@ -0,0 +1,78 @@
#include "cl_viv_vx_ext.h"

_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniBilinear_4x4;
_viv_uniform float half_pixel_value;

__kernel void resize_1d_bilinear_U8toU8_UP_opt
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers,
    __read_only  image2d_array_t scale
    )
{
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(2), 0);

    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;

    float4 left_x_f = floor(in_x);
    int4 left_x_idx = convert_int4(left_x_f);
    int4 right_x_idx = left_x_idx + 1;

    vxc_uchar16 src0;

    vxc_uchar16 src_mask;

    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx,
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 8;
    VXC_DP2x8(maskShift, bitextract_p0, constData,
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    vxc_ushort8 lerp_0;
    vxc_half8 lerp;

    int2 coord = (int2)(coord_out.x * 2, 0);
    VXC_ReadImage(lerp_0, scale, coord, VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    _viv_asm(COPY, lerp, lerp_0, 16);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(src_mask, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        coord_in.y++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww,
            VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

        vxc_uchar16 dst;
        VXC_DP4x4(dst, src_mask, lerp,
            VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniBilinear_4x4);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));

        coord_out.y++;
    } while (coord_out.y < out_height);
}

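The _opt variant precomputes the per-output-pixel lerp weights on the host and streams them in as fp16 pairs through the extra `scale` image, so the inner loop collapses to a single uniBilinear_4x4 dot product. A plausible host-side generator for that table — a sketch only; the pair layout is inferred from the kernel's `coord_out.x * 2` indexing, and which lane weights which neighbor is encoded in the uniform table, not visible here:

#include <math.h>

/* Fill lerp[2*x] and lerp[2*x+1] with (left, right) weights per output x.
 * The order within the pair is an assumption; values would be converted
 * to fp16 before upload into the `scale` image. */
static void build_lerp_table(float *lerp, int out_width, float scale_x,
                             float half_pixel_value)
{
    for (int x = 0; x < out_width; ++x) {
        float in_x = ((float)x + half_pixel_value) * scale_x - half_pixel_value;
        float frac = in_x - floorf(in_x);
        lerp[2 * x + 0] = 1.0f - frac;   /* weight of the left neighbor */
        lerp[2 * x + 1] = frac;          /* weight of the right neighbor */
    }
}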
@ -0,0 +1,155 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniResizeNxUp_2x8;
_viv_uniform int out_height;

#define RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
    read_type read_data, save_data; \
    data_type in0, result; \
    \
    int8 input_desc; \
    _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
    _viv_asm(MOV, coord_in.w, baseAddr); \
    \
    int8 output_desc; \
    _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
    _viv_asm(MOV, coord_out.w, baseAddr); \
    \
    while (coord_out.y < out_height) \
    { \
        VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in0, read_data, 16); \
        VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxUp_2x8); \
        _viv_asm(COPY, save_data, result, 16); \
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        coord_in.y++; \
        coord_out.y++; \
    } \

#define RESIZE_1D_2X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_HALF_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = (coord_out.x * 2 - 1) >> 2; \
    coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_HALF_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_HALF_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_2X_HALF_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_2X_HALF_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_2X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x >> 1; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_2X_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_2X_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_4X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_HALF_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = (coord_out.x * 2 - 3) >> 3; \
    coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_4X_HALF_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_4X_HALF_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_4X_HALF_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_4X_HALF_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_4X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x >> 2; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_4X_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_4X_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_4X_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_4X_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_8X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_HALF_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = (coord_out.x * 2 - 7) >> 4; \
    coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_8X_HALF_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_8X_HALF_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_8X_HALF_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_8X_HALF_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_8X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x >> 3; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_8X_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_8X_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_8X_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_8X_SAME(F16, F16, vxc_short8,  vxc_half8)

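The `(coord_out.x * 2 - k) >> n` expressions in the HALF_SAME macros above are exact integer forms of the half-pixel source index floor((x + 0.5)/N - 0.5) for N = 2, 4, 8; e.g. for N = 2, x/2 - 1/4 = (2x - 1)/4. A quick standalone check of that identity:

#include <assert.h>
#include <math.h>

/* Verify (2x - (N - 1)) >> log2(2N) == floor((x + 0.5)/N - 0.5) for the
 * upscale factors used above. Right-shifting a negative value is
 * implementation-defined in ISO C but arithmetic on the relevant targets;
 * the macros' explicit x == 0 ? -1 guard covers the boundary regardless. */
int main(void)
{
    const int factors[3] = {2, 4, 8};
    const int shifts[3]  = {2, 3, 4};   /* >> 2, >> 3, >> 4 in the macros */
    for (int f = 0; f < 3; ++f) {
        int N = factors[f];
        for (int x = 0; x < 1024; ++x) {
            int by_shift = x == 0 ? -1 : (x * 2 - (N - 1)) >> shifts[f];
            int by_float = (int)floorf(((float)x + 0.5f) / (float)N - 0.5f);
            assert(by_shift == by_float);
        }
    }
    return 0;
}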
@ -0,0 +1,337 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniMultiplyAndPostShift_2x8;
_viv_uniform float scale_x;
_viv_uniform float half_pixel_value;
_viv_uniform float round_value;
_viv_uniform int2 multAndoutZP; //[0:15] multiplier, [31:63] output zp

#define NEAREST_INDEX_PROCESS() \
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); \
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x + round_value; \
    int4 in_x_idx = convert_int4(in_x); \

__kernel void resize_1d_nearest_F16toF16
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_short8 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

_viv_uniform VXC_512Bits uniGetExtractData_2x8;
__kernel void resize_1d_nearest_F16toF16_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_ushort8 src0, src1, dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, \
        VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

_viv_uniform VXC_512Bits uniConvertI8toI8_2x8;
__kernel void resize_1d_nearest_I8toI8
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_char16 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));
    VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_I8toI8_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_uchar16 src0, dst0;
    vxc_char16 dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, \
        VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst0, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
    _viv_asm(COPY, dst, dst0, 8);
    VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_U8toU8
    (
    image2d_array_t input,
    image2d_array_t output,
    int             align_corners,
    int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_uchar16 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 multiplier;
    _viv_asm(COPY, multiplier, multAndoutZP, 16);
    VXC_DP2x8(src, src, multiplier, \
        VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_U8toU8_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_uchar16 src0, dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
    vxc_ushort8 multiplier;
    _viv_asm(COPY, multiplier, multAndoutZP, 16);
    VXC_DP2x8(dst, dst, multiplier, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_I16toI16
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_short8 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));

    VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_I16toI16_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_ushort8 src0, src1, dst0;
    vxc_short8 dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst0, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
    _viv_asm(COPY, dst, dst0, 8);
    VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}
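All of the nearest kernels above share NEAREST_INDEX_PROCESS, which folds the rounding convention into two uniforms: half_pixel_value shifts the output coordinate and round_value turns the final truncation into round-half-up when needed. A scalar sketch of the index computation; the concrete uniform values are set host-side and the settings listed in the comment are assumptions about the usual TF resize conventions:

#include <math.h>

/* Scalar model of NEAREST_INDEX_PROCESS: in_x = (x + half) * scale + round,
 * then truncate. Typical settings (assumed, not confirmed by this diff):
 *   align_corners:      half = 0,   round = 0.5, scale = (in-1)/(out-1)
 *   half_pixel_centers: half = 0.5, round = 0,   scale = in/out
 *   default:            half = 0,   round = 0,   scale = in/out         */
static int nearest_index(int x_out, float scale_x,
                         float half_pixel_value, float round_value)
{
    float in_x = ((float)x_out + half_pixel_value) * scale_x + round_value;
    return (int)in_x;   /* convert_int4() truncates toward zero */
}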
File diff suppressed because it is too large
@ -176,8 +176,8 @@ static vsi_status op_optimize
         reshape 3d input (xcn) --> 4d input (whcn)
         reshape 3d output(xcn) --> 4d output(whcn)
     */
-    shape[0] = inputs[0]->attr.size[0];
-    shape[1] = 1;
+    shape[0] = 1;
+    shape[1] = inputs[0]->attr.size[0];
     shape[2] = inputs[0]->attr.size[1];
     shape[3] = inputs[0]->attr.size[2];
     dim = 4;

@ -136,6 +136,7 @@ static vsi_bool op_check
         IO_TYPE(D_I32, D_F32)
         IO_TYPE(D_I32, D_I32)
         IO_TYPE(D_I32, D_U32)
+        IO_TYPE(D_I32, D_F16)
         IO_TYPE(D_I32, D_BOOL8)
         IO_TYPE(D_U32, D_F32)
         IO_TYPE(D_U32, D_I32)
@ -176,6 +177,7 @@ static vsi_bool op_check
         IO_TYPE(D_U8|Q_ASYM, D_F32)
         IO_TYPE(D_U8|Q_ASYM, D_I32)
         IO_TYPE(D_BF16, D_BF16)
+        IO_TYPE(D_U8, D_F16)
     END_IO_TYPE_DECL(CAST)
     if(!VALIDATE_OP_IO_TYPES(CAST, self, inputs, self->input.num, outputs, self->output.num)) {
         char* desc = generate_op_io_types_desc(inputs,
@ -37,6 +37,29 @@
 #include "utils/vsi_nn_dtype_util.h"
 #include "utils/vsi_nn_constraint_check.h"
 
+static vsi_bool _enable_concat_optimize()
+{
+    char *envctrl;
+    static int32_t enableOptimize = -1;
+
+    if (enableOptimize == -1)
+    {
+        enableOptimize = 1;
+        envctrl = getenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE");
+        if (envctrl)
+        {
+            enableOptimize = atoi(envctrl);
+        }
+    }
+
+    if (enableOptimize == 1)
+    {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
 static int32_t _get_input_num
     (
     vsi_nn_node_t * self,
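The concat view optimization is now gated by the VSI_NN_ENABLE_CONCAT_OPTIMIZE environment variable, read once and memoized in a function-local static: unset or "1" keeps it on, any other integer value turns it off. A hedged usage sketch for a test harness (setenv is POSIX, not part of this library):

#include <stdlib.h>

/* Disable the concat tensor-view optimization for one process, e.g. to
 * rule it out while debugging output mismatches. Must run before the
 * first concat node is computed, since the flag is cached on first use. */
int main(void)
{
    setenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE", "0", /*overwrite=*/1);
    /* ... build and run the vsi_nn graph as usual ... */
    return 0;
}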
@ -243,7 +266,8 @@ static vsi_status op_compute
 
     status = VSI_SUCCESS;
     self->n = NULL;
-    if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs))
+    if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs)
+        && _enable_concat_optimize())
     {
         iter = self->nn_param.concat.lcl_data;
         while( NULL != iter )
@ -397,7 +421,8 @@ static vsi_status op_optimize
     status = VSI_SUCCESS;
     /* we don't create tensor view if the axis is not the highest dimension */
     if (_is_highest_dimension(self, outputs) == FALSE ||
-        _is_same_quant(self, inputs, outputs) == FALSE)
+        _is_same_quant(self, inputs, outputs) == FALSE ||
+        _enable_concat_optimize() == FALSE)
     {
         return status;
     }

@ -194,6 +194,7 @@ static vsi_bool op_check
         IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
         IO_TYPE(D_I16|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_I16|Q_DFP, D_U8|Q_ASYM)
+        IO_TYPE(D_I16|Q_DFP, D_F16)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_ASYM)
         IO_TYPE(D_I8|Q_ASYM, D_I8|Q_ASYM)

@ -31,6 +31,7 @@
 #include "vsi_nn_prv.h"
 #include "vsi_nn_ops.h"
 #include "vsi_nn_tensor.h"
+#include "vsi_nn_tensor_util.h"
 #include "utils/vsi_nn_util.h"
 #include "kernel/vsi_nn_kernel.h"
 /*
@ -73,6 +74,7 @@ static vsi_status op_compute
     {
         status = VSI_SUCCESS;
     }
+
     vsi_nn_kernel_param_release( &param );
     return status;
 } /* op_compute() */
@ -119,7 +121,7 @@ static vsi_bool op_setup
         VSI_NN_ROUND_FLOOR
         );
 
-    outputs[0]->attr.size[1] = inputs[1]->attr.size[2];
+    outputs[0]->attr.size[1] = inputs[0]->attr.size[1] * p->multiplier;
     outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
     outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
 }

@ -45,22 +45,30 @@ static vsi_status _eltwise_unary_op_compute
     )
 {
     vsi_status status = VSI_FAILURE;
+    float alpha = 0;
+    vsi_nn_kernel_param_t * param = NULL;
 
     if( NULL == self )
     {
         return status;
     }
+    param = vsi_nn_kernel_param_create();
 
+    alpha = self->nn_param.elu.alpha;
+    vsi_nn_kernel_param_add_float32( param, "alpha", alpha );
+
     // TODO: This optimzie is a hack for gpu path,
     // it should be moved to gpu kernel setup.
     self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
-        kernel_name, inputs, 1, outputs, 1, NULL );
+        kernel_name, inputs, 1, outputs, 1, param );
 
     if( self->n )
     {
         status = VSI_SUCCESS;
     }
 
+    vsi_nn_kernel_param_release( &param );
+
     return status;
 } /* _eltwise_op_compute() */
 
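With the parameter list no longer NULL, ELU's alpha reaches the selected kernel by name, and the version gate added below keeps alpha at its old hard-coded default of 1 for graphs serialized before 1.1.29. On the kernel side the value would be recovered with the matching getter — a sketch, assuming the ovxlib getter that mirrors the adder used above:

#include "kernel/vsi_nn_kernel.h"

/* Sketch: how an elementwise-unary kernel setup could read back ELU's
 * alpha from the param list built in _eltwise_unary_op_compute. */
static float read_elu_alpha( vsi_nn_kernel_param_t * param )
{
    return vsi_nn_kernel_param_get_float32( param, "alpha" );
}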
@ -152,6 +160,19 @@ static vsi_bool op_check
     return TRUE;
 } /* op_check() */
 
+static vsi_status op_init
+    (
+    vsi_nn_node_t * self
+    )
+{
+    if (vsi_nn_compareVersion(self->graph, 1, 1, 29) == -1)
+    {
+        self->nn_param.elu.alpha = 1;
+    }
+
+    return VSI_SUCCESS;
+} /* op_init() */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -166,7 +187,7 @@ extern "C" {
         { \
             return _eltwise_unary_op_compute( ""#kernel_name, self, inputs, outputs ); \
         } \
-DEF_OP_REG(name, NULL, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1)
+DEF_OP_REG(name, op_init, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1)
 
 DEF_ELEMENT_WISE_UNARY_OP( SIN, sin );
 DEF_ELEMENT_WISE_UNARY_OP( EXP, exp );

@ -73,6 +73,7 @@ static vsi_bool op_check
         IO_TYPE(D_I32, D_I32, D_I32, D_F16, D_I32)
         IO_TYPE(D_I32, D_I32, D_F32, D_F16, D_F32)
         IO_TYPE(D_I32, D_I32, D_U8|Q_ASYM, D_F16, D_U8|Q_ASYM)
+        IO_TYPE(D_I32, D_I32, D_F32, D_F32, D_U8|Q_ASYM)
     END_IO_TYPE_DECL(HASHTABLE_LOOKUP)
     if (!VALIDATE_OP_IO_TYPES(HASHTABLE_LOOKUP, self, inputs, self->input.num, outputs, self->output.num))
     {

@ -0,0 +1,298 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/


#include <string.h>
#include <stdlib.h>

#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"

/*
Declare number of input and output.
*/
#define _INPUT_NUM          (2)
#define _OUTPUT_NUM         (1)

static vsi_status op_compute
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_internal_compute_node( self );

    return status;
} /* op_compute() */

static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_nn_interp_param *p = NULL;

    p = &self->nn_param.interp;

    if ((p->pad_beg > 0) || (p->pad_end > 0))
    {
        VSILOGE("Only supports non-pos padding (cropping) for now ");
        return FALSE;
    }

    return TRUE;
} /* op_check() */


static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_nn_interp_param *p = NULL;
    int32_t height_in_eff_, width_in_eff_;
    int32_t height_out, width_out;
    vsi_nn_internal_node_t* curr = NULL;
    vsi_nn_internal_tensor_t *crop_tensor = NULL;
    vsi_nn_tensor_t *crop_in_tensor = NULL;
    float factor = 1.0f;
    int32_t pad_beg = 0;
    int32_t pad_end = 0;

    if ( NULL == self )
    {
        return FALSE;
    }

    p = &self->nn_param.interp;
    pad_beg = -p->pad_beg;
    pad_end = -p->pad_end;
    width_in_eff_ = inputs[0]->attr.size[0] + p->pad_beg + p->pad_end;
    height_in_eff_ = inputs[0]->attr.size[1] + p->pad_beg + p->pad_end;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        memcpy( outputs[0]->attr.size, inputs[0]->attr.size,
            VSI_NN_MAX_DIM_NUM * sizeof( uint32_t ) );
        if ((p->shrink_factor > 0) && (p->zoom_factor <= 0))
        {
            width_out = (width_in_eff_ - 1) / p->shrink_factor + 1;
            height_out = (height_in_eff_ - 1) / p->shrink_factor + 1;
        }
        else if ((p->zoom_factor > 0) && (p->shrink_factor <= 0))
        {
            width_out = (width_in_eff_ - 1) * (p->zoom_factor - 1) + width_in_eff_;
            height_out = (height_in_eff_ - 1) * (p->zoom_factor - 1) + height_in_eff_;
        }
        else if ((p->height > 0) && (p->width > 0))
        {
            width_out = p->width;
            height_out = p->height;
        }
        else if ((p->zoom_factor > 0) && (p->shrink_factor > 0))
        {
            width_out = (width_in_eff_ - 1) / p->shrink_factor + 1;
            height_out = (height_in_eff_ - 1) / p->shrink_factor + 1;
            width_out = (width_out - 1) * (p->zoom_factor - 1) + width_out;
            height_out = (height_out - 1) * (p->zoom_factor - 1) + height_out;
        }
        else if (NULL != inputs[1])
        {
            width_out = inputs[1]->attr.size[0];
            height_out = inputs[1]->attr.size[1];
        }
        else
        {
            VSILOGE("Not support params ");
            return FALSE;
        }

        if ((width_out < 0) || (height_out < 0) || (width_in_eff_ < 0) || (height_in_eff_ < 0))
        {
            VSILOGE("value shoud be positive: width_out %d height_out %d width_in_eff_ %d height_in_eff_ %d ",
                width_out, height_out, width_in_eff_, height_in_eff_);
            return FALSE;
        }

        outputs[0]->attr.size[0] = width_out;
        outputs[0]->attr.size[1] = height_out;
    }

    factor = (float)(outputs[0]->attr.size[0]) / (float)(width_in_eff_);

    if ((pad_beg > 0) || (pad_end > 0))
    {
        vsi_nn_tensor_attr_t attr;
        int32_t use_virtual_tensor = 1;
        int32_t *begin_dims;
        int32_t *end_dims;
        int32_t *stride_dims;
        uint32_t i;
        memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
        vsi_nn_internal_init_tensor_attr(&attr, &inputs[0]->attr.dtype, use_virtual_tensor);
        crop_tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
        crop_in_tensor = crop_tensor->t;
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_STRIDED_SLICE, 1, 1 );
        curr->node->nn_param.strided_slice.begin_dims_num = inputs[0]->attr.dim_num;
        curr->node->nn_param.strided_slice.end_dims_num = inputs[0]->attr.dim_num;
        curr->node->nn_param.strided_slice.stride_dims_num = inputs[0]->attr.dim_num;
        curr->node->nn_param.strided_slice.begin_mask = 0;
        curr->node->nn_param.strided_slice.end_mask = 0;
        curr->node->nn_param.strided_slice.shrink_axis_mask = 0;
        begin_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
            VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
        end_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
            VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
        stride_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
            VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
        for (i = 0; i < inputs[0]->attr.dim_num; i++)
        {
            stride_dims[i] = 1;
        }

        begin_dims[0] = pad_beg;
        begin_dims[1] = pad_beg;
        end_dims[0] = inputs[0]->attr.size[0] - pad_end;
        end_dims[1] = inputs[0]->attr.size[1] - pad_end;

        if (inputs[0]->attr.dim_num > 2)
        {
            for (i = 2 ; i < inputs[0]->attr.dim_num; i++)
            {
                begin_dims[i] = 0;
                end_dims[i] = inputs[0]->attr.size[i];
            }
        }
        curr->node->nn_param.strided_slice.begin_dims = begin_dims;
        curr->node->nn_param.strided_slice.end_dims = end_dims;
        curr->node->nn_param.strided_slice.stride_dims = stride_dims;
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = crop_in_tensor;
        vsi_nn_internal_setup_node(self, curr);
    }
    else
    {
        crop_in_tensor = inputs[0];
    }

    if ((width_in_eff_ == (int32_t)outputs[0]->attr.size[0]) && (height_in_eff_ == (int32_t)outputs[0]->attr.size[1]))
    {
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 1, 1 );
        curr->inputs[0] = crop_in_tensor;
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }
    else
    {
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_INTERNAL, 1, 1 );
        curr->node->nn_param.resize_internal.align_corners = vx_true_e;
        curr->node->nn_param.resize_internal.factor = factor;
        curr->node->nn_param.resize_internal.half_pixel_centers = vx_false_e;
        curr->inputs[0] = crop_in_tensor;
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }

    return TRUE;
} /* op_setup() */

static vsi_status op_optimize
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs,
    vsi_nn_opt_direction_e direction
    )
{
    vsi_status status;

    status = VSI_SUCCESS;
    vsi_nn_internal_optimize_node( self, direction );

    return status;
} /* op_optimize() */

static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_internal_init_node_wksp(self);
    self->nn_param.interp.height = 0;
    self->nn_param.interp.width = 0;
    self->nn_param.interp.pad_beg = 0;
    self->nn_param.interp.pad_end = 0;
    self->nn_param.interp.shrink_factor = 0;
    self->nn_param.interp.zoom_factor = 0;

    return status;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    vsi_nn_internal_deinit_node_wksp(self);
    status = vsi_nn_op_common_deinit(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ INTERP,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ op_optimize,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

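The four sizing branches in op_setup above follow Caffe's Interp layer conventions. A small standalone check of the zoom/shrink arithmetic, with hypothetical sizes:

#include <assert.h>

/* Caffe-style Interp output sizing, as in op_setup above.
 * zoom:   out = (in - 1) * (zoom - 1) + in      e.g. in = 4, zoom = 2 -> 7
 * shrink: out = (in - 1) / shrink + 1           e.g. in = 5, shrink = 2 -> 3 */
int main(void)
{
    int width_in_eff_ = 4;
    int zoom_factor = 2;
    int width_out = (width_in_eff_ - 1) * (zoom_factor - 1) + width_in_eff_;
    assert(width_out == 7);

    width_in_eff_ = 5;
    int shrink_factor = 2;
    width_out = (width_in_eff_ - 1) / shrink_factor + 1;
    assert(width_out == 3);
    return 0;
}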
@ -155,11 +155,13 @@ static vsi_bool op_check
    BEGIN_IO_TYPE_DECL(PERMUTE, 1, 1)
        IO_TYPE(D_F16, D_F16)
        IO_TYPE(D_F16, D_F32)
        IO_TYPE(D_I16, D_I16)
        IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
        IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
        IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
        IO_TYPE(D_U8|Q_ASYM, D_F16)
        IO_TYPE(D_U8|Q_ASYM, D_F32)
        IO_TYPE(D_I8|Q_SYM_PC, D_I8|Q_SYM_PC)
        IO_TYPE(D_BOOL8, D_BOOL8)
        IO_TYPE(D_BOOL8, D_I8|Q_DFP)
        IO_TYPE(D_F32, D_F32)

@ -0,0 +1,207 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/


#include <string.h>
#include <stdlib.h>

#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"

/*
Declare number of input and output.
*/
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)

static vsi_bool _is_same_shape
    (
    vsi_nn_tensor_t * inputs,
    uint32_t *sizes,
    uint32_t dims
    )
{
    uint32_t i = 0;

    if (inputs->attr.dim_num != dims)
        return FALSE;

    for (i = 0; i < dims; i++)
    {
        if (sizes[i] != inputs->attr.size[i])
            return FALSE;
    }

    return TRUE;
}

static vsi_status op_compute
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_status status = VSI_FAILURE;

    status = vsi_nn_internal_compute_node( self );

    return status;
} /* op_compute() */


static vsi_status op_optimize
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs,
    vsi_nn_opt_direction_e direction
    )
{
    if ( _is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num) )
    {
        return vsi_nn_internal_optimize_node(self, direction );
    }
    else
    {
        return VSI_SUCCESS;
    }
} /* op_optimize() */

static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    return TRUE;
} /* op_check() */

static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    float factor = self->nn_param.resize_1d.factor;
    vsi_nn_internal_node_t* curr = NULL;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        if (factor != 0)
        {
            outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
        }
        else
        {
            outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
        }
        outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
        outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
    }

    if (_is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num))
    {
        vsi_nn_internal_init_node_wksp( self );
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 0, 0 );
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }
    else if (VSI_NN_INTERPOLATION_BILINEAR == self->nn_param.resize_1d.type)
    {
        vsi_nn_internal_init_node_wksp( self );
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL, 0, 0 );
        curr->node->nn_param.resize_1d_bilinear_internal.align_corners = self->nn_param.resize_1d.align_corners;
        curr->node->nn_param.resize_1d_bilinear_internal.factor = self->nn_param.resize_1d.factor;
        curr->node->nn_param.resize_1d_bilinear_internal.half_pixel_centers = \
            self->nn_param.resize_1d.half_pixel_centers;
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }
    else if (VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR == self->nn_param.resize_1d.type)
    {
        vsi_nn_internal_init_node_wksp( self );
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL, 0, 0 );
        curr->node->nn_param.resize_1d_nearest_internal.align_corners = self->nn_param.resize_1d.align_corners;
        curr->node->nn_param.resize_1d_nearest_internal.factor = self->nn_param.resize_1d.factor;
        curr->node->nn_param.resize_1d_nearest_internal.half_pixel_centers = \
            self->nn_param.resize_1d.half_pixel_centers;
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }

    return TRUE;
} /* op_setup() */

static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{
    return VSI_SUCCESS;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_internal_deinit_node_wksp(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ RESIZE_1D,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ op_optimize,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

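op_setup above only fills the output extent while it is still VSI_NN_DIM_AUTO: a nonzero factor scales the input width and truncates, otherwise the explicit size wins. The same rule, isolated as a sketch with hypothetical values:

#include <stdint.h>

/* Output width selection used by the RESIZE_1D setup above. */
static uint32_t resize_1d_out_width(uint32_t in_width, float factor,
                                    uint32_t explicit_size0)
{
    if (factor != 0)
    {
        return (uint32_t)(in_width * factor);   /* truncates: 5 * 1.5f -> 7 */
    }
    return explicit_size0;
}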
@ -0,0 +1,171 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/


#include <string.h>
#include <stdlib.h>

#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "utils/vsi_nn_constraint_check.h"

/*
Declare number of input and output.
*/
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)

static vsi_status op_compute
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_status status = VSI_FAILURE;
    int32_t align_corners = self->nn_param.resize_1d_bilinear_internal.align_corners;
    int32_t half_pixel_centers = self->nn_param.resize_1d_bilinear_internal.half_pixel_centers;
    vsi_nn_kernel_param_t * param = NULL;

    param = vsi_nn_kernel_param_create();

    vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners );
    vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers );

    self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
        "resize_1d_bilinear",
        &inputs[0], 1,
        &outputs[0], 1, param );

    if ( self->n )
    {
        status = VSI_SUCCESS;
    }

    vsi_nn_kernel_param_release( &param );

    return status;
} /* op_compute() */

static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    BEGIN_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL, 1, 1)
        IO_TYPE(D_F16, D_U8|Q_ASYM)
        IO_TYPE(D_F16, D_F16)
        IO_TYPE(D_BF16, D_BF16)
        IO_TYPE(D_F32, D_F32)
        IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
        IO_TYPE(D_U8|Q_ASYM, D_F16)
        IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
        IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
    END_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL)
    if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_BILINEAR_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) {
        char* desc = generate_op_io_types_desc(inputs,
            self->input.num, outputs, self->output.num);
        VSILOGE("Inputs/Outputs data type not support: %s", desc);
        destroy_op_io_types_desc(desc);
        return FALSE;
    }

    return TRUE;
} /* op_check() */

static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    float factor = self->nn_param.resize_1d_bilinear_internal.factor;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        if (factor != 0)
        {
            outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
        }
        else
        {
            outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
        }
        outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
        outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
    }

    return TRUE;
} /* op_setup() */

static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{

    return VSI_SUCCESS;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_op_common_deinit(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ RESIZE_1D_BILINEAR_INTERNAL,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ NULL,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

@ -0,0 +1,170 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_node.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_ops.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "utils/vsi_nn_constraint_check.h"
|
||||
|
||||
/*
|
||||
Declare number of input and output.
|
||||
*/
|
||||
#define _INPUT_NUM (1)
|
||||
#define _OUTPUT_NUM (1)
|
||||
|
||||
static vsi_status op_compute
|
||||
(
|
||||
vsi_nn_node_t * self,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
vsi_nn_tensor_t ** outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
int32_t align_corners = self->nn_param.resize_1d_nearest_internal.align_corners;
|
||||
int32_t half_pixel_centers = self->nn_param.resize_1d_nearest_internal.half_pixel_centers;
|
||||
vsi_nn_kernel_param_t * param = NULL;
|
||||
|
||||
param = vsi_nn_kernel_param_create();
|
||||
|
||||
vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners );
|
||||
vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers );
|
||||
|
||||
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
|
||||
"resize_1d_nearest",
|
||||
&inputs[0], 1,
|
||||
&outputs[0], 1, param );
|
||||
|
||||
if ( self->n )
|
||||
{
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
|
||||
vsi_nn_kernel_param_release( ¶m );
|
||||
|
||||
return status;
|
||||
} /* op_compute() */
|
||||
|
||||
static vsi_bool op_check
|
||||
(
|
||||
vsi_nn_node_t * self,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
vsi_nn_tensor_t ** outputs
|
||||
)
|
||||
{
|
||||
BEGIN_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL, 1, 1)
|
||||
IO_TYPE(D_F16, D_F16)
|
||||
IO_TYPE(D_BF16, D_BF16)
|
||||
IO_TYPE(D_F32, D_F32)
|
||||
IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
|
||||
IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
|
||||
IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
|
||||
END_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL)
|
||||
if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_NEAREST_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) {
|
||||
char* desc = generate_op_io_types_desc(inputs,
|
||||
self->input.num, outputs, self->output.num);
|
||||
VSILOGE("Inputs/Outputs data type not support: %s", desc);
|
||||
destroy_op_io_types_desc(desc);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
} /* op_check() */
|
||||
|
||||
static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    float factor = self->nn_param.resize_1d_nearest_internal.factor;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        if (factor != 0)
        {
            outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
        }
        else
        {
            outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
        }
        outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
        outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
    }

    return TRUE;
} /* op_setup() */

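The shape rule in op_setup has two paths: a non-zero factor scales the innermost (width) dimension, otherwise the explicitly requested size wins. A small worked sketch of just that rule (the helper name and sample values are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors op_setup's width computation for the auto-shape case. */
    static uint32_t resize_1d_out_width(uint32_t in_w, float factor, uint32_t explicit_w)
    {
        return (factor != 0.0f) ? (uint32_t)(in_w * factor) : explicit_w;
    }

    int main(void)
    {
        printf("%u\n", resize_1d_out_width(224, 0.5f, 0)); /* factor path:   112 */
        printf("%u\n", resize_1d_out_width(224, 0.0f, 7)); /* explicit path:   7 */
        return 0;
    }
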
static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{
    return VSI_SUCCESS;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_op_common_deinit(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ RESIZE_1D_NEAREST_INTERNAL,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ NULL,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

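DEF_OP_REG presumably expands to an entry in ovxlib's op-proc table wiring these lifecycle callbacks together. A rough self-contained sketch of the callback-table pattern it follows; the struct and field names here are illustrative, not the real vsi_nn_op_proc_t (whose hooks also take tensor arrays):

    #include <stddef.h>

    /* Illustrative stand-ins for the node/op callback signatures. */
    typedef struct node node_t;
    typedef int (*op_cb)(node_t *self);

    typedef struct {
        const char *name;  /* op name used for lookup                         */
        op_cb init;        /* one slot per lifecycle hook; NULL means "no-op" */
        op_cb compute;
        op_cb deinit;
        op_cb check;
        op_cb setup;
        op_cb optimize;
        int   input_num;
        int   output_num;
    } op_proc_t;

    /* What a registration like the one above boils down to: */
    const op_proc_t k_resize_1d_nearest_internal = {
        "RESIZE_1D_NEAREST_INTERNAL",
        NULL, NULL, NULL, NULL, NULL, NULL, /* callbacks elided in this sketch */
        1, 1
    };
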
@@ -163,12 +163,17 @@ static vsi_bool op_check
        ret = FALSE;
    }

    if(ret)
    {
        BEGIN_IO_TYPE_DECL(SVDF, 5, 2)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_F16)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_F16)
            IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_F16)
            IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_F32)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_NONE)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_NONE)
            IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_NONE)
            IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_NONE)
        END_IO_TYPE_DECL(SVDF)
        if(!VALIDATE_OP_IO_TYPES(SVDF, self, inputs, self->input.num, outputs, self->output.num)) {
            char* desc = generate_op_io_types_desc(inputs,

@@ -117,8 +117,7 @@ static void _try_pack_tensor_data
             {
                 *p_sz = (uint64_t)bytes;
             }
-            free( data );
-            data = NULL;
+            vsi_nn_safe_free( data );
         }
     }
 } /* _pack_tensor_data() */

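This commit repeatedly swaps the free-then-NULL idiom for vsi_nn_safe_free. Assuming the macro matches the pattern it replaces, a minimal sketch (my_safe_free is a stand-in name, not the library's definition):

    #include <stdlib.h>

    /* Frees a heap pointer and NULLs it so repeated frees become no-ops. */
    #define my_safe_free(_ptr)     \
        do {                       \
            if ((_ptr) != NULL) {  \
                free(_ptr);        \
                (_ptr) = NULL;     \
            }                      \
        } while (0)

Passing the lvalue (rather than its value) is what lets a macro reset the caller's pointer, which a plain function taking void * could not do.
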
@@ -417,6 +416,8 @@ static _op_param_gen_t s_op_gen[] =
     /* PRE_PROCESS_NV12 */ NULL,
     /* SCATTER_ND */ NULL,
     /* DECONVOLUTION1D */ NULL,
+    /* INTERP */ NULL,
+    /* RESIZE_1D */ NULL,
     };
 _compiler_assert( _cnt_of_array(s_op_gen) == VSI_NN_OP_NUM, vsi_nn_code_generator_c );

@@ -149,7 +149,7 @@ vsi_nn_tensor_t* vsi_nn_Concat
             src = j;
             memcpy( &buffer[dst * type_bytes], &tmp[src * type_bytes], type_bytes );
         }
-        free(tmp);
+        vsi_nn_safe_free( tmp );
         offset += strides[axis] * tensors[i]->attr.size[axis];
     }
     tensor_out = vsi_nn_CreateTensorFromData( graph, buffer, &output_attr );

@@ -221,11 +221,7 @@ vsi_nn_tensor_t* vsi_nn_ConvertTensorDtype
         }
     }

-    if( src_buf )
-    {
-        free( src_buf );
-        src_buf = NULL;
-    }
+    vsi_nn_safe_free( src_buf );
     if( dst_buf )
     {
         free( dst_buf );

@@ -333,10 +329,7 @@ vsi_nn_tensor_t* vsi_nn_TensorAdd
 error:
     for ( i = 0; i < tensor_num; i++ )
     {
-        if ( buffer[i] )
-        {
-            free(buffer[i]);
-        }
+        vsi_nn_safe_free( buffer[i] );
     }
     if( tmp )
     {

@@ -710,33 +710,6 @@ vsi_bool vsi_nn_CheckFilePath
     return FALSE;
 } /* vsi_nn_CheckFilePath() */

-void vsi_nn_GetFP32MultiAndPostShift
-    (
-    vx_float32 mult,
-    vx_uint16 *M0,
-    vx_int8 *N
-    )
-{
-    vx_uint32 uintMult = *((vx_uint32*)(&mult));
-    vx_uint32 tmpMultiply = 0;
-    vx_int32 exp = 0;
-    vx_uint32 postShiftBit6to5 = 0;
-    vx_uint32 postShift = 0;
-    vx_int8 tmpPostShift = 0;
-
-    tmpMultiply = (uintMult & 0x7FFFFF) >> 8;
-    *M0 = (vx_uint16)((1U << 15) + tmpMultiply);
-
-    exp = (uintMult & 0x7F800000) >> 23; /* postShift is Scale's exp */
-    tmpPostShift = 15 - ((vx_int8)exp - 127);
-    postShift = tmpPostShift & 0x1F;
-    tmpPostShift = tmpPostShift >> 5;
-    postShiftBit6to5 = tmpPostShift & 3;
-
-    *N = (vx_int8)(((postShiftBit6to5 << 5) | (postShift & 0x1F)));
-    *N = (((vx_int32)*N << 25) >> 25);
-}/* vsi_nn_GetFP32MultiAndPostShift() */
-
 typedef struct
 {
     uint8_t* raw_addr;

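The helper removed above decomposes a float multiplier into a 16-bit fixed-point multiplier M0 and a signed post-shift N so that mult ~= M0 * 2^-N: M0 keeps the top 15 mantissa bits plus the implicit leading one, and N comes from the exponent. A simplified, self-contained sketch of the same decomposition, assuming only the IEEE-754 single layout; it skips the original's packing of N into 7 bits and uses memcpy instead of the pointer cast to avoid aliasing issues:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    void fp32_to_mult_shift(float s, uint16_t *m0, int8_t *n)
    {
        uint32_t bits;
        memcpy(&bits, &s, sizeof(bits));          /* IEEE-754 bit pattern      */
        uint32_t mant = (bits & 0x7FFFFF) >> 8;   /* top 15 mantissa bits      */
        int32_t  exp  = (int32_t)((bits >> 23) & 0xFF) - 127;
        *m0 = (uint16_t)((1u << 15) + mant);      /* implicit leading 1 at b15 */
        *n  = (int8_t)(15 - exp);                 /* so s ~= M0 * 2^-N         */
    }

    int main(void)
    {
        uint16_t m0; int8_t n;
        fp32_to_mult_shift(0.5f, &m0, &n);        /* 0.5 -> M0 = 32768, N = 16 */
        printf("M0=%u N=%d approx=%g\n", (unsigned)m0, (int)n,
               (double)m0 / (double)(1u << n));
        return 0;
    }
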
@@ -520,6 +520,7 @@ static vx_tensor _create_const_raw_tensor
     vx_tensor tensor = NULL;
     vx_tensor_create_params_t params;
     float * scales = NULL;
+    int32_t * zeroPoints = NULL;

     memset( &params, 0, sizeof( vx_tensor_create_params_t ) );
     params.num_of_dims = attr.dim_num;

@@ -539,12 +540,14 @@ static vx_tensor _create_const_raw_tensor
 #ifdef VSI_PERCHANNEL_QUANTIZATION_SUPPORT
             // This is a hack: the driver doesn't support const scales
             scales = (float *)malloc(sizeof(float) * attr.dtype.scale_dim);
+            zeroPoints = (int32_t *)malloc(sizeof(int32_t) * attr.dtype.zero_points_dim);
             memcpy(scales, attr.dtype.scales, attr.dtype.scale_dim * sizeof(float));
+            memcpy(zeroPoints, attr.dtype.zero_points, attr.dtype.zero_points_dim * sizeof(int32_t));
             params.quant_data.affinePerChannel.channelDim = attr.dtype.channel_dim;
             params.quant_data.affinePerChannel.scaleCount = attr.dtype.scale_dim;
             params.quant_data.affinePerChannel.scales = scales;
-            params.quant_data.affinePerChannel.zeroPoint = NULL;
-            params.quant_data.affinePerChannel.zeroPointCount = 0;
+            params.quant_data.affinePerChannel.zeroPoint = zeroPoints;
+            params.quant_data.affinePerChannel.zeroPointCount = attr.dtype.zero_points_dim;
             break;
 #else
             VSILOGE( "can't support qnt_type VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC." );

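For context on what these per-channel parameters encode: under affine per-channel quantization, a quantized value q in channel c maps back to real values as (q - zeroPoint[c]) * scale[c]. A minimal sketch of that mapping (the function name is hypothetical):

    #include <stdint.h>

    /* Dequantizes one value from channel c given per-channel scales/zero points. */
    float dequant_per_channel(int32_t q, uint32_t c,
                              const float *scales, const int32_t *zero_points)
    {
        return (float)(q - zero_points[c]) * scales[c];
    }
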
@@ -580,6 +583,10 @@ static vx_tensor _create_const_raw_tensor
         {
             free( scales );
         }
+        if (zeroPoints)
+        {
+            free( zeroPoints );
+        }
         return NULL;
     }
 }

@@ -620,6 +627,10 @@ static vx_tensor _create_const_raw_tensor
         {
             free( scales );
         }
+        if (zeroPoints)
+        {
+            free( zeroPoints );
+        }

     return tensor;
 } /* _create_const_raw_tensor() */

@@ -689,6 +700,8 @@ static void _convert_const_I8toU8

     if ( tensor->t ) vxReleaseTensor(&tensor->t);
     tensor->t = vsi_nn_CreateRawTensorFromData(graph, data, attr);
+
+    vsi_nn_safe_free( data );
 }/* _convert_const_I8toU8() */

 static vsi_status _convert_graph_const_tensor

@@ -29,25 +29,49 @@
 #include "vsi_nn_log.h"
 #include "vsi_nn_types.h"

+#ifdef __ANDROID__
+#if ANDROID_SDK_VERSION >= 30
+static const char* ENV_LOG_LEVEL = "vendor.VSI_NN_LOG_LEVEL";
+#else
+static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL";
+#endif
+#else
+static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL";
+#endif
+
+int get_env_as_int(const char* env, int default_value) {
+
+    int value = default_value;
+#ifdef __ANDROID__
+    {
+        char value_str[100];
+        int status = __system_property_get(env, value_str);
+        if (status) {
+            value = atoi(value_str);
+        }
+    }
+#else
+    {
+        char* env_s = getenv(env);
+        if (env_s) {
+            value = atoi(env_s);
+        }
+    }
+#endif
+
+    return value;
+}
+
 static vsi_bool _check_log_level
     (
     vsi_nn_log_level_e level
     )
 {
-    char *env_level_s;
     static vsi_nn_log_level_e env_level = VSI_NN_LOG_UNINIT;

     if(env_level == VSI_NN_LOG_UNINIT)
     {
-        env_level_s = getenv("VSI_NN_LOG_LEVEL");
-        if(env_level_s)
-        {
-            env_level = (vsi_nn_log_level_e)atoi(env_level_s);
-        }
-        else
-        {
-            env_level = VSI_NN_LOG_WARN;
-        }
+        env_level = (vsi_nn_log_level_e)get_env_as_int(ENV_LOG_LEVEL, VSI_NN_LOG_WARN);
     }

     if(env_level >= level)

@@ -844,7 +844,7 @@ float * vsi_nn_ConvertTensorToFloat32Data

     if( !tensor->attr.is_created_from_handle )
     {
-        if(tensor_data)free(tensor_data);
+        vsi_nn_safe_free( tensor_data );
     }
     return data;
 } /* vsi_nn_ConvertTensorToFloat32Data() */

@@ -1095,7 +1095,7 @@ void vsi_nn_SaveTensorToTextByFp32
     }
     fwrite( buf, count, 1, fp );
     fclose( fp );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToTextByFp32() */

 void vsi_nn_SaveTensorToText

@@ -1124,7 +1124,7 @@ void vsi_nn_SaveTensorToText
     sz = vsi_nn_GetElementNum( tensor );
     vsi_nn_SaveDataToText( filename, data, sz,
         tensor->attr.dtype.vx_type, seperator );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToText() */

 void vsi_nn_SaveDataToText

@@ -1219,7 +1219,7 @@ void vsi_nn_SaveTensorToBinary
     }
     fwrite( data, sz, 1, fp );
     fclose( fp );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToBinary() */

 vsi_nn_tensor_t * vsi_nn_CreateTensorFromData

@@ -1539,7 +1539,7 @@ void vsi_nn_TransposeTensor
         VSILOGE( "Copy transpose data fail with code %#x.", status );
     }

-    free( buf );
+    vsi_nn_safe_free( buf );
     free( dst );
 } /* vsi_nn_TransposeTensor() */

@@ -1588,7 +1588,7 @@ void vsi_nn_PermuteTensor
         if( perm[i] >= dim_num )
         {
             VSILOGW( "Incorrect perm %d", perm[i] );
-            if( buf ) { free(buf); buf = NULL; }
+            vsi_nn_safe_free( buf );
             if( dst ) { free(dst); dst = NULL; }
             return;
         }

@@ -1603,7 +1603,7 @@ void vsi_nn_PermuteTensor
         VSILOGE( "Copy permute data fail with code %#x.", status );
     }

-    if( buf ) { free(buf); buf = NULL; }
+    vsi_nn_safe_free( buf );
     if( dst ) { free(dst); dst = NULL; }
 } /* vsi_nn_PermuteTensor() */

@@ -2241,7 +2241,7 @@ void vsi_nn_reshuffle_weight_data
     }
     vsi_nn_CopyDataToTensor( graph, weights, weight_data );
     vsi_nn_Free( buffer );
-    vsi_nn_Free( weight_data );
+    vsi_nn_safe_free( weight_data );
 }

 vsi_nn_tensor_t* vsi_nn_ConcatTensor_impl
