diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f105a65 --- /dev/null +++ b/.gitignore @@ -0,0 +1,335 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +*-[Dd]ebug/ +*-[Dd]ebugPublic/ +*-[Rr]elease/ +*-[Rr]eleases/ + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +*.o +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +NNApi0.3/ +NNApi0.4/ +OpenVX1.2/ +bazel-bin +bazel-genfiles +bazel-out +bazel-ovxlib +bazel-testlogs + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# VS code +.vscode + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ +**/Properties/launchSettings.json + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# TypeScript v1 declaration files +typings/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush +.cr/ + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# IDE +.settings/ diff --git a/src/tim/vx/internal/include/interface/ops.def b/src/tim/vx/internal/include/interface/ops.def index 0a5077b..88c74c2 100644 --- a/src/tim/vx/internal/include/interface/ops.def +++ b/src/tim/vx/internal/include/interface/ops.def @@ -144,3 +144,5 @@ DEF_OP(PRE_PROCESS_YUV444) DEF_OP(PRE_PROCESS_NV12) DEF_OP(SCATTER_ND) DEF_OP(DECONVOLUTION1D) +DEF_OP(INTERP) +DEF_OP(RESIZE_1D) diff --git a/src/tim/vx/internal/include/internal/internal_ops.def b/src/tim/vx/internal/include/internal/internal_ops.def index 2a1ac9e..e8f677b 100644 --- a/src/tim/vx/internal/include/internal/internal_ops.def +++ b/src/tim/vx/internal/include/internal/internal_ops.def @@ -14,3 +14,5 @@ DEF_OP(RESIZE_NEAREST_INTERNAL) DEF_OP(DEPTH2SPACE_INTERNAL) DEF_OP(GRUCELL_ACTIVATION_INTERNAL) DEF_OP(GRUCELL_ACTIVATION_INTERNAL_SMA) +DEF_OP(RESIZE_1D_BILINEAR_INTERNAL) +DEF_OP(RESIZE_1D_NEAREST_INTERNAL) diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_elu.h b/src/tim/vx/internal/include/ops/vsi_nn_op_elu.h index 87cf3aa..b4ebd3c 100644 --- 
a/src/tim/vx/internal/include/ops/vsi_nn_op_elu.h +++ b/src/tim/vx/internal/include/ops/vsi_nn_op_elu.h @@ -44,6 +44,7 @@ typedef struct _vsi_nn_elu_param { /* elu layer local data structure */ vsi_nn_elu_lcl_data local; + float alpha; } vsi_nn_elu_param; #ifdef __cplusplus diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_interp.h b/src/tim/vx/internal/include/ops/vsi_nn_op_interp.h new file mode 100644 index 0000000..5f1bfb2 --- /dev/null +++ b/src/tim/vx/internal/include/ops/vsi_nn_op_interp.h @@ -0,0 +1,44 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + +#ifndef _VSI_NN_OP_INTERP_H +#define _VSI_NN_OP_INTERP_H + +#include "vsi_nn_types.h" + +typedef struct _vsi_nn_interp_param +{ + struct _interp_local_data_t* local; + int32_t height; // height of output + int32_t width; // width of output + int32_t zoom_factor; // zoom factor + int32_t shrink_factor; // shrink factor + int32_t pad_beg; // padding at beginning of input + int32_t pad_end; // padding at end of input +} vsi_nn_interp_param; + + + +#endif + diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d.h b/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d.h new file mode 100644 index 0000000..e85aa74 --- /dev/null +++ b/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d.h @@ -0,0 +1,44 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + +#ifndef _VSI_NN_OP_RESIZE_1D_H +#define _VSI_NN_OP_RESIZE_1D_H + +#include "vsi_nn_types.h" + +typedef struct _vsi_nn_resize_1d_param +{ + struct _resize_1d_local_data_t* local; + vsi_enum type; + float factor; + int32_t size[2]; + vsi_bool align_corners; + vsi_bool half_pixel_centers; +} vsi_nn_resize_1d_param; + +_compiler_assert(offsetof(vsi_nn_resize_1d_param, local) == 0, \ + vsi_nn_resize_1d_h ); + +#endif + diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d_bilinear_internal.h b/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d_bilinear_internal.h new file mode 100644 index 0000000..4e119c8 --- /dev/null +++ b/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d_bilinear_internal.h @@ -0,0 +1,42 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+* +*****************************************************************************/ + +#ifndef _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H +#define _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H + +#include "vsi_nn_types.h" + +typedef struct _vsi_nn_resize_1d_bilinear_internal_param +{ + struct _resize_1d_bilinear_internal_local_data_t* local; + vsi_bool align_corners; + vsi_bool half_pixel_centers; + float factor; +} vsi_nn_resize_1d_bilinear_internal_param; + +_compiler_assert(offsetof(vsi_nn_resize_1d_bilinear_internal_param, local) == 0, \ + vsi_nn_resize_1d_bilinear_internal_h ); + +#endif + diff --git a/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d_nearest_internal.h b/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d_nearest_internal.h new file mode 100644 index 0000000..cc94051 --- /dev/null +++ b/src/tim/vx/internal/include/ops/vsi_nn_op_resize_1d_nearest_internal.h @@ -0,0 +1,42 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + +#ifndef _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H +#define _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H + +#include "vsi_nn_types.h" + +typedef struct _vsi_nn_resize_1d_nearest_internal_param +{ + struct _resize_1d_nearest_internal_local_data_t* local; + vsi_bool align_corners; + vsi_bool half_pixel_centers; + float factor; +} vsi_nn_resize_1d_nearest_internal_param; + +_compiler_assert(offsetof(vsi_nn_resize_1d_nearest_internal_param, local) == 0, \ + vsi_nn_resize_1d_nearest_internal_h ); + +#endif + diff --git a/src/tim/vx/internal/include/utils/vsi_nn_util.h b/src/tim/vx/internal/include/utils/vsi_nn_util.h index 93180c7..de9d470 100644 --- a/src/tim/vx/internal/include/utils/vsi_nn_util.h +++ b/src/tim/vx/internal/include/utils/vsi_nn_util.h @@ -195,13 +195,6 @@ OVXLIB_API vsi_bool vsi_nn_CheckFilePath const char *path ); -OVXLIB_API void vsi_nn_GetFP32MultiAndPostShift - ( - vx_float32 mult, - vx_uint16 *M0, - vx_int8 *N - ); - /** * Malloc aligned buffer * Malloc address and size aligned buffer. 
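Background for the hunk above: it drops vsi_nn_GetFP32MultiAndPostShift() from the public utils header, and the EVIS initializers later in this patch call gpu_quantize_multiplier_16bit() instead. Both helpers decompose a floating-point requantization scale into a 16-bit multiplier M0 and a post shift N with scale ~= M0 * 2^(-N), so the per-element rescale becomes an integer multiply plus shift. A minimal sketch of that decomposition, assuming a positive scale and the usual frexp-based approach; decompose_scale is a hypothetical name for illustration, not the library's actual implementation:

#include <math.h>
#include <stdint.h>

/* Illustration only: split a positive float scale into a multiplier M0 in
 * [2^14, 2^15] and a post shift N such that scale ~= M0 * 2^(-N).
 * The real gpu_quantize_multiplier_16bit() may round and clamp differently. */
static void decompose_scale(float scale, uint16_t *M0, int32_t *N)
{
    int exp = 0;
    float mant = frexpf(scale, &exp); /* scale = mant * 2^exp, mant in [0.5, 1) */
    *M0 = (uint16_t)roundf(mant * (float)(1 << 15));
    *N = 15 - exp;                    /* scale ~= M0 * 2^(exp - 15) */
}

The kernels below then fold the zero points into a second constant, e.g. multAndoutZP[1] = (output_ZP << postShift) - input_ZP * M0, so only the multiplier pair changes per tensor.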
diff --git a/src/tim/vx/internal/include/vsi_nn_graph_optimization.h b/src/tim/vx/internal/include/vsi_nn_graph_optimization.h index bdf2e5a..1f43353 100644 --- a/src/tim/vx/internal/include/vsi_nn_graph_optimization.h +++ b/src/tim/vx/internal/include/vsi_nn_graph_optimization.h @@ -32,6 +32,13 @@ extern "C" { #endif +vx_tensor vsi_nn_CreateRawTensorFromData + ( + vsi_nn_graph_t * graph, + uint8_t * data, + vsi_nn_tensor_attr_t * attr + ); + vsi_status vsi_nn_OptimizeGraph ( vsi_nn_graph_t* graph, diff --git a/src/tim/vx/internal/include/vsi_nn_node_type.h b/src/tim/vx/internal/include/vsi_nn_node_type.h index 6304280..9f13725 100644 --- a/src/tim/vx/internal/include/vsi_nn_node_type.h +++ b/src/tim/vx/internal/include/vsi_nn_node_type.h @@ -158,6 +158,10 @@ #include "ops/vsi_nn_op_squeeze.h" #include "ops/vsi_nn_op_expand_broadcast.h" #include "ops/vsi_nn_op_deconvolution1d.h" +#include "ops/vsi_nn_op_interp.h" +#include "ops/vsi_nn_op_resize_1d.h" +#include "ops/vsi_nn_op_resize_1d_bilinear_internal.h" +#include "ops/vsi_nn_op_resize_1d_nearest_internal.h" /* custom node head define define */ #include "custom/vsi_nn_custom_node_type.h" @@ -302,6 +306,10 @@ typedef union _vsi_nn_nn_param vsi_nn_squeeze_param squeeze; vsi_nn_expand_broadcast_param expand_broadcast; vsi_nn_deconvolution1d_param deconvolution1d; + vsi_nn_interp_param interp; + vsi_nn_resize_1d_param resize_1d; + vsi_nn_resize_1d_bilinear_internal_param resize_1d_bilinear_internal; + vsi_nn_resize_1d_nearest_internal_param resize_1d_nearest_internal; uint8_t client_param[128]; /* custom node data struct define */ diff --git a/src/tim/vx/internal/include/vsi_nn_version.h b/src/tim/vx/internal/include/vsi_nn_version.h index 5e68dec..da62e48 100644 --- a/src/tim/vx/internal/include/vsi_nn_version.h +++ b/src/tim/vx/internal/include/vsi_nn_version.h @@ -33,7 +33,7 @@ extern "C"{ #define VSI_NN_VERSION_MAJOR 1 #define VSI_NN_VERSION_MINOR 1 -#define VSI_NN_VERSION_PATCH 28 +#define VSI_NN_VERSION_PATCH 30 #define VSI_NN_VERSION \ (VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH) diff --git a/src/tim/vx/internal/src/Android.mk b/src/tim/vx/internal/src/Android.mk index 235a845..61345a2 100644 --- a/src/tim/vx/internal/src/Android.mk +++ b/src/tim/vx/internal/src/Android.mk @@ -117,6 +117,7 @@ LOCAL_C_INCLUDES += \ LOCAL_CFLAGS := \ -DLINUX \ -D'OVXLIB_API=__attribute__((visibility("default")))' \ + -DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION)\ -Wno-sign-compare \ -Wno-implicit-function-declaration \ -Wno-sometimes-uninitialized \ diff --git a/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c b/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c index b2afa0a..c0de129 100644 --- a/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c +++ b/src/tim/vx/internal/src/kernel/cl/eltwise_unary_cl.c @@ -168,12 +168,14 @@ static vx_param_description_t kernel_param_def[] = {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, }; #define SCALAR_INPUT_SCALE (2) #define SCALAR_INPUT_TAIL (3) #define SCALAR_OUTPUT_SCALE (4) #define SCALAR_OUTPUT_ZP (5) +#define SCALAR_ALPHA (6) #define _CL_PARAM_NUM _cnt_of_array(kernel_param_def) /* @@ -293,6 +295,7 @@ static vsi_nn_kernel_node_t _setup float inputTail = (float)inputs[0]->attr.dtype.zero_point * inputScale; float outputScale = outputs[0]->attr.dtype.scale; float outputZP = 
(float)outputs[0]->attr.dtype.zero_point + 0.5f; + float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" ); ret = vsi_nn_kernel_optimize_element_shape( (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num, @@ -331,6 +334,8 @@ static vsi_nn_kernel_node_t _setup graph, F32, &outputScale ); node_params[SCALAR_OUTPUT_ZP] = vsi_nn_kernel_scalar_create( graph, F32, &outputZP ); + node_params[SCALAR_ALPHA] = vsi_nn_kernel_scalar_create( + graph, F32, &alpha ); /* Pass parameters to node. */ status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM ); @@ -369,6 +374,11 @@ OnError: vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_ZP] ); } + if (node_params[SCALAR_ALPHA]) + { + vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALPHA] ); + } + return node; } /* _setup() */ diff --git a/src/tim/vx/internal/src/kernel/cl/moments_cl.c b/src/tim/vx/internal/src/kernel/cl/moments_cl.c index 8a72060..d258e39 100644 --- a/src/tim/vx/internal/src/kernel/cl/moments_cl.c +++ b/src/tim/vx/internal/src/kernel/cl/moments_cl.c @@ -356,7 +356,8 @@ static vsi_nn_kernel_node_t _setup int32_t out_shape[VSI_NN_MAX_DIM_NUM] = {0}; int32_t out_rs_flg = 0; int32_t axis_num = 0; - int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num); + size_t axis_num_temp = 0; + int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp); int32_t keep_dim = vsi_nn_kernel_param_get_int32( params, "keep_dim" ); int32_t first_axis = axis[0]; int32_t i = 0; @@ -369,6 +370,8 @@ static vsi_nn_kernel_node_t _setup float input_scale = inputs[0]->attr.dtype.scale; float dim_ratio = (float)1.0 / (float)(width * height); + axis_num = (int32_t)axis_num_temp; + if(inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP) { if (inputs[0]->attr.dtype.fl > 0) diff --git a/src/tim/vx/internal/src/kernel/cl/resize_1d_bilinear_cl.c b/src/tim/vx/internal/src/kernel/cl/resize_1d_bilinear_cl.c new file mode 100644 index 0000000..f8ff904 --- /dev/null +++ b/src/tim/vx/internal/src/kernel/cl/resize_1d_bilinear_cl.c @@ -0,0 +1,305 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+* +*****************************************************************************/ + + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "vsi_nn_types.h" +#include "vsi_nn_tensor.h" +#include "vsi_nn_graph.h" +#include "vsi_nn_log.h" +#include "vsi_nn_error.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_tensor_util.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "libnnext/vx_lib_nnext.h" + +__BEGIN_DECLS + +#define _RESIZE_1D_BILINEAR_KERNEL_SOURCE() "resize_1d_bilinear" + +#define STR(a) #a +// Add kernel hashtable here +#define RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \ + (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) ) + +#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \ + CVIVANTE_NAMESPACE("cl.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \ + _RESIZE_1D_BILINEAR_KERNEL_SOURCE() } + +typedef struct +{ + uint32_t key; + char * function_name; + const char * source_name; +} _kernel_map_type; + +static const _kernel_map_type _resize_1d_bilinear_kernel_map[] = +{ + // Register kernel here + PACK_KERNEL_MAP( F32, F32), + PACK_KERNEL_MAP( U8, U8), +}; + + +/* + * Kernel params + */ +static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] = +{ + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, +}; +#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def ) + + +#define SCALAR_SCALE_X (2) +#define SCALAR_HALF_PIXEL (3) +#define SCALAR_INPUT_SCALE (4) +#define SCALAR_INPUT_TAIL (5) +#define SCALAR_OUTPUT_SCALE (6) +#define SCALAR_OUTPUT_TAIL (7) + + +#define RESIZE_1D_BILINEAR_NUM 4 +#define RESIZE_1D_BILINEAR_QUANT_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def ) + + +/* + * Kernel initializer + */ +DEF_KERNEL_INITIALIZER(_resize_1d_bilinear_initializer) + ( + vsi_nn_kernel_node_t node, + const vsi_nn_kernel_node_param_t * param, + size_t param_size + ) +{ + vsi_status status = VSI_FAILURE; + gpu_param_t gpu_param = { + 3, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0} + }; + vsi_nn_kernel_tensor_attr_t * output_attr = NULL; + vsi_int_array_t * out_shape = NULL; + + output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); + CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); + + out_shape = output_attr->shape; + + gpu_param.global_scale[0] = 1; + gpu_param.global_scale[1] = 1; + gpu_param.global_scale[2] = 1; + + gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3; + gpu_param.global_size[0] = gpu_align_p2( + (out_shape->data[0] + gpu_param.global_scale[0] - 1) + / gpu_param.global_scale[0], 4); + gpu_param.global_size[1] = ( + (out_shape->data[1] + gpu_param.global_scale[1] - 1) + / gpu_param.global_scale[1]); + gpu_param.global_size[2] = out_shape->size > 2 ?
out_shape->data[2] : 1; + status = vsi_nn_kernel_gpu_config( node, &gpu_param ); + +final: +#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; } + SAFE_FREE_TENSOR_ATTR(output_attr); + return status; +} /* _resize_1d_bilinear_initializer() */ + + + +/* + * Query kernel + */ +static vsi_status _query_kernel + ( + vsi_nn_kernel_t * kernel, + vsi_nn_tensor_t * const * const inputs, + vsi_nn_tensor_t * const * const outputs, + vsi_bool *is_use_u8_kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_dtype_e in_dtype = F16; + vsi_nn_kernel_dtype_e out_dtype = F16; + const _kernel_map_type * kernel_map = _resize_1d_bilinear_kernel_map; + size_t kernel_map_size = _cnt_of_array( _resize_1d_bilinear_kernel_map ); + vx_param_description_t * param_def = _resize_1d_bilinear_kernel_param_def; + size_t param_def_size = _cnt_of_array( _resize_1d_bilinear_kernel_param_def ); + vx_kernel_initialize_f initializer = _resize_1d_bilinear_initializer; + + uint32_t key = 0; + uint32_t i = 0; + + in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); + out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); + + if (F16 == in_dtype) + { + in_dtype = F32; + } + if (F16 == out_dtype) + { + out_dtype = F32; + } + + + if ((U8 == in_dtype) || (U8 == out_dtype)) + { + param_def_size = RESIZE_1D_BILINEAR_QUANT_NUM; + *is_use_u8_kernel = TRUE; + } + else + { + param_def_size = RESIZE_1D_BILINEAR_NUM; + *is_use_u8_kernel = FALSE; + } + + key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype ); + + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if ( kernel_map[i].key == key ) + { + break; + } + } + if ( i < (uint32_t)kernel_map_size ) + { + snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name ); + kernel->info.parameters = param_def; + kernel->info.numParams = (uint32_t)param_def_size; + kernel->info.initialize = initializer; + // Register code source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1, + kernel_map[i].source_name ); + // Register binary source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1, + kernel_map[i].source_name ); + status = VSI_SUCCESS; + } + return status; +} /* _query_kernel() */ + + +static vsi_nn_kernel_node_t _setup + ( + vsi_nn_graph_t * graph, + vsi_nn_tensor_t ** inputs, + size_t input_num, + vsi_nn_tensor_t ** outputs, + size_t output_num, + const vsi_nn_kernel_param_t * params, + vsi_nn_kernel_t * kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_node_param_t node_params[RESIZE_1D_BILINEAR_QUANT_NUM] = {NULL}; + vsi_nn_kernel_node_t node = NULL; + int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" ); + int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" ); + int32_t in_width = inputs[0]->attr.size[0]; + int32_t out_width = outputs[0]->attr.size[0]; + float input_zp = (float)inputs[0]->attr.dtype.zero_point; + float input_scale = inputs[0]->attr.dtype.scale; + float input_tail = -(input_zp * input_scale); + float output_zp = (float)outputs[0]->attr.dtype.zero_point; + float output_scale = (0 == outputs[0]->attr.dtype.scale) ? 
1.0f : 1.0f / outputs[0]->attr.dtype.scale; + float half_pixel_value = 0.0f; + float scale_factor_x = 0.0f; + vsi_bool is_use_u8_kernel = FALSE; + + if (align_corners && out_width > 1) + { + scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1); + } + else + { + scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width; + } + + if (half_pixel_centers) + { + half_pixel_value = 0.5f; + } + else + { + half_pixel_value = 0.0f; + } + + status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel ); + if ( VSI_SUCCESS == status) + { + node = vsi_nn_kernel_create_node( graph, kernel ); + if ( node ) + { + size_t node_params_num = RESIZE_1D_BILINEAR_NUM; + /* Set inputs and outputs */ + vsi_nn_kernel_node_pack_io( node_params, RESIZE_1D_BILINEAR_QUANT_NUM, + inputs, input_num, outputs, output_num ); + node_params[SCALAR_SCALE_X] = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x ); + node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value ); + if (is_use_u8_kernel) + { + node_params[SCALAR_INPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &input_scale ); + node_params[SCALAR_INPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &input_tail ); + node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale ); + node_params[SCALAR_OUTPUT_TAIL] = vsi_nn_kernel_scalar_create(graph, F32, &output_zp ); + node_params_num = RESIZE_1D_BILINEAR_QUANT_NUM; + } + /* Pass parameters to node. */ + status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num ); + VSI_ASSERT( status == VSI_SUCCESS ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] ); + if (is_use_u8_kernel) + { + vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] ); + } + } + } + return node; +} /* _setup() */ + +__END_DECLS + +REGISTER_BACKEND_CL( resize_1d_bilinear, _setup ) + diff --git a/src/tim/vx/internal/src/kernel/cl/resize_1d_nearest_cl.c b/src/tim/vx/internal/src/kernel/cl/resize_1d_nearest_cl.c new file mode 100644 index 0000000..5b0f9a4 --- /dev/null +++ b/src/tim/vx/internal/src/kernel/cl/resize_1d_nearest_cl.c @@ -0,0 +1,312 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "vsi_nn_types.h" +#include "vsi_nn_tensor.h" +#include "vsi_nn_graph.h" +#include "vsi_nn_log.h" +#include "vsi_nn_error.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_tensor_util.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "libnnext/vx_lib_nnext.h" + +__BEGIN_DECLS + +/* + * Define kernel meta. + */ +typedef enum +{ + INTERNAL_KERNEL_RESIZE_1D_NEAREST, +} _internal_kernel_e; + +#define _RESIZE_1D_NEAREST_KERNEL_SOURCE "resize_1d_nearest" + +#define STR(a) #a +// Add kernel hashtable here +#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \ + (( IN_DTYPE << 8 ) | ( OUT_DTYPE )) + +#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \ + CVIVANTE_NAMESPACE("cl.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \ + _RESIZE_1D_NEAREST_KERNEL_SOURCE } + +typedef struct +{ + uint32_t key; + char * function_name; + const char * source_name; +} _kernel_map_type; + +static const _kernel_map_type _resize_1d_nearest_kernel_map[] = +{ + // Register kernel here + PACK_KERNEL_MAP( F32, F32), + PACK_KERNEL_MAP( U8, U8), +}; + + +/* + * Kernel params + */ +static vx_param_description_t _resize_1d_nearest_kernel_param_def[] = +{ + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, +}; +#define _RESIZE_1D_NEAREST_PARAM_NUM 5 +#define _RESIZE_1D_NEAREST_QUANT_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def ) + +#define SCALAR_SCALE_X (2) +#define SCALAR_HALF_PIXEL (3) +#define SCALAR_ROUND_VALUE (4) +#define SCALAR_SCALE_VALUE (5) +#define SCALAR_TAIL_VALUE (6) + +/* + * Kernel initializer + */ +DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer) + ( + vsi_nn_kernel_node_t node, + const vsi_nn_kernel_node_param_t * param, + size_t param_size + ) +{ + vsi_status status = VSI_FAILURE; + gpu_param_t gpu_param = { + 3, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0} + }; + vsi_nn_kernel_tensor_attr_t * output_attr = NULL; + vsi_int_array_t * out_shape = NULL; + + output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); + CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); + + out_shape = output_attr->shape; + + gpu_param.global_scale[0] = 1; + gpu_param.global_scale[1] = 1; + gpu_param.global_scale[2] = 1; + + gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3; + gpu_param.global_size[0] = gpu_align_p2( + (out_shape->data[0] + gpu_param.global_scale[0] - 1) + / gpu_param.global_scale[0], 4); + gpu_param.global_size[1] = ( + (out_shape->data[1] + gpu_param.global_scale[1] - 1) + / gpu_param.global_scale[1]); + gpu_param.global_size[2] = out_shape->size > 2 ?
out_shape->data[2] : 1; + status = vsi_nn_kernel_gpu_config( node, &gpu_param ); + +final: +#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; } + SAFE_FREE_TENSOR_ATTR(output_attr); + return status; +} /* _resize_1d_nearest_initializer() */ + + + +/* + * Query kernel + */ +static vsi_status _query_kernel + ( + vsi_nn_kernel_t * kernel, + vsi_nn_tensor_t * const * const inputs, + vsi_nn_tensor_t * const * const outputs, + vsi_bool *is_use_u8_kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_dtype_e in_dtype = F16; + vsi_nn_kernel_dtype_e out_dtype = F16; + const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map; + size_t kernel_map_size = _cnt_of_array( _resize_1d_nearest_kernel_map ); + vx_param_description_t * param_def = _resize_1d_nearest_kernel_param_def; + size_t param_def_size = _cnt_of_array( _resize_1d_nearest_kernel_param_def ); + vx_kernel_initialize_f initializer = _resize_1d_nearest_initializer; + + uint32_t key = 0; + uint32_t i = 0; + + in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); + out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); + + if (F16 == in_dtype) + { + in_dtype = F32; + } + if (F16 == out_dtype) + { + out_dtype = F32; + } + + if ((U8 == in_dtype) || (U8 == out_dtype)) + { + param_def_size = _RESIZE_1D_NEAREST_QUANT_NUM; + *is_use_u8_kernel = TRUE; + } + else + { + param_def_size = _RESIZE_1D_NEAREST_PARAM_NUM; + *is_use_u8_kernel = FALSE; + } + + key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype ); + + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if ( kernel_map[i].key == key ) + { + break; + } + } + if ( i < (uint32_t)kernel_map_size ) + { + snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name ); + kernel->info.parameters = param_def; + kernel->info.numParams = (uint32_t)param_def_size; + kernel->info.initialize = initializer; + // Register code source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1, + kernel_map[i].source_name ); + // Register binary source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1, + kernel_map[i].source_name ); + status = VSI_SUCCESS; + } + return status; +} /* _query_kernel() */ + + +static vsi_nn_kernel_node_t _setup + ( + vsi_nn_graph_t * graph, + vsi_nn_tensor_t ** inputs, + size_t input_num, + vsi_nn_tensor_t ** outputs, + size_t output_num, + const vsi_nn_kernel_param_t * params, + vsi_nn_kernel_t * kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_QUANT_NUM] = {NULL}; + vsi_nn_kernel_node_t node = NULL; + int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" ); + int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" ); + int32_t in_width = inputs[0]->attr.size[0]; + int32_t out_width = outputs[0]->attr.size[0]; + float input_zp = (float)inputs[0]->attr.dtype.zero_point; + float input_scale = inputs[0]->attr.dtype.scale; + float output_scale = (0 == outputs[0]->attr.dtype.scale) ? 
\ + input_scale : input_scale / outputs[0]->attr.dtype.scale; + float output_tail = (float)outputs[0]->attr.dtype.zero_point - input_zp * output_scale; + float half_pixel_value = 0.0f; + float round_value = 0.0f; + float scale_factor_x = 0.0f; + vsi_bool is_use_u8_kernel = FALSE; + + if (align_corners && out_width > 1) + { + scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1); + } + else + { + scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width; + } + + if (align_corners) + { + round_value = 0.5f; + } + else + { + round_value = 0.0f; + } + + if (half_pixel_centers) + { + half_pixel_value = 0.5f; + } + else + { + half_pixel_value = 0.0f; + } + + status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel ); + if ( VSI_SUCCESS == status) + { + node = vsi_nn_kernel_create_node( graph, kernel ); + if ( node ) + { + size_t node_params_num = _RESIZE_1D_NEAREST_PARAM_NUM; + /* Set inputs and outputs */ + vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM, + inputs, input_num, outputs, output_num ); + node_params[SCALAR_SCALE_X] = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x ); + node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value ); + node_params[SCALAR_ROUND_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &round_value ); + if (is_use_u8_kernel) + { + node_params[SCALAR_SCALE_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale ); + node_params[SCALAR_TAIL_VALUE] = vsi_nn_kernel_scalar_create(graph, F32, &output_tail ); + node_params_num = _RESIZE_1D_NEAREST_QUANT_NUM; + } + /* Pass parameters to node. */ + status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_ROUND_VALUE] ); + if (is_use_u8_kernel) + { + vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_VALUE] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_TAIL_VALUE] ); + } + } + } + return node; +} /* _setup() */ + +__END_DECLS + +REGISTER_BACKEND_CL( resize_1d_nearest, _setup ) + diff --git a/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c b/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c index f52f367..03c1711 100644 --- a/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c +++ b/src/tim/vx/internal/src/kernel/cpu/eltwise_unary_cpu.c @@ -49,7 +49,7 @@ typedef enum } unary_type_e; -#define _CPU_ARG_NUM (1) +#define _CPU_ARG_NUM (2) #define _CPU_INPUT_NUM (1) #define _CPU_OUTPUT_NUM (1) #define _CPU_IO_NUM (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM) @@ -71,9 +71,9 @@ static float log_eval(float data) return logf(data); } -static float elu_eval(float data) +static float elu_eval(float data, float alpha) { - return data >=0 ? data : expf(data) - 1; + return data >=0 ? 
data : expf(data) * alpha - alpha; } static float neg_eval(float data) @@ -114,6 +114,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec) size_t out_elements = 0; vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL }; int32_t i; + float alpha = 0; int32_t unary_type = 0; tensors[0] = (vsi_nn_kernel_tensor_t)param[0]; @@ -126,6 +127,8 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec) status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &unary_type); CHECK_STATUS_FAIL_GOTO(status, final ); + status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &alpha); + CHECK_STATUS_FAIL_GOTO(status, final ); buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE ); CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final ); @@ -151,7 +154,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec) data = log_eval(data); break; case UNARY_ELU: - data = elu_eval(data); + data = elu_eval(data, alpha); break; case UNARY_NEG: data = neg_eval(data); @@ -193,9 +196,11 @@ static vx_param_description_t kernel_param_def[] = {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, }; #define INPUT_FUNC_TYPE (2) +#define INPUT_SCALAR_ALPHA (3) static const vx_kernel_description_t _kernel_info = { @@ -237,6 +242,7 @@ static vsi_nn_kernel_node_t _setup vsi_status status = VSI_SUCCESS; vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL}; vsi_nn_kernel_node_t node = NULL; + float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" ); status = _query_kernel( inputs, outputs, kernel ); if( VSI_SUCCESS == status) @@ -249,10 +255,13 @@ static vsi_nn_kernel_node_t _setup inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM ); backend_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &unary_type ); + backend_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create( + graph, F32, &alpha ); /* Pass parameters to node. 
*/ status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM ); vsi_nn_kernel_scalar_release( &backend_params[INPUT_FUNC_TYPE] ); + vsi_nn_kernel_scalar_release( &backend_params[INPUT_SCALAR_ALPHA] ); } else { diff --git a/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c b/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c index b9450c9..f1124bf 100644 --- a/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c +++ b/src/tim/vx/internal/src/kernel/cpu/moments_cpu.c @@ -258,11 +258,14 @@ static vsi_nn_kernel_node_t _setup vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL}; vsi_nn_kernel_node_t node = NULL; int32_t axis_num = 0; - int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num); + size_t axis_num_temp = 0; + int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp); vsi_bool is_continue_axis = TRUE; uint32_t mask = 0; int32_t i = 0; + + axis_num = (int32_t)axis_num_temp; + for ( i = 1; i < axis_num; i++) { if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0) diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_1d_bilinear_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_1d_bilinear_cpu.c new file mode 100644 index 0000000..df91d90 --- /dev/null +++ b/src/tim/vx/internal/src/kernel/cpu/resize_1d_bilinear_cpu.c @@ -0,0 +1,271 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "vsi_nn_types.h" +#include "vsi_nn_tensor.h" +#include "vsi_nn_graph.h" +#include "vsi_nn_log.h" +#include "vsi_nn_error.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_tensor_util.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "libnnext/vx_lib_nnext.h" + +__BEGIN_DECLS + +/* + * Define kernel meta.
+ */ +#define _INPUT_NUM (1) +#define _OUTPUT_NUM (1) +#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_1d_bilinear") + + +/* + * Kernel params + */ +static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] = +{ + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, +}; +#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def ) + +#define SCALAR_ALIGN_CORNERS (2) +#define SCALAR_HALF_PIXEL (3) + +/* + * Kernel function + */ +DEF_KERNEL_EXECUTOR(_compute) + ( + vsi_nn_kernel_node_t node, + const vsi_nn_kernel_node_param_t * param, + size_t param_size + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL}; + vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL}; + float *f32_in_buffer[_INPUT_NUM] = {NULL}; + float *f32_out_buffer[_OUTPUT_NUM] = {NULL}; + vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM]; + vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM]; + size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}}; + size_t out_elements[_OUTPUT_NUM] = {0}; + size_t out_bytes[_OUTPUT_NUM] = {0}; + uint32_t i = 0; + int32_t align_corners = 0; + int32_t half_pixel_centers = 0; + float width_scale = 1.0f; + uint32_t input_width = 0, output_width = 0; + uint32_t w = 0, out = 0; + uint32_t output_dims = 0; + float data00 = .0f, data01 = .0f, interpolation = .0f; + uint32_t index = 0; + uint32_t outer = 0; + + /* prepare data */ + for (i = 0; i < _INPUT_NUM; i ++) + { + input[i] = (vsi_nn_kernel_tensor_t)param[i]; + in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] ); + f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE ); + CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final ); + + } + for (i = 0; i < _OUTPUT_NUM; i ++) + { + output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM]; + out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] ); + vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] ); + out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] ); + out_bytes[i] = out_elements[i] * sizeof(float); + f32_out_buffer[i] = (float *)malloc( out_bytes[i] ); + CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final ); + memset( f32_out_buffer[i], 0, out_bytes[i] ); + } + + vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners)); + vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers)); + input_width = in_attr[0]->shape->data[0]; + output_width = out_attr[0]->shape->data[0]; + output_dims = (uint32_t)out_attr[0]->shape->size; + + if (align_corners && output_width > 1) + { + width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1); + } + else + { + width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width; + } + + outer = 1; + + for (i = 1; i < output_dims; i++) + { + outer = outer * out_attr[0]->shape->data[i]; + } + + for (out = 0; out < outer; out++) + { + vx_int32 input_base = out * input_width; + vx_int32 output_base = out * output_width; + for (w = 0; w < output_width; w ++) + { + vx_float32 input_w; + vx_int32 w0; + vx_int32 w1; + if (half_pixel_centers) + { + input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f; + } + else + { + input_w = w * width_scale; + } + 
w0 = (vx_int32)input_w; + w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vx_int32)(input_width - 1)); + index = input_base + w0; + data00 = f32_in_buffer[0][index]; + index = input_base + w1; + data01 = f32_in_buffer[0][index]; + + interpolation = data00 * (1 - (input_w - w0)) + + data01 * (input_w - w0); + index = output_base + w; + f32_out_buffer[0][index] = interpolation; + } + } + + + /* save data */ + for (i = 0; i < _OUTPUT_NUM; i++) + { + status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i], + f32_out_buffer[i], out_elements[i] ); + CHECK_STATUS_FAIL_GOTO( status, final ); + } + +final: + for (i = 0; i < _INPUT_NUM; i++) + { + if (f32_in_buffer[i]) + { + free(f32_in_buffer[i]); + f32_in_buffer[i] = NULL; + } + if (in_attr[i]) + { + vsi_nn_kernel_tensor_attr_release( &in_attr[i] ); + } + } + for (i = 0; i < _OUTPUT_NUM; i++) + { + if (f32_out_buffer[i]) + { + free(f32_out_buffer[i]); + f32_out_buffer[i] = NULL; + } + if (out_attr[i]) + { + vsi_nn_kernel_tensor_attr_release( &out_attr[i] ); + } + } + + return status; +} /* _compute() */ + + +/* + * Query kernel + */ +static vsi_status _query_kernel + ( + vsi_nn_kernel_t * kernel, + vsi_nn_tensor_t * const * const inputs, + vsi_nn_tensor_t * const * const outputs + /* Add extra params */ + ) +{ + vsi_status status = VSI_FAILURE; + snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME ); + kernel->info.function = _compute; + kernel->info.parameters = _resize_1d_bilinear_kernel_param_def; + kernel->info.numParams = _cnt_of_array( _resize_1d_bilinear_kernel_param_def ); + status = VSI_SUCCESS; + return status; +} /* _query_kernel() */ + + +static vsi_nn_kernel_node_t _setup + ( + vsi_nn_graph_t * graph, + vsi_nn_tensor_t ** inputs, + size_t input_num, + vsi_nn_tensor_t ** outputs, + size_t output_num, + const vsi_nn_kernel_param_t * params, + vsi_nn_kernel_t * kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_BILINEAR_PARAM_NUM] = {NULL}; + vsi_nn_kernel_node_t node = NULL; + int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" ); + int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" ); + + status = _query_kernel( kernel, inputs, outputs ); + if ( VSI_SUCCESS == status) + { + node = vsi_nn_kernel_create_node( graph, kernel ); + if ( node ) + { + /* Set inputs and outputs */ + vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_BILINEAR_PARAM_NUM, + inputs, input_num, outputs, output_num ); + node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners ); + node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers ); + /* Pass parameters to node. 
*/ + status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_BILINEAR_PARAM_NUM ); + VSI_ASSERT( status == VSI_SUCCESS ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] ); + } + } + return node; +} /* _setup() */ + +__END_DECLS + +REGISTER_BACKEND_CPU( resize_1d_bilinear, _setup ) + diff --git a/src/tim/vx/internal/src/kernel/cpu/resize_1d_nearest_cpu.c b/src/tim/vx/internal/src/kernel/cpu/resize_1d_nearest_cpu.c new file mode 100644 index 0000000..44e45a7 --- /dev/null +++ b/src/tim/vx/internal/src/kernel/cpu/resize_1d_nearest_cpu.c @@ -0,0 +1,271 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "vsi_nn_types.h" +#include "vsi_nn_tensor.h" +#include "vsi_nn_graph.h" +#include "vsi_nn_log.h" +#include "vsi_nn_error.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_tensor_util.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "libnnext/vx_lib_nnext.h" + +__BEGIN_DECLS + +/* + * Define kernel meta.
+ */ +#define _INPUT_NUM (1) +#define _OUTPUT_NUM (1) +#define _KERNEL_NAME CVIVANTE_NAMESPACE("cpu.resize_1d_nearest") + + +/* + * Kernel params + */ +static vx_param_description_t _resize_1d_nearest_kernel_param_def[] = +{ + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, +}; +#define _RESIZE_1D_NEAREST_PARAM_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def ) + +#define SCALAR_ALIGN_CORNERS (2) +#define SCALAR_HALF_PIXEL (3) + +/* + * Kernel function + */ +DEF_KERNEL_EXECUTOR(_compute) + ( + vsi_nn_kernel_node_t node, + const vsi_nn_kernel_node_param_t * param, + size_t param_size + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL}; + vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL}; + float *f32_in_buffer[_INPUT_NUM] = {NULL}; + float *f32_out_buffer[_OUTPUT_NUM] = {NULL}; + vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM]; + vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM]; + size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}}; + size_t out_elements[_OUTPUT_NUM] = {0}; + size_t out_bytes[_OUTPUT_NUM] = {0}; + uint32_t i = 0; + int32_t align_corners = 0; + int32_t half_pixel_centers = 0; + float width_scale = 1.0f; + uint32_t input_width = 0, output_width = 0; + uint32_t w = 0, out = 0; + uint32_t output_dims = 0; + uint32_t outer = 0; + /* prepare data */ + for (i = 0; i < _INPUT_NUM; i ++) + { + input[i] = (vsi_nn_kernel_tensor_t)param[i]; + in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] ); + f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE ); + CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final ); + + } + for (i = 0; i < _OUTPUT_NUM; i ++) + { + output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM]; + out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] ); + vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] ); + out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] ); + out_bytes[i] = out_elements[i] * sizeof(float); + f32_out_buffer[i] = (float *)malloc( out_bytes[i] ); + CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final ); + memset( f32_out_buffer[i], 0, out_bytes[i] ); + } + + vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners)); + vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers)); + input_width = in_attr[0]->shape->data[0]; + output_width = out_attr[0]->shape->data[0]; + output_dims = (uint32_t)out_attr[0]->shape->size; + + if (align_corners && output_width > 1) + { + width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1); + } + else + { + width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width; + } + + outer = 1; + + for (i = 1; i < output_dims; i++) + { + outer = outer * out_attr[0]->shape->data[i]; + } + + for (out = 0; out < outer; out++) + { + vx_int32 input_base = out * input_width; + vx_int32 output_base = out * output_width; + + for (w = 0; w < output_width; w ++) + { + float input_w; + uint32_t in_x; + int32_t in_index; + int32_t out_index; + + if (half_pixel_centers) + { + input_w = ((float)w + 0.5f) * width_scale; + } + else + { + input_w = w * width_scale; + } + if (align_corners) + { + in_x = 
vsi_nn_min((uint32_t)simple_round(input_w), input_width - 1); + } + else + { + in_x = vsi_nn_min((uint32_t)floorf(input_w), input_width - 1); + } + in_index = in_x + input_base; + out_index = w + output_base; + f32_out_buffer[0][out_index] = f32_in_buffer[0][in_index]; + } + } + + /* save data */ + for (i = 0; i < _OUTPUT_NUM; i++) + { + status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i], + f32_out_buffer[i], out_elements[i] ); + CHECK_STATUS_FAIL_GOTO( status, final ); + } + +final: + for (i = 0; i < _INPUT_NUM; i++) + { + if (f32_in_buffer[i]) + { + free(f32_in_buffer[i]); + f32_in_buffer[i] = NULL; + } + if (in_attr[i]) + { + vsi_nn_kernel_tensor_attr_release( &in_attr[i] ); + } + } + + for (i = 0; i < _OUTPUT_NUM; i++) + { + if (f32_out_buffer[i]) + { + free(f32_out_buffer[i]); + f32_out_buffer[i] = NULL; + } + if (out_attr[i]) + { + vsi_nn_kernel_tensor_attr_release( &out_attr[i] ); + } + } + + return status; +} /* _compute() */ + + +/* + * Query kernel + */ +static vsi_status _query_kernel + ( + vsi_nn_kernel_t * kernel, + vsi_nn_tensor_t * const * const inputs, + vsi_nn_tensor_t * const * const outputs + /* Add extra params */ + ) +{ + vsi_status status = VSI_FAILURE; + snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME ); + kernel->info.function = _compute; + kernel->info.parameters = _resize_1d_nearest_kernel_param_def; + kernel->info.numParams = _cnt_of_array( _resize_1d_nearest_kernel_param_def ); + status = VSI_SUCCESS; + return status; +} /* _query_kernel() */ + + +static vsi_nn_kernel_node_t _setup + ( + vsi_nn_graph_t * graph, + vsi_nn_tensor_t ** inputs, + size_t input_num, + vsi_nn_tensor_t ** outputs, + size_t output_num, + const vsi_nn_kernel_param_t * params, + vsi_nn_kernel_t * kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM]; + vsi_nn_kernel_node_t node = NULL; + int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" ); + int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" ); + + status = _query_kernel( kernel, inputs, outputs ); + if ( VSI_SUCCESS == status) + { + node = vsi_nn_kernel_create_node( graph, kernel ); + if ( node ) + { + /* Set inputs and outputs */ + vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM, + inputs, input_num, outputs, output_num ); + node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners ); + node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers ); + /* Pass parameters to node. 
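+ * The "align_corners" and "half_pixel_centers" values read in _setup() are
+ * assumed to be supplied by the calling operation through the
+ * vsi_nn_kernel_param dictionary (e.g. via vsi_nn_kernel_param_add_int32(),
+ * the counterpart of the vsi_nn_kernel_param_get_int32() calls above);
+ * that call site is outside this file.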
*/ + status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM ); + VSI_ASSERT( status == VSI_SUCCESS ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] ); + } + } + return node; +} /* _setup() */ + +__END_DECLS + +REGISTER_BACKEND_CPU( resize_1d_nearest, _setup ) + diff --git a/src/tim/vx/internal/src/kernel/evis/add_mean_std_norm_evis.c b/src/tim/vx/internal/src/kernel/evis/add_mean_std_norm_evis.c index 501e860..97aa183 100644 --- a/src/tim/vx/internal/src/kernel/evis/add_mean_std_norm_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/add_mean_std_norm_evis.c @@ -279,10 +279,10 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer) if( U8 == input_dtype && F16 == output_dtype ) { - vx_uint16 M0 = 0; - vx_int8 postShift = 0; - vx_uint32 multAndoutZP0[2] = {0}; - vx_uint32 multAndoutZP1[2] = {0}; + uint16_t M0 = 0; + int32_t postShift = 0; + uint32_t multAndoutZP0[2] = {0}; + uint32_t multAndoutZP1[2] = {0}; gpu_dp_inst_t uniU8MulAndPostShift_0_Lo_2x8 = {{ 0xdddddddd, // TCfg @@ -305,12 +305,12 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer) 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant }, GPU_DP_TYPE_16}; - vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift); + gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift); multAndoutZP0[0] = (vx_uint32)(M0); multAndoutZP0[1] = (vx_uint32)((output_ZP << postShift) - input_ZP * M0); uniU8MulAndPostShift_0_Lo_2x8.data[7] |= (postShift & 0x1F); - vsi_nn_GetFP32MultiAndPostShift(scaleIn1 / scaleOut, &M0, &postShift); + gpu_quantize_multiplier_16bit(scaleIn1 / scaleOut, &M0, &postShift); multAndoutZP1[0] = (vx_uint32)(M0); multAndoutZP1[1] = (vx_uint32)((output_ZP << postShift) - input_ZP1 * M0); uniU8MulAndPostShift_1_Lo_2x8.data[7] |= (postShift & 0x1F); diff --git a/src/tim/vx/internal/src/kernel/evis/clip_evis.c b/src/tim/vx/internal/src/kernel/evis/clip_evis.c index 78d52fc..f0c673a 100644 --- a/src/tim/vx/internal/src/kernel/evis/clip_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/clip_evis.c @@ -268,7 +268,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer) { uint32_t multAndoutZP[2] = {0}; uint16_t M0 = 0; - int8_t postShift = 0; + int32_t postShift = 0; gpu_dp_inst_t uniDataMulAndPostShift_2x8 = {{ 0xdddddddd, // TCfg 0x44444444, // ASelt @@ -279,7 +279,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer) 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant }, GPU_DP_TYPE_16}; - vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift); + gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift); multAndoutZP[0] = (uint32_t)(M0); multAndoutZP[1] = (uint32_t)(output_ZP << postShift ); @@ -434,8 +434,8 @@ DEF_KERNEL_INITIALIZER(_clip_initializer) int32_t packedMaxData[4]; float uint8Scale = scaleIn / scaleOut; uint16_t M0 = 0; - int8_t postShift = 0; - uint32_t multAndoutZP[2] = {0}; + int32_t postShift = 0; + uint32_t multAndoutZP[2] = {0}; gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{ 0xdddddddd, // TCfg 0x44444444, // ASelt @@ -457,7 +457,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer) 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant }, GPU_DP_TYPE_16}; - vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift); + gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift); multAndoutZP[0] = (uint32_t)(M0); multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0); diff --git 
a/src/tim/vx/internal/src/kernel/evis/depthwise_conv1d_evis.c b/src/tim/vx/internal/src/kernel/evis/depthwise_conv1d_evis.c index 9c8cbab..2c7c4f6 100644 --- a/src/tim/vx/internal/src/kernel/evis/depthwise_conv1d_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/depthwise_conv1d_evis.c @@ -702,27 +702,66 @@ static vsi_nn_kernel_node_t _setup ) { vsi_status status = VSI_FAILURE; - vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM]; + vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM] = {NULL}; vsi_nn_kernel_node_t node = NULL; int32_t weight_pad_front[VSI_NN_MAX_DIM_NUM] = {0}; int32_t weight_pad_end[VSI_NN_MAX_DIM_NUM] = {0}; vsi_nn_tensor_t * weights = NULL; vsi_nn_tensor_t * biases = NULL; vsi_nn_tensor_t *temp_tensor[3] = {NULL}; + vsi_nn_tensor_t* reshape_tensors[3] = { NULL }; + int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 }; + int32_t new_rank = 2; + uint32_t i = 0; int32_t stride = vsi_nn_kernel_param_get_int32( params, "stride" ); int32_t pad_front = vsi_nn_kernel_param_get_int32( params, "pad_front" ); int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" ); int32_t dilation = vsi_nn_kernel_param_get_int32( params, "dilation" ); _internal_kernel_size_e ks = KN; - weight_pad_end[0] = gpu_align_np2_safe(inputs[1]->attr.size[0], 8) - inputs[1]->attr.size[0]; + if (!((VSI_NN_TYPE_UINT8 == inputs[0]->attr.dtype.vx_type) + && (VSI_NN_TYPE_UINT8 == inputs[1]->attr.dtype.vx_type) + && (NULL == inputs[2] || VSI_NN_TYPE_INT32 == inputs[2]->attr.dtype.vx_type) + && (VSI_NN_TYPE_UINT8 == outputs[0]->attr.dtype.vx_type))) + { + return NULL; + } - weights = vsi_nn_pad_tensor(graph, inputs[1], weight_pad_front, weight_pad_end, inputs[1]->attr.dim_num, - VSI_NN_PAD_MODE_CONSTANT, 0); + reshape_tensors[0] = inputs[0]; - biases = vsi_nn_merge_input_zeropoint_to_bias(graph, inputs[0], inputs[1], inputs[2]); + if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC) + { + shape[0] = inputs[1]->attr.size[0]; + shape[1] = 1; + for (i = 1; i < inputs[1]->attr.dim_num; i++) + { + shape[1] *= inputs[1]->attr.size[i]; + } + reshape_tensors[1] = vsi_nn_reshape_tensor( graph, + inputs[1], (uint32_t*)shape, new_rank ); + } + else + { + reshape_tensors[1] = inputs[1]; + } - temp_tensor[0] = inputs[0]; + if (inputs[2] && inputs[2]->attr.dim_num == 1) + { + shape[0] = inputs[2]->attr.size[0]; + shape[1] = 1; + new_rank = 2; + reshape_tensors[2] = vsi_nn_reshape_tensor( graph, + inputs[2], (uint32_t*)shape, new_rank ); + } + + weight_pad_end[0] = gpu_align_np2_safe(reshape_tensors[1]->attr.size[0], 8) - reshape_tensors[1]->attr.size[0]; + + weights = vsi_nn_pad_tensor(graph, reshape_tensors[1], weight_pad_front, weight_pad_end, + reshape_tensors[1]->attr.dim_num, VSI_NN_PAD_MODE_CONSTANT, 0); + + biases = vsi_nn_merge_input_zeropoint_to_bias(graph, reshape_tensors[0], reshape_tensors[1], reshape_tensors[2]); + + temp_tensor[0] = reshape_tensors[0]; temp_tensor[1] = weights; temp_tensor[2] = biases; @@ -760,6 +799,16 @@ static vsi_nn_kernel_node_t _setup } } + if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC) + { + vsi_nn_ReleaseTensor( &reshape_tensors[1] ); + } + + if (inputs[2] && inputs[2]->attr.dim_num == 1) + { + vsi_nn_ReleaseTensor( &reshape_tensors[2] ); + } + if (weights) { vsi_nn_ReleaseTensor(&weights); diff --git a/src/tim/vx/internal/src/kernel/evis/detect_post_box_evis.c b/src/tim/vx/internal/src/kernel/evis/detect_post_box_evis.c index 4d67bf3..6cba0a0 100644 --- 
a/src/tim/vx/internal/src/kernel/evis/detect_post_box_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/detect_post_box_evis.c @@ -164,9 +164,9 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer) else if ((U8 == input_attr->dtype) || (U8 == input1_attr->dtype)) { uint16_t M0 = 0; - int8_t postShift0 = 0; + int32_t postShift0 = 0; uint16_t M1 = 0; - int8_t postShift1 = 0; + int32_t postShift1 = 0; uint32_t i = 0; gpu_dp_inst_t uniU8SubZptoF32Conv0_4x4 = {{ 0x09090909, // TCfg @@ -188,8 +188,8 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer) 0x00010001, 0x00000000, 0x00010001, 0x00000000, 0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant }, GPU_DP_TYPE_16 }; - vsi_nn_GetFP32MultiAndPostShift(scaleIn0, &M0, &postShift0); - vsi_nn_GetFP32MultiAndPostShift(scaleIn1, &M1, &postShift1); + gpu_quantize_multiplier_16bit(scaleIn0, &M0, &postShift0); + gpu_quantize_multiplier_16bit(scaleIn1, &M1, &postShift1); uniU8SubZptoF32Conv0_4x4.data[7] |= (postShift0 & 0x1F); uniU8SubZptoF32Conv1_4x4.data[7] |= (postShift1 & 0x1F); for ( i = 0; i < 8; i++ ) diff --git a/src/tim/vx/internal/src/kernel/evis/eltwise_unary_evis.c b/src/tim/vx/internal/src/kernel/evis/eltwise_unary_evis.c index 995455c..e6831f7 100644 --- a/src/tim/vx/internal/src/kernel/evis/eltwise_unary_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/eltwise_unary_evis.c @@ -266,9 +266,11 @@ static vx_param_description_t kernel_param_def[] = {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, }; #define INPUT_FUNC_TYPE (2) +#define INPUT_SCALAR_ALPHA (3) #define _CL_PARAM_NUM _cnt_of_array(kernel_param_def) /* @@ -296,6 +298,7 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer) float inputTail = 0; float outputScale = 1.0f; float outputZP = 0; + float alpha = 0; uint32_t pack_key; attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); @@ -303,7 +306,9 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer) attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final ); - status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &type); + status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[INPUT_FUNC_TYPE], &type); + CHECK_STATUS_FAIL_GOTO(status, final ); + status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[INPUT_SCALAR_ALPHA], &alpha); CHECK_STATUS_FAIL_GOTO(status, final ); out_shape = attr[1]->shape; @@ -408,6 +413,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer) "uniConvBF16toF32_Part1_2x8", &uniConvBF16toF32_Part1_2x8 ); status |= vsi_nn_kernel_gpu_add_param( node, "uniExtractOddData_2x8", &uniExtractOddData_2x8 ); + status |= vsi_nn_kernel_gpu_add_param( node, + "alpha", &alpha ); CHECK_STATUS_FAIL_GOTO(status, final ); } break; @@ -466,6 +473,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer) "outputScale", &outputScale ); status |= vsi_nn_kernel_gpu_add_param( node, "outputZP", &outputZP ); + status |= vsi_nn_kernel_gpu_add_param( node, + "alpha", &alpha ); if (attr[1]->dtype == F16) { @@ -555,7 +564,8 @@ static vsi_nn_kernel_node_t _setup vsi_nn_tensor_t* rs_tensors[2] = { NULL }; int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 }; int32_t new_rank = 0; - vsi_bool ret; + vsi_bool ret = FALSE; + float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" ); ret = 
vsi_nn_kernel_optimize_element_shape( (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num, @@ -586,6 +596,8 @@ static vsi_nn_kernel_node_t _setup rs_tensors, 1, &rs_tensors[1], 1 ); node_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &unary_type ); + node_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create( + graph, F32, &alpha ); /* Pass parameters to node. */ status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM ); @@ -609,6 +621,11 @@ OnError: vsi_nn_kernel_scalar_release( &node_params[INPUT_FUNC_TYPE] ); } + if (node_params[INPUT_SCALAR_ALPHA]) + { + vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_ALPHA] ); + } + return node; } /* _setup() */ diff --git a/src/tim/vx/internal/src/kernel/evis/matrixmul_evis.c b/src/tim/vx/internal/src/kernel/evis/matrixmul_evis.c index 12bac55..53fce38 100644 --- a/src/tim/vx/internal/src/kernel/evis/matrixmul_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/matrixmul_evis.c @@ -181,8 +181,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer) float dstScale = 0; uint16_t M0 = 0; uint16_t M1 = 0; - int8_t postShift0 = 0; - int8_t postShift1 = 0; + int32_t postShift0 = 0; + int32_t postShift1 = 0; uint32_t pack_key = 0; int32_t ac2zero = 0; @@ -279,8 +279,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer) dstScale = 1; dstZP = 0.0f; } - vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postShift0); - vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postShift1); + gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postShift0); + gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postShift1); mulKIn0In1Zp = (float)((int)(K + 3) / 4 * 4 * src1ZP * src0ZP); inOutScale = src0Scale * src1Scale / dstScale; diff --git a/src/tim/vx/internal/src/kernel/evis/moments_evis.c b/src/tim/vx/internal/src/kernel/evis/moments_evis.c index 032d473..4416328 100644 --- a/src/tim/vx/internal/src/kernel/evis/moments_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/moments_evis.c @@ -588,7 +588,8 @@ static vsi_nn_kernel_node_t _setup vsi_nn_kernel_node_param_t node_params[_MOMENTS_PARAM_NUM] = { NULL }; vsi_nn_kernel_node_t node = NULL; int32_t axis_num = 0; - int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num); + size_t axis_num_temp = 0; + int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp); int32_t axis_first = axis[0]; int32_t shapes[2][VSI_NN_MAX_DIM_NUM] = { { 1, 1, 1, 1 } }; vsi_nn_tensor_t* reshape_tensors[3] = { NULL }; @@ -602,6 +603,8 @@ static vsi_nn_kernel_node_t _setup vsi_bool image_2d = FALSE; vsi_bool is_continue_axis = TRUE; + axis_num = (int32_t)axis_num_temp; + for ( i = 1; i < axis_num; i++) { if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0) diff --git a/src/tim/vx/internal/src/kernel/evis/poolwithargmax_evis.c b/src/tim/vx/internal/src/kernel/evis/poolwithargmax_evis.c index db0aea8..603be47 100644 --- a/src/tim/vx/internal/src/kernel/evis/poolwithargmax_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/poolwithargmax_evis.c @@ -139,7 +139,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer) int32_t input_fl = 0; int32_t output_fl = 0; uint16_t M0 = 0; - int8_t postShift = 0; + int32_t postShift = 0; float inputScale = 1.0f; int32_t input_ZP = 0; float outputScale = 1.0f; @@ -193,7 +193,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer) if ( ( input_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM ) && ( output_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM ) ) { - 
vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift); + gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift); } image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]); diff --git a/src/tim/vx/internal/src/kernel/evis/pow_evis.c b/src/tim/vx/internal/src/kernel/evis/pow_evis.c index 701d23e..35b25a0 100644 --- a/src/tim/vx/internal/src/kernel/evis/pow_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/pow_evis.c @@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_pow_initializer) float dstZP = 0; float dstScale = 1.0f; - int8_t postshift0 = 0; - int8_t postshift1 = 0; + int32_t postshift0 = 0; + int32_t postshift1 = 0; float outScale_fl = 1; uint16_t M0 = 0; @@ -229,7 +229,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer) src0ZP = attr[0]->asymm.zero_point; src0Scale = attr[0]->asymm.scale; - vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postshift0); + gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postshift0); } if ( attr[1]->quant == VSI_NN_KERNEL_QUANT_DFP ) @@ -243,7 +243,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer) src1ZP = attr[1]->asymm.zero_point; src1Scale = attr[1]->asymm.scale; - vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postshift1); + gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postshift1); } if ( attr[2]->quant == VSI_NN_KERNEL_QUANT_DFP ) diff --git a/src/tim/vx/internal/src/kernel/evis/resize_1d_bilinear_evis.c b/src/tim/vx/internal/src/kernel/evis/resize_1d_bilinear_evis.c new file mode 100644 index 0000000..f1166dd --- /dev/null +++ b/src/tim/vx/internal/src/kernel/evis/resize_1d_bilinear_evis.c @@ -0,0 +1,1344 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include +#include +#include +#include "vsi_nn_types.h" +#include "vsi_nn_tensor.h" +#include "vsi_nn_graph.h" +#include "vsi_nn_log.h" +#include "vsi_nn_error.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_tensor_util.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "libnnext/vx_lib_nnext.h" +#include "utils/vsi_nn_dtype_util_prv.h" + +__BEGIN_DECLS + +/* + * Define kernel meta. 
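+ *
+ * A minimal sketch of the 1D bilinear weight computation that the kernels in
+ * this file and the scale tensor built by _create_scale_tensor() below rely
+ * on; bilinear_1d_weights is a hypothetical helper used only for illustration.
+ */
+#if 0
+static void bilinear_1d_weights(uint32_t x, float scale_x, int32_t half_pixel_centers,
+                                int32_t * w0, float * left, float * right)
+{
+    /* Fractional source column for output column x. */
+    float src = half_pixel_centers ? ((float)x + 0.5f) * scale_x - 0.5f
+                                   : (float)x * scale_x;
+    *w0    = (int32_t)src;         /* left source column (truncated toward zero) */
+    *right = src - (float)(*w0);   /* weight applied to column w0 + 1 */
+    *left  = 1.0f - *right;        /* weight applied to column w0 */
+}
+/* Example: a 2x upscale with half_pixel_centers (scale_x = 0.5) yields alternating
+ * (left, right) weights of (0.75, 0.25) and (0.25, 0.75), which the half-float
+ * constants 0x3a00 / 0x3400 in uniResize2xUp_half_2x8 below appear to encode. */
+#endif
+/*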
+ */ +typedef enum +{ + DOWN = 0, + DOWN_2X_SAME, + DOWN_2X_HALF_SAME, + UP, + UP_OPT, + UP_2X_SAME, + UP_2X_HALF_SAME, + UP_4X_SAME, + UP_4X_HALF_SAME, + UP_8X_SAME, + UP_8X_HALF_SAME, +} _internal_scale_e; + +#define _RESIZE_BILINEAR_KERNEL_SOURCE(_input_type) "resize_1d_bilinear_"#_input_type +#define _RESIZE_BILINEAR_KERNEL_SOURCE_OPT(_input_type) "resize_1d_bilinear_"#_input_type"_opt" +#define _RESIZE_BILINEAR_KERNEL_SOURCE_UP_NX() "resize_1d_bilinear_UP_NX" +#define _RESIZE_BILINEAR_KERNEL_SOURCE_DOWN_NX() "resize_1d_bilinear_DOWN_NX" + +#define STR(a) #a +// Add kernel hashtable here +#define RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE, scale_flag ) \ + (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) | (scale_flag)) + +#define PACK_KERNEL_MAP_DOWN( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE, DOWN ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_DOWN"), \ + _RESIZE_BILINEAR_KERNEL_SOURCE(IN_DTYPE) } + +#define PACK_KERNEL_MAP_UP( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE, UP ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_UP"), \ + _RESIZE_BILINEAR_KERNEL_SOURCE(IN_DTYPE) } + +#define PACK_KERNEL_MAP_UP_OPT( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE, UP_OPT ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_UP_opt"), \ + _RESIZE_BILINEAR_KERNEL_SOURCE_OPT(IN_DTYPE) } + +#define PACK_KERNEL_MAP_UP_NX( IN_DTYPE, OUT_DTYPE, MODE_TYPE ) \ + { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE, MODE_TYPE ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_"STR(MODE_TYPE)), \ + _RESIZE_BILINEAR_KERNEL_SOURCE_UP_NX() } + +#define PACK_KERNEL_MAP_DOWN_NX( IN_DTYPE, OUT_DTYPE, MODE_TYPE ) \ + { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE, MODE_TYPE ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_"STR(MODE_TYPE)), \ + _RESIZE_BILINEAR_KERNEL_SOURCE_DOWN_NX() } + +typedef struct +{ + uint32_t key; + char * function_name; + const char * source_name; +} _kernel_map_type; + +static const _kernel_map_type _resize_1d_bilinear_kernel_map[] = +{ + PACK_KERNEL_MAP_DOWN(I8, I8), + PACK_KERNEL_MAP_DOWN(I16, I16), + PACK_KERNEL_MAP_DOWN(U8, F16), + PACK_KERNEL_MAP_DOWN(U8, U8), + PACK_KERNEL_MAP_DOWN(F16, F16), + PACK_KERNEL_MAP_DOWN(F16, U8), + PACK_KERNEL_MAP_DOWN(BF16, BF16), + PACK_KERNEL_MAP_UP(I8, I8), + PACK_KERNEL_MAP_UP(I16, I16), + PACK_KERNEL_MAP_UP(U8, U8), + PACK_KERNEL_MAP_UP(F16, F16), + PACK_KERNEL_MAP_UP(BF16, BF16), + PACK_KERNEL_MAP_UP_OPT(U8, U8), + PACK_KERNEL_MAP_UP_NX(U8, U8, UP_2X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(U8, U8, UP_2X_SAME), + PACK_KERNEL_MAP_UP_NX(I8, I8, UP_2X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(I8, I8, UP_2X_SAME), + PACK_KERNEL_MAP_UP_NX(I16, I16, UP_2X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(I16, I16, UP_2X_SAME), + PACK_KERNEL_MAP_UP_NX(F16, F16, UP_2X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(F16, F16, UP_2X_SAME), + PACK_KERNEL_MAP_UP_NX(U8, U8, UP_4X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(U8, U8, UP_4X_SAME), + PACK_KERNEL_MAP_UP_NX(I8, I8, UP_4X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(I8, I8, UP_4X_SAME), + PACK_KERNEL_MAP_UP_NX(I16, I16, UP_4X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(I16, I16, UP_4X_SAME), + PACK_KERNEL_MAP_UP_NX(F16, F16, UP_4X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(F16, F16, UP_4X_SAME), + PACK_KERNEL_MAP_UP_NX(U8, U8, UP_8X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(U8, U8, UP_8X_SAME), + 
PACK_KERNEL_MAP_UP_NX(I8, I8, UP_8X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(I8, I8, UP_8X_SAME), + PACK_KERNEL_MAP_UP_NX(I16, I16, UP_8X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(I16, I16, UP_8X_SAME), + PACK_KERNEL_MAP_UP_NX(F16, F16, UP_8X_HALF_SAME), + PACK_KERNEL_MAP_UP_NX(F16, F16, UP_8X_SAME), + PACK_KERNEL_MAP_DOWN_NX(U8, U8, DOWN_2X_HALF_SAME), + PACK_KERNEL_MAP_DOWN_NX(U8, U8, DOWN_2X_SAME), + PACK_KERNEL_MAP_DOWN_NX(I8, I8, DOWN_2X_HALF_SAME), + PACK_KERNEL_MAP_DOWN_NX(I8, I8, DOWN_2X_SAME), + PACK_KERNEL_MAP_DOWN_NX(I16, I16, DOWN_2X_HALF_SAME), + PACK_KERNEL_MAP_DOWN_NX(I16, I16, DOWN_2X_SAME), + PACK_KERNEL_MAP_DOWN_NX(F16, F16, DOWN_2X_HALF_SAME), + PACK_KERNEL_MAP_DOWN_NX(F16, F16, DOWN_2X_SAME), +}; + + +/* + * Kernel params + */ +static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] = +{ + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, +}; +#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def ) +#define _RESIZE_NO_SCALE_PARAM_NUM 4 +#define _RESIZE_1D_NX_KERENL_PARAM_NUM 3 + +#define SCALAR_ALIGN_CORNERS (2) +#define SCALAR_HALF_PIXEL (3) +#define SCALAR_TENSOR_SCALE (4) +#define SCALAR_SCALE_TYPE (2) + +/* + * Kernel initializer + */ +DEF_KERNEL_INITIALIZER(_resize_1d_bilinear_initializer) + ( + vsi_nn_kernel_node_t node, + const vsi_nn_kernel_node_param_t * param, + size_t param_size + ) +{ + vsi_status status = VSI_FAILURE; + gpu_param_t gpu_param = { + 3, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0} + }; + vsi_nn_kernel_tensor_attr_t * output_attr = NULL; + vsi_nn_kernel_tensor_attr_t * input_attr = NULL; + vsi_int_array_t * out_shape = NULL; + vsi_int_array_t * in_shape = NULL; + vsi_nn_kernel_dtype_e input_dtype = F16; + vsi_nn_kernel_dtype_e output_dtype = F16; + int32_t align_corners = 0; + int32_t half_pixel_centers = 0; + + uint32_t depth = 0; + int32_t srcFixPointPos = 0; + int32_t dstFixPointPos = 0; + float input_scale = 1.0; + int32_t inputZP = 0; + float output_scale = 1.0; + int32_t outputZP = 0; + float scale_factor = 1.0f; + uint32_t in_width = 1; + uint32_t out_width = 1; + uint32_t out_height = 1; + float half_pixel_value = 0.0f; + vsi_bool is_use_scale_kernel = (vsi_bool)(_RESIZE_1D_BILINEAR_PARAM_NUM == param_size); + _internal_scale_e scale_flag = DOWN; + vsi_bool is_run_nx_kernel = (vsi_bool)(_RESIZE_1D_NX_KERENL_PARAM_NUM == param_size); + int32_t scale_type_value = 0; + + input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); + CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final ); + output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); + CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); + + if (is_run_nx_kernel) + { + status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &scale_type_value); + CHECK_STATUS_FAIL_GOTO(status, final ); + scale_flag = (_internal_scale_e)scale_type_value; + } + else + { + status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &align_corners); + CHECK_STATUS_FAIL_GOTO(status, final ); + status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &half_pixel_centers); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + out_shape = output_attr->shape; + in_shape = 
input_attr->shape; + input_dtype = input_attr->dtype; + output_dtype = output_attr->dtype; + + in_width = in_shape->data[0]; + depth = in_shape->data[2]; + out_width = out_shape->data[0]; + out_height = out_shape->data[1]; + + if (align_corners && out_width > 1) + { + scale_factor = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1); + } + else + { + scale_factor = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width; + } + + if (half_pixel_centers) + { + half_pixel_value = 0.5f; + } + else + { + half_pixel_value = 0.0f; + } + + if (VSI_NN_KERNEL_QUANT_ASYMM == input_attr->quant ) + { + input_scale = input_attr->asymm.scale; + inputZP = input_attr->asymm.zero_point; + } + else if (VSI_NN_KERNEL_QUANT_DFP == input_attr->quant) + { + srcFixPointPos = input_attr->dfp.fl; + if (srcFixPointPos >= 0) + { + input_scale = 1.0f / (vx_float32) ((int64_t)1 << srcFixPointPos); + } + else if (srcFixPointPos < 0) + { + input_scale = (vx_float32)((int64_t)1 << -srcFixPointPos); + } + inputZP = 0; + } + else + { + input_scale = 1.0f; + inputZP = 0; + } + + if (VSI_NN_KERNEL_QUANT_ASYMM == output_attr->quant ) + { + output_scale = output_attr->asymm.scale; + outputZP = output_attr->asymm.zero_point; + } + else if (VSI_NN_KERNEL_QUANT_DFP == output_attr->quant) + { + dstFixPointPos = output_attr->dfp.fl; + if (dstFixPointPos >= 0) + { + output_scale = (vx_float32) ((int64_t)1 << dstFixPointPos); + } + else if (dstFixPointPos < 0) + { + output_scale = 1.0f / (vx_float32) ((int64_t)1 << -dstFixPointPos); + } + outputZP = 0; + } + else + { + output_scale = 1.0; + outputZP = 0; + } + + if (is_run_nx_kernel) + { + gpu_param.global_scale[0] = 8; + gpu_param.global_scale[1] = 1; + } + else + { + gpu_param.global_scale[0] = 4; + gpu_param.global_scale[1] = 1; + gpu_param.global_scale[2] = 1; + } + + if (is_run_nx_kernel) + { + gpu_dp_inst_t uniResize2xUp_half_2x8 = {{ + 0x55555555, // TCfg + 0x00000000, // ASelt + 0x32212110, 0x54434332, // ABin + 0xaaaaaaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x3a003400, 0x34003a00, 0x3a003400, 0x34003a00, + 0x3a003400, 0x34003a00, 0x3a003400, 0x34003a00 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize2xUp_2x8 = {{ + 0x51515151, // TCfg + 0x00000000, // ASelt + 0x21011000, 0x43033202, // ABin + 0xa2a2a2a2, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x38003800, 0x00003c00, 0x38003800, + 0x00003c00, 0x38003800, 0x00003c00, 0x38003800 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize4xUp_half_2x8 = {{ + 0x55555555, // TCfg + 0x00000000, // ASelt + 0x21211010, 0x32322121, // ABin + 0xaaaaaaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x39003600, 0x3b003000, 0x30003b00, 0x36003900, + 0x39003600, 0x3b003000, 0x30003b00, 0x36003900 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize4xUp_2x8 = {{ + 0x55515551, // TCfg + 0x00000000, // ASelt + 0x10101000, 0x21212101, // ABin + 0xaaa2aaa2, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x34003a00, 0x38003800, 0x3a003400, + 0x00003c00, 0x34003a00, 0x38003800, 0x3a003400 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize8xUp_half_2x8 = {{ + 0x55555555, // TCfg + 0x00000000, // ASelt + 0x10101010, 0x21212121, // ABin + 0xaaaaaaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 
0x38803700, 0x39803500, 0x3a803200, 0x3b802c00, + 0x2c003b80, 0x32003a80, 0x35003980, 0x37003880 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize8xUp_2x8 = {{ + 0x55555551, // TCfg + 0x00000000, // ASelt + 0x10101000, 0x10101010, // ABin + 0xaaaaaaa2, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x30003b00, 0x34003a00, 0x36003900, + 0x38003800, 0x39003600, 0x3a003400, 0x3b003000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize2xDown_8bit_half_2x8 = {{ + 0x55555555, // TCfg + 0x00000000, // ASelt + 0x76543210, 0xfedcba98, // ABin + 0xaaaaaaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x38003800, 0x38003800, 0x38003800, 0x38003800, + 0x38003800, 0x38003800, 0x38003800, 0x38003800 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize2xDown_8bit_2x8 = {{ + 0x11111111, // TCfg + 0x00000000, // ASelt + 0x06040200, 0x0e0c0a08, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x00003c00, 0x00003c00, 0x00003c00, + 0x00003c00, 0x00003c00, 0x00003c00, 0x00003c00 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize2xDown_16bit_half_2x8 = {{ + 0x55555555, // TCfg + 0x55550000, // ASelt + 0x76543210, 0x76543210, // ABin + 0xaaaaaaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x38003800, 0x38003800, 0x38003800, 0x38003800, + 0x38003800, 0x38003800, 0x38003800, 0x38003800 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniResize2xDown_16bit_2x8 = {{ + 0x11111111, // TCfg + 0x11110000, // ASelt + 0x06040200, 0x06040200, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x00003c00, 0x00003c00, 0x00003c00, + 0x00003c00, 0x00003c00, 0x00003c00, 0x00003c00 // Constant + }, GPU_DP_TYPE_16}; + + status = vsi_nn_kernel_gpu_add_param( node, "out_height", &out_height); + if ( UP_2X_HALF_SAME == scale_flag ) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxUp_2x8", &uniResize2xUp_half_2x8); + } + else if ( UP_2X_SAME == scale_flag ) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxUp_2x8", &uniResize2xUp_2x8); + } + else if ( UP_4X_HALF_SAME == scale_flag ) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxUp_2x8", &uniResize4xUp_half_2x8); + } + else if ( UP_4X_SAME == scale_flag ) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxUp_2x8", &uniResize4xUp_2x8); + } + else if ( UP_8X_HALF_SAME == scale_flag ) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxUp_2x8", &uniResize8xUp_half_2x8); + } + else if ( UP_8X_SAME == scale_flag ) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxUp_2x8", &uniResize8xUp_2x8); + } + else if ( DOWN_2X_HALF_SAME == scale_flag ) + { + if (I8 == input_dtype || U8 == input_dtype) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxDown_2x8", &uniResize2xDown_8bit_half_2x8); + } + else + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxDown_2x8", &uniResize2xDown_16bit_half_2x8); + } + } + else if ( DOWN_2X_SAME == scale_flag ) + { + if (I8 == input_dtype || U8 == input_dtype) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxDown_2x8", &uniResize2xDown_8bit_2x8); + } + else + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniResizeNxDown_2x8", &uniResize2xDown_16bit_2x8); + } + } + 
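+            /* A hedged reading of the two DOWN_2X tables above, assuming the 16-bit
+               constants are half-precision floats (0x3c00 = 1.0, 0x3800 = 0.5):
+               DOWN_2X_SAME maps output column x to source column 2x exactly, so each
+               lane applies a single 1.0 weight, while DOWN_2X_HALF_SAME maps x to
+               2x + 0.5 and averages columns 2x and 2x + 1 with 0.5/0.5 weights.
+               The UP_2X tables encode the matching upscale weights, e.g. (0.75, 0.25)
+               and (0.25, 0.75) for the half-pixel case. */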
CHECK_STATUS_FAIL_GOTO(status, final ); + } + else if (VSI_NN_KERNEL_QUANT_DFP == input_attr->quant) + { + float dfpScale = input_scale * output_scale; + gpu_dp_inst_t uniConvertDFP2FP32_4x4 = {{ + 0x01010101, // TCfg + 0x00000000, // ASelt + 0x00010000, 0x00030002, // ABin + 0x02020202, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000300, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000000, 0x00000001, 0x00000000, + 0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniExtact8Bit_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + vx_uint32 uniConvertDFP2FP32_left_4x4[16] = { + 0x01010101, // TCfg + 0x00000000, // ASelt + 0x00020000, 0x00060004, // ABin + 0x02020202, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000000, 0x00000001, 0x00000000, + 0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant + }; + vx_uint32 uniConvertDFP2FP32_right_4x4[16] = { + 0x01010101, // TCfg + 0x00000000, // ASelt + 0x00030001, 0x00070005, // ABin + 0x02020202, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000000, 0x00000001, 0x00000000, + 0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant + }; + if (I8 == input_dtype && I8 == output_dtype && out_width > in_width) + { + gpu_dp_inst_t uniConvertI32toI16_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniGetMaskShift_2x8 = {{ + 0x99999999, // TCfg + 0x00000000, // ASelt + 0x03020100, 0x07060504, // ABin + 0x55555555, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniConvertDFP2FP32_part1_4x4 = {{ + 0x09090909, // TCfg + 0x00000000, // ASelt + 0x00150004, 0x00370026, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000300, // AccumType, ConstantType, and PostShift + 0x00010001, 0x00000000, 0x00010001, 0x00000000, + 0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + + status = vsi_nn_kernel_gpu_add_param( node, "uniConvertI32toI16_2x8", &uniConvertI32toI16_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniGetMaskShift_2x8", &uniGetMaskShift_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertDFP2FP32_4x4", &uniConvertDFP2FP32_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertDFP2FP32_part1_4x4", + &uniConvertDFP2FP32_part1_4x4); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else if (I16 == input_dtype && I16 == output_dtype && out_width > in_width) + { + gpu_dp_inst_t uniConvertI32toI16_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 
0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniGetMaskShift_2x8 = {{ + 0x99999999, // TCfg + 0x00000000, // ASelt + 0x03020100, 0x07060504, // ABin + 0x55555555, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniConvertDFP2FP32_part1_4x4 = {{ + 0x09090909, // TCfg + 0x00000000, // ASelt + 0x00150004, 0x00370026, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000300, // AccumType, ConstantType, and PostShift + 0x00010001, 0x00000000, 0x00010001, 0x00000000, + 0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + + status = vsi_nn_kernel_gpu_add_param( node, "uniConvertI32toI16_2x8", &uniConvertI32toI16_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniGetMaskShift_2x8", &uniGetMaskShift_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertDFP2FP32_4x4", &uniConvertDFP2FP32_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertDFP2FP32_part1_4x4", + &uniConvertDFP2FP32_part1_4x4); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else + { + status = vsi_nn_kernel_gpu_add_param( node, "uniConvertDFP2FP32_left_4x4", + &uniConvertDFP2FP32_left_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertDFP2FP32_right_4x4", + &uniConvertDFP2FP32_right_4x4); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + + status = vsi_nn_kernel_gpu_add_param( node, "uniExtact8Bit_2x8", &uniExtact8Bit_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + status |= vsi_nn_kernel_gpu_add_param( node, "dfpScale", &dfpScale); + status |= vsi_nn_kernel_gpu_add_param( node, "out_height", &out_height); + CHECK_STATUS_FAIL_GOTO(status, final ); + gpu_param.global_scale[1] = out_height; + } + else if (U8 == input_dtype && (U8 == output_dtype || F16 == output_dtype)) + { + float uint8Scale = input_scale / output_scale; + float uint8ZP_out = (float)outputZP; + gpu_dp_inst_t uniExtact8Bit_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniU8SubZPtoFp32_4x4 = {{ + 0x09090909, // TCfg + 0x04040404, // ASelt + 0x00010000, 0x00030002, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00010001, 0x00000000, 0x00010001, 0x00000000, + 0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + vx_uint32 uniU8SubZPtoFp32_left_4x4[16] = { + 0x09090909, // TCfg + 0x04040404, // ASelt + 0x00020000, 0x00060004, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00010001, 0x00000000, 0x00010001, 0x00000000, + 0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant + }; + vx_uint32 uniU8SubZPtoFp32_right_4x4[16] = { + 0x09090909, // TCfg + 0x04040404, // ASelt + 0x00030001, 0x00070005, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00010001, 0x00000000, 0x00010001, 0x00000000, + 
0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant + }; + + if (F16 == output_dtype) + { + status = vsi_nn_kernel_gpu_add_param( node, "uint8Scale", &input_scale); + status |= vsi_nn_kernel_gpu_add_param( node, "uniU8SubZPtoFp32_left_4x4", &uniU8SubZPtoFp32_left_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniU8SubZPtoFp32_right_4x4", &uniU8SubZPtoFp32_right_4x4); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else + { + if (out_width > in_width) + { + gpu_dp_inst_t uniConvertI32toI16_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniGetMaskShift_2x8 = {{ + 0x99999999, // TCfg + 0x00000000, // ASelt + 0x03020100, 0x07060504, // ABin + 0x55555555, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniU8SubZPtoFp32_part1_4x4 = {{ + 0x09090909, // TCfg + 0x00000000, // ASelt + 0x00150004, 0x00370026, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00010001, 0x00000000, 0x00010001, 0x00000000, + 0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + vx_uint32 uniBilinear_4x4[16] = { + 0x05050505, // TCfg + 0x00000000, // ASelt + 0x00510040, 0x00730062, // ABin + 0x05050505, // BSelt + 0x00320010, 0x00760054, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }; + status = vsi_nn_kernel_gpu_add_param( node, "uniConvertI32toI16_2x8", &uniConvertI32toI16_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniGetMaskShift_2x8", &uniGetMaskShift_2x8); + if (is_use_scale_kernel) + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniBilinear_4x4", &uniBilinear_4x4); + } + else + { + status |= vsi_nn_kernel_gpu_add_param( node, "uniU8SubZPtoFp32_4x4", &uniU8SubZPtoFp32_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, + "uniU8SubZPtoFp32_part1_4x4", &uniU8SubZPtoFp32_part1_4x4); + } + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else + { + status = vsi_nn_kernel_gpu_add_param( node, "uniU8SubZPtoFp32_left_4x4", + &uniU8SubZPtoFp32_left_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniU8SubZPtoFp32_right_4x4", + &uniU8SubZPtoFp32_right_4x4); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + + if (!is_use_scale_kernel) + { + status = vsi_nn_kernel_gpu_add_param( node, "uniExtact8Bit_2x8", &uniExtact8Bit_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uint8Scale", &uint8Scale); + status |= vsi_nn_kernel_gpu_add_param( node, "output_ZP", &uint8ZP_out); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + } + status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + if (!is_use_scale_kernel) + { + status |= vsi_nn_kernel_gpu_add_param( node, "input_ZP", &inputZP); + } + status |= vsi_nn_kernel_gpu_add_param( node, "out_height", &out_height); + CHECK_STATUS_FAIL_GOTO(status, final ); + gpu_param.global_scale[1] = out_height; + + } + else if (F16 == input_dtype && (U8 == output_dtype || F16 == output_dtype)) + { + float uint8Scale = 1.0f / 
output_scale; + float uint8ZP_out = (vx_float32)outputZP; + gpu_dp_inst_t uniExtact8Bit_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniFp16toFp32_4x4 = {{ + 0x01010101, // TCfg + 0x00000000, // ASelt + 0x00010000, 0x00030002, // ABin + 0x02020202, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x00000000, 0x00003c00, 0x00000000, + 0x00003c00, 0x00000000, 0x00003c00, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniRightSubLeft_4x4 = {{ + 0x09090909, // TCfg + 0x04040404, // ASelt + 0x00110000, 0x00330022, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x3c003c00, 0x00000000, 0x3c003c00, 0x00000000, + 0x3c003c00, 0x00000000, 0x3c003c00, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniExtactHalf8_2x8 = {{ + 0x11111111, // TCfg + 0x11110000, // ASelt + 0x06040200, 0x06040200, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x00003c00, 0x00003c00, 0x00003c00, 0x00003c00, + 0x00003c00, 0x00003c00, 0x00003c00, 0x00003c00 // Constant + }, GPU_DP_TYPE_16}; + vx_uint32 uniConvertFp2FP32_left_4x4[16] = { + 0x01010101, // TCfg + 0x00000000, // ASelt + 0x00020000, 0x00060004, // ABin + 0x02020202, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000000, 0x00000001, 0x00000000, + 0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant + }; + vx_uint32 uniConvertFp2FP32_right_4x4[16] = { + 0x01010101, // TCfg + 0x00000000, // ASelt + 0x00030001, 0x00070005, // ABin + 0x02020202, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000000, 0x00000001, 0x00000000, + 0x00000001, 0x00000000, 0x00000001, 0x00000000 // Constant + }; + if (F16 == input_dtype && F16 == output_dtype && out_width > in_width) + { + gpu_dp_inst_t uniConvertI32toI16_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniGetMaskShift_2x8 = {{ + 0x99999999, // TCfg + 0x00000000, // ASelt + 0x03020100, 0x07060504, // ABin + 0x55555555, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniFp16toFp32_part1_4x4 = {{ + 0x09090909, // TCfg + 0x00000000, // ASelt + 0x00150004, 0x00370026, // ABin + 0x0a0a0a0a, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000100, // AccumType, ConstantType, and PostShift + 0x3c003c00, 0x00000000, 0x3c003c00, 0x00000000, + 0x3c003c00, 0x00000000, 0x3c003c00, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + + status = vsi_nn_kernel_gpu_add_param( node, "uniConvertI32toI16_2x8", &uniConvertI32toI16_2x8); + status |= vsi_nn_kernel_gpu_add_param( 
node, "uniGetMaskShift_2x8", &uniGetMaskShift_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniFp16toFp32_4x4", &uniFp16toFp32_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniFp16toFp32_part1_4x4", &uniFp16toFp32_part1_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniExtactHalf8_2x8", &uniExtactHalf8_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else if (F16 == output_dtype) + { + status = vsi_nn_kernel_gpu_add_param( node, "uniExtactHalf8_2x8", &uniExtactHalf8_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniRightSubLeft_4x4", &uniRightSubLeft_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertFp2FP32_left_4x4", + &uniConvertFp2FP32_left_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertFp2FP32_right_4x4", + &uniConvertFp2FP32_right_4x4); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else + { + status = vsi_nn_kernel_gpu_add_param( node, "uniExtact8Bit_2x8", &uniExtact8Bit_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniRightSubLeft_4x4", &uniRightSubLeft_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertFp2FP32_left_4x4", + &uniConvertFp2FP32_left_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertFp2FP32_right_4x4", + &uniConvertFp2FP32_right_4x4); + status |= vsi_nn_kernel_gpu_add_param( node, "uint8Scale", &uint8Scale); + status |= vsi_nn_kernel_gpu_add_param( node, "output_ZP", &uint8ZP_out); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + status |= vsi_nn_kernel_gpu_add_param( node, "out_height", &out_height); + CHECK_STATUS_FAIL_GOTO(status, final ); + gpu_param.global_scale[1] = out_height; + } + else if (BF16 == input_dtype && BF16 == output_dtype) + { + if (out_width > in_width) + { + gpu_dp_inst_t uniConvertI32toI16_2x8 = {{ + 0x33333333, // TCfg + 0x11110000, // ASelt + 0x03020100, 0x03020100, // ABin + 0x00000000, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniGetMaskShift_2x8 = {{ + 0x99999999, // TCfg + 0x00000000, // ASelt + 0x03020100, 0x07060504, // ABin + 0x55555555, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniConvBF16toF32_Part0_2x8 = {{ + 0x11111111, // TCfg + 0x01010101, // ASelt + 0x01050004, 0x03070206, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000600, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniConvBF16toF32_Part1_2x8 = {{ + 0x11111111, // TCfg + 0x01010101, // ASelt + 0x05050404, 0x07070606, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000600, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant + }, GPU_DP_TYPE_16}; + status = vsi_nn_kernel_gpu_add_param( node, "uniConvertI32toI16_2x8", &uniConvertI32toI16_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniGetMaskShift_2x8", &uniGetMaskShift_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvBF16toF32_Part0_2x8", 
&uniConvBF16toF32_Part0_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvBF16toF32_Part1_2x8", &uniConvBF16toF32_Part1_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else + { + gpu_dp_inst_t uniConvBF16toF32_odd_2x8 = {{ + 0x11111111, // TCfg + 0x01010101, // ASelt + 0x02050004, 0x06070406, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000600, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniConvBF16toF32_even_2x8 = {{ + 0x11111111, // TCfg + 0x01010101, // ASelt + 0x03050104, 0x07070506, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000600, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant + }, GPU_DP_TYPE_16}; + + status = vsi_nn_kernel_gpu_add_param( node, "uniConvBF16toF32_odd_2x8", &uniConvBF16toF32_odd_2x8); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvBF16toF32_even_2x8", &uniConvBF16toF32_even_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + status = vsi_nn_kernel_gpu_add_param( node, "out_height", &out_height); + status |= vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + CHECK_STATUS_FAIL_GOTO(status, final ); + gpu_param.global_scale[1] = out_height; + } + else + { + VSILOGE("input or output's format is not support"); + status = VSI_FAILURE; + goto final; + } + + if (!is_run_nx_kernel) + { + status = vsi_nn_kernel_gpu_add_param( node, "half_pixel_value", &half_pixel_value); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + + if (is_run_nx_kernel) + { + gpu_param.global_size[0] = gpu_align_p2((out_width + \ + gpu_param.global_scale[0] - 1) / gpu_param.global_scale[0], 4); + gpu_param.global_size[1] = depth; + gpu_param.dim = 2; + } + else + { + gpu_param.global_size[0] = gpu_align_p2((out_width + \ + gpu_param.global_scale[0] - 1) / gpu_param.global_scale[0], 4); + gpu_param.global_size[1] = (out_height + gpu_param.global_scale[1] - 1) / gpu_param.global_scale[1]; + gpu_param.global_size[2] = depth / gpu_param.global_scale[2]; + } + + status = vsi_nn_kernel_gpu_config( node, &gpu_param ); +final: + if (input_attr) vsi_nn_kernel_tensor_attr_release( &input_attr ); + if (output_attr) vsi_nn_kernel_tensor_attr_release( &output_attr ); + return status; +} /* _resize_1d_bilinear_initializer() */ + + + +/* + * Query kernel + */ +static vsi_status _query_kernel + ( + vsi_nn_kernel_t * kernel, + vsi_nn_tensor_t * const * const inputs, + vsi_nn_tensor_t * const * const outputs, + vsi_bool is_same_type, + int32_t align_corners , + int32_t half_pixel_centers, + vsi_bool *is_run_opt_kernel, + vsi_bool *is_run_nx_kernel, + int32_t *scale_flag_value + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_dtype_e in_dtype; + vsi_nn_kernel_dtype_e out_dtype; + const _kernel_map_type * kernel_map = _resize_1d_bilinear_kernel_map; + size_t kernel_map_size = _cnt_of_array( _resize_1d_bilinear_kernel_map ); + vx_param_description_t * param_def = _resize_1d_bilinear_kernel_param_def; + size_t param_def_size = _cnt_of_array( _resize_1d_bilinear_kernel_param_def ); + vx_kernel_initialize_f initializer = _resize_1d_bilinear_initializer; + uint32_t key = 0; + uint32_t i = 0; + _internal_scale_e scale_flag = UP; + + in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); + out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); + + if 
(outputs[0]->attr.size[0] > inputs[0]->attr.size[0]) + { + scale_flag = UP; + + if (is_same_type) + { + scale_flag = UP_OPT; + } + + if (2 * inputs[0]->attr.size[0] == outputs[0]->attr.size[0]) + { + if (is_same_type && (!align_corners) && (half_pixel_centers)) + { + scale_flag = UP_2X_HALF_SAME; + } + else if (is_same_type && (!align_corners) && (!half_pixel_centers)) + { + scale_flag = UP_2X_SAME; + } + } + else if (4 * inputs[0]->attr.size[0] == outputs[0]->attr.size[0]) + { + if (is_same_type && (!align_corners) && (half_pixel_centers)) + { + scale_flag = UP_4X_HALF_SAME; + } + else if (is_same_type && (!align_corners) && (!half_pixel_centers)) + { + scale_flag = UP_4X_SAME; + } + } + else if (8 * inputs[0]->attr.size[0] == outputs[0]->attr.size[0]) + { + if (is_same_type && (!align_corners) && (half_pixel_centers)) + { + scale_flag = UP_8X_HALF_SAME; + } + else if (is_same_type && (!align_corners) && (!half_pixel_centers)) + { + scale_flag = UP_8X_SAME; + } + } + } + else + { + scale_flag = DOWN; + if (inputs[0]->attr.size[0] == 2 * outputs[0]->attr.size[0]) + { + if (is_same_type && (!align_corners) && (half_pixel_centers)) + { + scale_flag = DOWN_2X_HALF_SAME; + } + else if (is_same_type && (!align_corners) && (!half_pixel_centers)) + { + scale_flag = DOWN_2X_SAME; + } + } + } + + key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag ); + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if ( kernel_map[i].key == key ) + { + break; + } + } + + if ((scale_flag > UP_OPT) && (i >= kernel_map_size) && is_same_type) + { + scale_flag = UP_OPT; + key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag ); + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if ( kernel_map[i].key == key ) + { + break; + } + } + } + + if ((UP_OPT == scale_flag) && (i >= kernel_map_size)) + { + scale_flag = UP; + key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag ); + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if ( kernel_map[i].key == key ) + { + break; + } + } + } + + if ((scale_flag <= UP) && (scale_flag > DOWN) && (i >= kernel_map_size)) + { + scale_flag = DOWN; + key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag ); + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if ( kernel_map[i].key == key ) + { + break; + } + } + } + + if ( i < kernel_map_size ) + { + if ((scale_flag > UP_OPT) || ((scale_flag > DOWN) && (scale_flag < UP))) + { + param_def_size = _RESIZE_1D_NX_KERENL_PARAM_NUM; + *is_run_nx_kernel = TRUE; + } + else if (UP_OPT == scale_flag) + { + param_def_size = _RESIZE_1D_BILINEAR_PARAM_NUM; + *is_run_opt_kernel = TRUE; + } + else + { + param_def_size = _RESIZE_NO_SCALE_PARAM_NUM; + *is_run_opt_kernel = FALSE; + } + *scale_flag_value = (int32_t)scale_flag; + + snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name ); + kernel->info.parameters = param_def; + kernel->info.numParams = (uint32_t)param_def_size; + kernel->info.initialize = initializer; + // Register code source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2, + "vsi_nn_kernel_header", + kernel_map[i].source_name ); + // Register binary source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1, + kernel_map[i].source_name ); + status = VSI_SUCCESS; + } + + return status; +} /* _query_kernel() */ + + +static vsi_nn_tensor_t* _create_scale_tensor + ( + vsi_nn_graph_t *graph, + vsi_nn_tensor_t *input, + vsi_nn_tensor_t *output, + int32_t align_corners, + int32_t half_pixel_centers + 
) +{ + vsi_nn_tensor_attr_t attr; + vsi_nn_tensor_t* scale = NULL; + uint32_t dims = output->attr.dim_num; + uint32_t batch = dims > 3 ? output->attr.size[3] : 1; + uint32_t width = output->attr.size[0]; + uint32_t sizes[4] = {width * 2, 1, 1, batch}; + uint32_t item_count = width * 2 * batch; + uint32_t input_width = input->attr.size[0]; + uint32_t x = 0; + uint32_t b = 0; + float width_scale = 1.0f; + uint16_t *scale_data_ptr = NULL; + + if (align_corners && width > 1) + { + width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(width - 1); + } + else + { + width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)width; + } + + scale_data_ptr = (uint16_t *)malloc(item_count * sizeof(uint16_t)); + if (scale_data_ptr == NULL) + { + VSILOGE("allocate memory fail at function %s line %d", __FUNCTION__, __LINE__); + goto OnError; + } + memset(scale_data_ptr, 0, item_count * sizeof(vx_uint16)); + for (b = 0; b < batch; b ++) + { + for (x = 0; x < width; x ++) + { + float input_w = 0.0f; + int32_t w0 = 0; + uint32_t idx = b * width * 2 + x * 2; + float tl = 0.0f; + float tr = 0.0f; + if (half_pixel_centers) + { + input_w = ((vx_float32)x + 0.5f) * width_scale - 0.5f; + } + else + { + input_w = x * width_scale; + } + w0 = (vx_int32)input_w; + tl = (1 - (input_w - w0)); + tr = (input_w - w0); + + scale_data_ptr[idx + 0] = fp32_to_fp16(tl); + scale_data_ptr[idx + 1] = fp32_to_fp16(tr); + } + } + + memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t)); + + attr.size[0] = sizes[0]; + attr.size[1] = sizes[1]; + attr.size[2] = sizes[2]; + attr.size[3] = sizes[3]; + attr.dim_num = (batch == 1) ? 2 : 4; + attr.dtype.vx_type = VSI_NN_TYPE_FLOAT16; + attr.vtl = FALSE; + + scale = vsi_nn_CreateTensorFromData(graph, (uint8_t *)scale_data_ptr, &attr); + if (scale_data_ptr) + { + free(scale_data_ptr); + scale_data_ptr = NULL; + } + +OnError: + return scale; +} + + +static vsi_nn_kernel_node_t _setup + ( + vsi_nn_graph_t * graph, + vsi_nn_tensor_t ** inputs, + size_t input_num, + vsi_nn_tensor_t ** outputs, + size_t output_num, + const vsi_nn_kernel_param_t * params, + vsi_nn_kernel_t * kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_BILINEAR_PARAM_NUM] = {NULL}; + vsi_nn_kernel_node_t node = NULL; + int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" ); + int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" ); + vsi_bool is_same_type = vsi_nn_is_same_type(inputs[0], outputs[0]); + vsi_bool is_run_opt_kernel = FALSE; + vsi_bool is_run_nx_kernel = FALSE; + vsi_nn_tensor_t* scale = NULL; + int32_t scale_flag_value = 0; + + status = _query_kernel( kernel, inputs, outputs, is_same_type, + align_corners, half_pixel_centers, + &is_run_opt_kernel, &is_run_nx_kernel, + &scale_flag_value); + + if ( VSI_SUCCESS == status) + { + node = vsi_nn_kernel_create_node( graph, kernel ); + if ( node ) + { + size_t node_params_num = _RESIZE_NO_SCALE_PARAM_NUM; + /* Set inputs and outputs */ + vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_BILINEAR_PARAM_NUM, + inputs, input_num, outputs, output_num ); + + if (!is_run_nx_kernel) + { + node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners ); + node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers ); + }else + { + node_params[SCALAR_SCALE_TYPE] = vsi_nn_kernel_scalar_create( graph, I32, &scale_flag_value ); + node_params_num = _RESIZE_1D_NX_KERENL_PARAM_NUM; + } + + + if 
(is_run_opt_kernel) + { + scale = _create_scale_tensor(graph, inputs[0], outputs[0], align_corners, half_pixel_centers); + node_params[SCALAR_TENSOR_SCALE] = (vsi_nn_kernel_node_param_t)(scale->t); + node_params_num = _RESIZE_1D_BILINEAR_PARAM_NUM; + } + /* Pass parameters to node. */ + status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num ); + VSI_ASSERT( status == VSI_SUCCESS ); + if (!is_run_nx_kernel) + { + vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] ); + } + else + { + vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_TYPE] ); + } + + if (is_run_opt_kernel) + { + if (scale) + { + vsi_nn_ReleaseTensor(&scale); + } + } + } + } + return node; +} /* _setup() */ + +__END_DECLS + +REGISTER_BACKEND_EVIS( resize_1d_bilinear, _setup ) + diff --git a/src/tim/vx/internal/src/kernel/evis/resize_1d_nearest_evis.c b/src/tim/vx/internal/src/kernel/evis/resize_1d_nearest_evis.c new file mode 100644 index 0000000..9fc49ee --- /dev/null +++ b/src/tim/vx/internal/src/kernel/evis/resize_1d_nearest_evis.c @@ -0,0 +1,533 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include +#include +#include +#include "vsi_nn_types.h" +#include "vsi_nn_tensor.h" +#include "vsi_nn_graph.h" +#include "vsi_nn_log.h" +#include "vsi_nn_error.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_tensor_util.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "libnnext/vx_lib_nnext.h" + +__BEGIN_DECLS + +/* + * Define kernel meta. 
+ */ +typedef enum +{ + LARGE = 0, + SMALL +} _internal_nearest_e; + +#define _RESIZE_1D_NEAREST_KERNEL_SOURCE "resize_1d_nearest" + +#define STR(a) #a +// Add kernel hashtable here +#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, mode ) \ + (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) | (mode)) + + +#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, LARGE ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \ + _RESIZE_1D_NEAREST_KERNEL_SOURCE } + +#define PACK_KERNEL_MAP_OPT( IN_DTYPE, OUT_DTYPE ) \ + { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, SMALL ), \ + CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_op"), \ + _RESIZE_1D_NEAREST_KERNEL_SOURCE } + +typedef struct +{ + uint32_t key; + char * function_name; + const char * source_name; +} _kernel_map_type; + +static const _kernel_map_type _resize_1d_nearest_kernel_map[] = +{ + // Register kernel here + PACK_KERNEL_MAP(F16, F16), + PACK_KERNEL_MAP(I16, I16), + PACK_KERNEL_MAP(I8, I8), + PACK_KERNEL_MAP(U8, U8), + PACK_KERNEL_MAP_OPT(F16, F16), + PACK_KERNEL_MAP_OPT(I16, I16), + PACK_KERNEL_MAP_OPT(I8, I8), + PACK_KERNEL_MAP_OPT(U8, U8), +}; + + +/* + * Kernel params + */ +static vx_param_description_t _resize_1d_nearest_kernel_param_def[] = +{ + {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, + {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED}, +}; +#define _RESIZE_1D_NEAREST_PARAM_NUM _cnt_of_array( _resize_1d_nearest_kernel_param_def ) + +#define SCALAR_ALIGN_CORNERS (2) +#define SCALAR_HALF_PIXEL (3) + +/* + * Kernel initializer + */ +DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer) + ( + vsi_nn_kernel_node_t node, + const vsi_nn_kernel_node_param_t * param, + size_t param_size + ) +{ +#define MAX_POST_SHIFT_BITS (31) +#define MAX_MULTIPLIER_NUM (65535) + vsi_status status = VSI_FAILURE; + gpu_param_t gpu_param = { + 3, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0}, + {0, 0, 0} + }; + vsi_nn_kernel_tensor_attr_t * output_attr = NULL; + vsi_nn_kernel_tensor_attr_t * input_attr = NULL; + vsi_int_array_t * out_shape = NULL; + vsi_int_array_t * in_shape = NULL; + vsi_nn_kernel_dtype_e input_dtype = F16; + vsi_nn_kernel_dtype_e output_dtype = F16; + int32_t align_corners = 0; + int32_t half_pixel_centers = 0; + uint32_t depth = 0; + int32_t srcFixPointPos = 0; + int32_t dstFixPointPos = 0; + float input_scale = 1.0; + int32_t inputZP = 0; + float output_scale = 1.0; + int32_t outputZP = 0; + float scale_factor = 1.0f; + uint32_t in_width = 0; + uint32_t out_width = 0; + uint32_t out_height = 0; + float half_pixel_value = 0.0f; + float round_value = 0.0f; + + input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] ); + CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final ); + output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] ); + CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final ); + + status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &align_corners); + CHECK_STATUS_FAIL_GOTO(status, final ); + status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &half_pixel_centers); + CHECK_STATUS_FAIL_GOTO(status, final ); + out_shape = output_attr->shape; + in_shape = input_attr->shape; + input_dtype = input_attr->dtype; + output_dtype = output_attr->dtype; + 
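(Reference aside, not part of the patch: the initializer above has just read align_corners / half_pixel_centers and the tensor attributes, and what follows folds them into scale_x, half_pixel_value and round_value for the 1D nearest kernels. The scalar form of that coordinate mapping, mirroring this initializer and resize_1d_nearest.cl, is sketched below; the helper name and the defensive clamp are illustrative additions and do not appear in this repository.)

#include <stdint.h>

/* Minimal sketch of the 1D nearest-neighbour source-index math encoded
 * by the initializer: scale_factor, half-pixel shift, rounding bias,
 * then truncation, exactly as the OpenCL kernel computes in_x_idx. */
static int32_t resize_1d_nearest_src_index(
    int32_t out_x, uint32_t in_width, uint32_t out_width,
    int32_t align_corners, int32_t half_pixel_centers)
{
    /* Same scale_factor selection as the initializer above. */
    float scale = (align_corners && out_width > 1)
        ? (float)(in_width - 1) / (float)(out_width - 1)
        : (float)in_width / (float)out_width;
    float half_pixel_value = half_pixel_centers ? 0.5f : 0.0f; /* shift to pixel centers */
    float round_value      = align_corners      ? 0.5f : 0.0f; /* round-to-nearest bias  */

    /* Same expression as resize_1d_nearest.cl: scale, then truncate. */
    float in_x = ((float)out_x + half_pixel_value) * scale + round_value;
    int32_t idx = (int32_t)in_x;

    /* Defensive clamp for this sketch only; the kernels rely on valid
     * image reads and do not clamp here. */
    if (idx < 0) idx = 0;
    if (idx > (int32_t)in_width - 1) idx = (int32_t)in_width - 1;
    return idx;
}

(With align_corners = 0 and half_pixel_centers = 0 this reduces to plain truncation, idx = (int)(out_x * in_width / out_width), which is the default path the kernel takes when both scalars are zero.)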
+ in_width = in_shape->data[0]; + depth = in_shape->data[2]; + out_width = out_shape->data[0]; + out_height = out_shape->data[1]; + + if (BF16 == input_dtype && output_dtype == BF16) + { + input_dtype = F16; + output_dtype = F16; + } + if (align_corners && out_width > 1) + { + scale_factor = ((float)(in_width - 1) * 1.0f) / (float)(out_width - 1); + } + else + { + scale_factor = ((float)in_width * 1.0f) / (float)out_width; + } + + + if (align_corners) + { + round_value = 0.5f; + } + else + { + round_value = 0.0f; + } + + if (half_pixel_centers) + { + half_pixel_value = 0.5f; + } + else + { + half_pixel_value = 0.0f; + } + + if (VSI_NN_KERNEL_QUANT_ASYMM == input_attr->quant ) + { + input_scale = input_attr->asymm.scale; + inputZP = input_attr->asymm.zero_point; + } + else if (VSI_NN_KERNEL_QUANT_DFP == input_attr->quant) + { + srcFixPointPos = input_attr->dfp.fl; + if (srcFixPointPos >= 0) + { + input_scale = 1.0f / (float) ((int64_t)1 << srcFixPointPos); + } + else if (srcFixPointPos < 0) + { + input_scale = (float)((int64_t)1 << -srcFixPointPos); + } + inputZP = 0; + } + else + { + input_scale = 1.0f; + inputZP = 0; + } + + if (VSI_NN_KERNEL_QUANT_ASYMM == output_attr->quant ) + { + output_scale = 1.0f / output_attr->asymm.scale; + outputZP = output_attr->asymm.zero_point; + } + else if (VSI_NN_KERNEL_QUANT_DFP == output_attr->quant) + { + dstFixPointPos = output_attr->dfp.fl; + if (dstFixPointPos >= 0) + { + output_scale = (float) ((int64_t)1 << dstFixPointPos); + } + else if (dstFixPointPos < 0) + { + output_scale = 1.0f / (float) ((int64_t)1 << -dstFixPointPos); + } + outputZP = 0; + } + else + { + output_scale = 1.0; + outputZP = 0; + } + + if (F16 == input_dtype && F16 == output_dtype) + { + gpu_dp_inst_t uniGetExtractData_2x8 = {{ + 0x00009999, // TCfg + 0x00000000, // ASelt + 0x06040200, 0x00000000, // ABin + 0x0000aaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000300, // AccumType, ConstantType, and PostShift + 0x00100010, 0x00100010, 0x00100010, 0x00100010, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + + if (scale_factor < 4.0f) + { + status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + + gpu_param.global_scale[0] = 4; + gpu_param.global_scale[1] = 1; + gpu_param.global_scale[2] = 1; + status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else if ( input_dtype == output_dtype && (I8 == input_dtype || I16 == input_dtype)) + { + gpu_dp_inst_t uniGetExtractData_2x8 = {{ + 0x00009999, // TCfg + 0x00000000, // ASelt + 0x06040200, 0x00000000, // ABin + 0x0000aaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000300, // AccumType, ConstantType, and PostShift + 0x00080008, 0x00080008, 0x00080008, 0x00080008, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniConvertI8toI8_2x8 = {{ + 0x11111111, // TCfg + 0x00000000, // ASelt + 0x03020100, 0x07060504, // ABin + 0x22222222, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000600, // AccumType, ConstantType, and PostShift + 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant + }, GPU_DP_TYPE_16}; + + if (I16 == input_dtype) + { + uniGetExtractData_2x8.data[8] = 0x00100010; + uniGetExtractData_2x8.data[9] = 0x00100010; + uniGetExtractData_2x8.data[10] = 0x00100010; + uniGetExtractData_2x8.data[11] = 0x00100010; + 
uniGetExtractData_2x8.data[12] = 0x00100010; + uniGetExtractData_2x8.data[13] = 0x00100010; + uniGetExtractData_2x8.data[14] = 0x00100010; + uniGetExtractData_2x8.data[15] = 0x00100010; + } + + if (srcFixPointPos > dstFixPointPos) + { + int32_t postshift = vsi_nn_min(srcFixPointPos - dstFixPointPos, MAX_POST_SHIFT_BITS); + + uniConvertI8toI8_2x8.data[7] |= (postshift & 0x1F); + } + else + { + uint32_t multiplier = vsi_nn_min((int64_t)1 << (dstFixPointPos - srcFixPointPos), MAX_MULTIPLIER_NUM); + uint32_t i = 0; + + for (i = 0; i < 8; i++) + { + uniConvertI8toI8_2x8.data[i + 8] = multiplier; + } + } + + if (scale_factor < 4.0f) + { + status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + + gpu_param.global_scale[0] = 4; + gpu_param.global_scale[1] = 1; + gpu_param.global_scale[2] = 1; + + status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertI8toI8_2x8", &uniConvertI8toI8_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + else if (U8 == input_dtype && U8 == output_dtype) + { + uint16_t M0 = 0; + int32_t postShift = 0; + uint32_t multAndoutZP[2] = {0}; + gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{ + 0xdddddddd, // TCfg + 0x44444444, // ASelt + 0x13121110, 0x17161514, // ABin + 0x11111111, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00002400, // AccumType, ConstantType, and PostShift + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + gpu_dp_inst_t uniGetExtractData_2x8 = {{ + 0x00009999, // TCfg + 0x00000000, // ASelt + 0x06040200, 0x00000000, // ABin + 0x0000aaaa, // BSelt + 0x00000000, 0x00000000, // BBin + 0x00000300, // AccumType, ConstantType, and PostShift + 0x00080008, 0x00080008, 0x00080008, 0x00080008, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant + }, GPU_DP_TYPE_16}; + + gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift); + + multAndoutZP[0] = (uint32_t)(M0); + multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0); + + uniMultiplyAndPostShift_2x8.data[7] |= (postShift & 0x1F); + + if (scale_factor < 4.0f) + { + status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + + gpu_param.global_scale[0] = 4; + gpu_param.global_scale[1] = 1; + gpu_param.global_scale[2] = 1; + + + status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor); + status |= vsi_nn_kernel_gpu_add_param( node, "multAndoutZP", multAndoutZP); + status |= vsi_nn_kernel_gpu_add_param( node, "uniMultiplyAndPostShift_2x8", &uniMultiplyAndPostShift_2x8); + CHECK_STATUS_FAIL_GOTO(status, final ); + } + status = vsi_nn_kernel_gpu_add_param( node, "half_pixel_value", &half_pixel_value); + status |= vsi_nn_kernel_gpu_add_param( node, "round_value", &round_value); + CHECK_STATUS_FAIL_GOTO(status, final ); + + gpu_param.global_size[0] = gpu_align_p2((out_width + gpu_param.global_scale[0] - 1)\ + / gpu_param.global_scale[0], 4); + gpu_param.global_size[1] = (out_height + gpu_param.global_scale[1] - 1) / gpu_param.global_scale[1]; + gpu_param.global_size[2] = depth; + + + status = vsi_nn_kernel_gpu_config( node, &gpu_param ); +#undef MAX_MULTIPLIER_NUM +#undef MAX_POST_SHIFT_BITS +final: + if (input_attr) vsi_nn_kernel_tensor_attr_release( &input_attr ); + if (output_attr) vsi_nn_kernel_tensor_attr_release( &output_attr ); + return 
status; +} /* _resize_nearest_initializer() */ + + + +/* + * Query kernel + */ +static vsi_status _query_kernel + ( + vsi_nn_kernel_t * kernel, + vsi_nn_tensor_t * const * const inputs, + vsi_nn_tensor_t * const * const outputs, + int32_t align_corners + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_dtype_e in_dtype = F16; + vsi_nn_kernel_dtype_e out_dtype = F16; + const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map; + size_t kernel_map_size = _cnt_of_array( _resize_1d_nearest_kernel_map ); + vx_param_description_t * param_def = _resize_1d_nearest_kernel_param_def; + size_t param_def_size = _cnt_of_array( _resize_1d_nearest_kernel_param_def ); + vx_kernel_initialize_f initializer = _resize_1d_nearest_initializer; + + uint32_t key = 0; + uint32_t i = 0; + uint32_t inputWidth = inputs[0]->attr.size[0]; + uint32_t outputWidth = outputs[0]->attr.size[0]; + float scale_factor; + _internal_nearest_e resize_mode = LARGE; + + if (align_corners && outputWidth > 1) + { + scale_factor = (vx_float32)(inputWidth - 1) / (vx_float32)(outputWidth - 1); + } + else + { + scale_factor = (vx_float32)inputWidth / (vx_float32)outputWidth; + } + + if (scale_factor < 4.0f) + { + resize_mode = SMALL; + } + else + { + resize_mode = LARGE; + } + + + in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type ); + out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type ); + + if (BF16 == in_dtype && BF16 == out_dtype) + { + in_dtype = F16; + out_dtype = F16; + } + + key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype, resize_mode ); + + for ( i = 0; i < (uint32_t)kernel_map_size; i ++ ) + { + if( kernel_map[i].key == key ) + { + break; + } + } + + if ( i < (uint32_t)kernel_map_size ) + { + snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name ); + kernel->info.parameters = param_def; + kernel->info.numParams = (uint32_t)param_def_size; + kernel->info.initialize = initializer; + // Register code source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2, + "vsi_nn_kernel_header", + kernel_map[i].source_name ); + // Register binary source + vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1, + kernel_map[i].source_name ); + status = VSI_SUCCESS; + } + return status; +} /* _query_kernel() */ + + +static vsi_nn_kernel_node_t _setup + ( + vsi_nn_graph_t * graph, + vsi_nn_tensor_t ** inputs, + size_t input_num, + vsi_nn_tensor_t ** outputs, + size_t output_num, + const vsi_nn_kernel_param_t * params, + vsi_nn_kernel_t * kernel + ) +{ + vsi_status status = VSI_FAILURE; + vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM] = {NULL}; + vsi_nn_kernel_node_t node = NULL; + int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" ); + int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" ); + + status = _query_kernel( kernel, inputs, outputs, align_corners ); + if ( VSI_SUCCESS == status) + { + node = vsi_nn_kernel_create_node( graph, kernel ); + if ( node ) + { + /* Set inputs and outputs */ + vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM, + inputs, input_num, outputs, output_num ); + node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners ); + node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers ); + /* Pass parameters to node. 
*/ + status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM ); + VSI_ASSERT( status == VSI_SUCCESS ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] ); + vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] ); + } + } + return node; +} /* _setup() */ + +__END_DECLS + +REGISTER_BACKEND_EVIS( resize_1d_nearest, _setup ) + diff --git a/src/tim/vx/internal/src/kernel/evis/resize_bilinear_evis.c b/src/tim/vx/internal/src/kernel/evis/resize_bilinear_evis.c index 52fb9d4..0cc7c61 100644 --- a/src/tim/vx/internal/src/kernel/evis/resize_bilinear_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/resize_bilinear_evis.c @@ -850,7 +850,7 @@ static vsi_status _query_kernel } } - if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size)) + if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size) && is_same_type && is_evis2) { scale_flag = UP_OPT; key = RESIZE_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag ); diff --git a/src/tim/vx/internal/src/kernel/evis/resize_nearest_evis.c b/src/tim/vx/internal/src/kernel/evis/resize_nearest_evis.c index 9d17244..9b485a0 100644 --- a/src/tim/vx/internal/src/kernel/evis/resize_nearest_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/resize_nearest_evis.c @@ -348,7 +348,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer) else if (U8 == input_dtype && U8 == output_dtype) { uint16_t M0 = 0; - vx_int8 postShift = 0; + int32_t postShift = 0; uint32_t multAndoutZP[2] = {0}; gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{ 0xdddddddd, // TCfg @@ -371,7 +371,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer) 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant }, GPU_DP_TYPE_16}; - vsi_nn_GetFP32MultiAndPostShift(input_scale * output_scale, &M0, &postShift); + gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift); multAndoutZP[0] = (uint32_t)(M0); multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0); diff --git a/src/tim/vx/internal/src/kernel/evis/select_evis.c b/src/tim/vx/internal/src/kernel/evis/select_evis.c index 9e95f5d..e0975d3 100644 --- a/src/tim/vx/internal/src/kernel/evis/select_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/select_evis.c @@ -131,9 +131,9 @@ DEF_KERNEL_INITIALIZER(_select_initializer) float outputScale = 1.0f; int32_t outputZP = 0; uint16_t in0_M0 = 0; - int8_t in0_postShift = 0; + int32_t in0_postShift = 0; uint16_t in1_M0 = 0; - int8_t in1_postShift = 0; + int32_t in1_postShift = 0; uint32_t pack_key = 0; input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0); CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final ); @@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_select_initializer) outputZP = output_attr->asymm.zero_point; } - vsi_nn_GetFP32MultiAndPostShift(input0Scale / outputScale, &in0_M0, &in0_postShift); - vsi_nn_GetFP32MultiAndPostShift(input1Scale / outputScale, &in1_M0, &in1_postShift); + gpu_quantize_multiplier_16bit(input0Scale / outputScale, &in0_M0, &in0_postShift); + gpu_quantize_multiplier_16bit(input1Scale / outputScale, &in1_M0, &in1_postShift); pack_key = _PACK_SELECT_KEY( input0_attr->dtype, input1_attr->dtype, output_attr->dtype ); diff --git a/src/tim/vx/internal/src/kernel/evis/tile_evis.c b/src/tim/vx/internal/src/kernel/evis/tile_evis.c index 7fa9215..a076329 100644 --- a/src/tim/vx/internal/src/kernel/evis/tile_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/tile_evis.c @@ -309,7 +309,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer) { float uint8Scale = 
scaleIn / scaleOut; uint16_t M0 = 0; - int8_t postShift = 0; + int32_t postShift = 0; uint32_t multAndoutZP[2] = {0}; gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{ @@ -323,7 +323,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer) 0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant }, GPU_DP_TYPE_16}; - vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift); + gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift); multAndoutZP[0] = (uint32_t)(M0); multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0); diff --git a/src/tim/vx/internal/src/kernel/evis/upsample_evis.c b/src/tim/vx/internal/src/kernel/evis/upsample_evis.c index 1f96b93..df549fd 100644 --- a/src/tim/vx/internal/src/kernel/evis/upsample_evis.c +++ b/src/tim/vx/internal/src/kernel/evis/upsample_evis.c @@ -154,7 +154,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer) int32_t input_fl = 0; int32_t output_fl = 0; uint16_t M0 = 0; - int8_t postShift = 0; + int32_t postShift = 0; float inputScale = 1.0f; int32_t input_ZP = 0; float outputScale = 1.0f; @@ -212,7 +212,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer) factorOut = 1.0f / outputScale; - vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift); + gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift); image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]); diff --git a/src/tim/vx/internal/src/kernel/vsi_nn_gpu.c b/src/tim/vx/internal/src/kernel/vsi_nn_gpu.c index 34d4408..9856fca 100644 --- a/src/tim/vx/internal/src/kernel/vsi_nn_gpu.c +++ b/src/tim/vx/internal/src/kernel/vsi_nn_gpu.c @@ -84,7 +84,7 @@ void gpu_quantize_multiplier_32bit double q; int64_t q_fixed; const int32_t bit = 32; - if( vsi_abs(double_multiplier - 0.0) < 1e-5 ) + if( vsi_abs(double_multiplier - 0.0) < 1e-8 ) { *quantize_multiplier = 0; *shift = bit - 0; @@ -116,6 +116,25 @@ void gpu_quantize_multiplier_32bit } } /* gpu_quantize_multiplier_32_bit() */ +void _modify_multiplier_postshift + ( + uint16_t * quantize_multiplier, + int32_t * shift + ) +{ + uint16_t multiplier = *quantize_multiplier; + int32_t postshift = *shift; + + while (postshift > GPU_MAX_POST_SHIFT_BITS) + { + multiplier = (multiplier + 1) >> 1; + postshift --; + } + + *quantize_multiplier = multiplier; + *shift = postshift; +} + void gpu_quantize_multiplier_16bit ( double double_multiplier, @@ -135,5 +154,8 @@ void gpu_quantize_multiplier_16bit { *shift -= bit; } + + _modify_multiplier_postshift(quantize_multiplier, shift); + } /* gpu_quantize_multiplier_16bit() */ diff --git a/src/tim/vx/internal/src/kernel/vsi_nn_kernel.c b/src/tim/vx/internal/src/kernel/vsi_nn_kernel.c index af3b91b..e0c43e2 100644 --- a/src/tim/vx/internal/src/kernel/vsi_nn_kernel.c +++ b/src/tim/vx/internal/src/kernel/vsi_nn_kernel.c @@ -1145,10 +1145,10 @@ vsi_nn_kernel_tensor_attr_t * vsi_nn_kernel_tensor_attr_create status = vxQueryTensor( (vx_tensor)tensor, VX_TENSOR_SCALE, &(attr->asymm.scale), sizeof(float)); CHECK_STATUS( status ); - // Reset scale to 1 - if( (attr->asymm.scale - 0.f) < 1e-5 ) + // Reset scale to 1e-8 + if( (attr->asymm.scale - 0.f) < 1e-8 ) { - attr->asymm.scale = 1.0f; + attr->asymm.scale = (float)1e-8; attr->asymm.zero_point = 0; } } @@ -1225,12 +1225,16 @@ vsi_status vsi_nn_kernel_pirority_set static vsi_bool _check_shader_support(vsi_nn_graph_t* graph) { char *envctrl; - int32_t enableShader = 1; + static int32_t enableShader = -1; - envctrl = getenv("VIV_VX_ENABLE_SHADER"); - if (envctrl) + if (enableShader == -1) { - enableShader = atoi(envctrl); + 
enableShader = 1; + envctrl = getenv("VIV_VX_ENABLE_SHADER"); + if (envctrl) + { + enableShader = atoi(envctrl); + } } #if VX_HARDWARE_CAPS_PARAMS_EXT_SUPPORT @@ -1240,7 +1244,7 @@ static vsi_bool _check_shader_support(vsi_nn_graph_t* graph) } #endif - if(enableShader == 1) + if (enableShader >= 1) { return TRUE; } diff --git a/src/tim/vx/internal/src/kernel/vsi_nn_kernel_eltwise.c b/src/tim/vx/internal/src/kernel/vsi_nn_kernel_eltwise.c index 7be998c..53597b9 100644 --- a/src/tim/vx/internal/src/kernel/vsi_nn_kernel_eltwise.c +++ b/src/tim/vx/internal/src/kernel/vsi_nn_kernel_eltwise.c @@ -459,7 +459,7 @@ vsi_bool vsi_nn_kernel_optimize_broadcast_shape k = 0; for (j = 0; j < (size_t)input_num; j++) { - if (size_in[k] > 1) + if (size_in[j] > 1) { k = j; break; diff --git a/src/tim/vx/internal/src/kernel/vsi_nn_kernel_util.c b/src/tim/vx/internal/src/kernel/vsi_nn_kernel_util.c index 79b3468..9ea24e5 100644 --- a/src/tim/vx/internal/src/kernel/vsi_nn_kernel_util.c +++ b/src/tim/vx/internal/src/kernel/vsi_nn_kernel_util.c @@ -603,10 +603,14 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias vsi_nn_tensor_t * bias ) { - vsi_nn_tensor_t * new_bias = NULL; + vsi_nn_tensor_t * new_bias = NULL; vsi_nn_tensor_attr_t attr; - + int32_t *new_bias_data_ptr = NULL; + uint8_t *weight_data = NULL; + int32_t *bias_data = NULL; + uint32_t i, j; memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t)); + weight_data = vsi_nn_ConvertTensorToData(graph, weight); if (bias == NULL) { @@ -620,26 +624,47 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias attr.dtype.zero_point = 0; attr.dtype.vx_type = VSI_NN_TYPE_INT32; } - else - { - VSILOGE("need to add ..."); - } } else { memcpy(&attr, &bias->attr, sizeof(vsi_nn_tensor_attr_t)); + if (attr.dim_num == 1) + { + attr.size[1] = 1; + attr.dim_num = 2; + } + bias_data = (int32_t *)vsi_nn_ConvertTensorToData(graph, bias); } - new_bias = vsi_nn_CreateTensorWithDefault(graph, &attr, 0.0); + new_bias_data_ptr = (int32_t *)malloc(attr.size[0] * sizeof(int32_t)); + memset((void *)new_bias_data_ptr, 0, sizeof(int32_t) * attr.size[0]); - if (input->attr.dtype.zero_point == 0) + if (input->attr.dtype.zero_point != 0) { - return new_bias; + for (i = 0; i < weight->attr.size[1]; i++) + { + uint8_t *weight_ptr = weight_data + i * weight->attr.size[0]; + for (j = 0; j < weight->attr.size[0]; j++) + { + new_bias_data_ptr[i] += -((int32_t)weight_ptr[j] - weight->attr.dtype.zero_point) \ + * input->attr.dtype.zero_point; + } + } } - else + + if (bias_data != NULL) { - VSILOGE("need to process bias - (input_zp * (w - w_zp)) ..."); + for (i = 0; i < weight->attr.size[1]; i++) + { + new_bias_data_ptr[i] += bias_data[i]; + } } + new_bias = vsi_nn_CreateTensorFromData(graph, (uint8_t *)new_bias_data_ptr, &attr); + + vsi_nn_safe_free( new_bias_data_ptr ); + vsi_nn_safe_free( bias_data ); + vsi_nn_safe_free( weight_data ); + return new_bias; } diff --git a/src/tim/vx/internal/src/kernel/vx/convolutional.c b/src/tim/vx/internal/src/kernel/vx/convolutional.c index 235e5ac..bb0d060 100644 --- a/src/tim/vx/internal/src/kernel/vx/convolutional.c +++ b/src/tim/vx/internal/src/kernel/vx/convolutional.c @@ -31,6 +31,8 @@ #include "kernel/vsi_nn_kernel.h" #include "kernel/vsi_nn_kernel_node.h" #include "vsi_nn_feature.h" +#include "vsi_nn_tensor_util.h" +#include "vsi_nn_graph_optimization.h" static vsi_bool _build_vx_conv2d_param ( @@ -173,6 +175,7 @@ static vx_tensor _expand_tensor_dim vsi_nn_kernel_t * kernel \ ) + REGISTER_CONV_OPENVX_KERNEL( conv1d ) { vx_node node = NULL; @@ -196,10 
+199,34 @@ REGISTER_CONV_OPENVX_KERNEL( conv1d ) temp_tensors[0] = _expand_tensor_dim( inputs[0]->t, (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 ); CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final ); + if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC) + { + temp_tensors[1] = _expand_tensor_dim( inputs[1]->t, + (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 ); + CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final ); + } + else + { + uint8_t * data = NULL; + vsi_nn_tensor_attr_t attr; + uint32_t i; - temp_tensors[1] = _expand_tensor_dim( inputs[1]->t, - (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 ); - CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final ); + data = vsi_nn_ConvertTensorToData( graph, inputs[1] ); + CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final ); + + memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t)); + + attr.size[0] = 1; + for (i = 1; i <= inputs[1]->attr.dim_num; i++) + { + attr.size[i] = inputs[1]->attr.size[i - 1]; + } + attr.dim_num = inputs[1]->attr.dim_num + 1; + attr.dtype.channel_dim = inputs[1]->attr.dtype.channel_dim + 1; + + temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr); + vsi_nn_safe_free( data ); + } temp_tensors[2] = _expand_tensor_dim( outputs[0]->t, (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 ); @@ -248,9 +275,38 @@ REGISTER_CONV_OPENVX_KERNEL( depthwise_conv1d ) (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 ); CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final ); - temp_tensors[1] = _expand_tensor_dim( inputs[1]->t, - (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 ); - CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final ); + if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC) + { + temp_tensors[1] = _expand_tensor_dim( inputs[1]->t, + (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 ); + CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final ); + } + else + { + uint8_t * data = NULL; + vsi_nn_tensor_attr_t attr; + uint32_t i; + + data = vsi_nn_ConvertTensorToData( graph, inputs[1] ); + CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final ); + + memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t)); + + attr.size[0] = 1; + attr.size[1] = inputs[1]->attr.size[0]; + attr.size[2] = 1; + for (i = 1; i < inputs[1]->attr.dim_num; i++) + { + attr.size[2] *= inputs[1]->attr.size[i]; + } + attr.size[3] = 1; + attr.dim_num = 4; + attr.dtype.channel_dim = 2; + + temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr); + + vsi_nn_safe_free( data ); + } temp_tensors[2] = _expand_tensor_dim( outputs[0]->t, (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 ); diff --git a/src/tim/vx/internal/src/kernel/vx/eltwise_unary_vx.c b/src/tim/vx/internal/src/kernel/vx/eltwise_unary_vx.c index e259554..c78de9d 100644 --- a/src/tim/vx/internal/src/kernel/vx/eltwise_unary_vx.c +++ b/src/tim/vx/internal/src/kernel/vx/eltwise_unary_vx.c @@ -38,27 +38,27 @@ typedef struct _sort_lut_s float val; } sort_lut; -static float exp_eval(float val) +static float exp_eval(float val, float alpha) { return expf(val); } -static float log_eval(float data) +static float log_eval(float data, float alpha) { return logf(data); } -static float elu_eval(float data) +static float elu_eval(float data, float alpha) { - return data >=0 ? 
data : expf(data) - 1; + return data >=0 ? data : expf(data) * alpha - alpha; } -static float neg_eval(float data) +static float neg_eval(float data, float alpha) { return data * -1.0f; } -static float hsigmoid_eval(float data) +static float hsigmoid_eval(float data, float alpha) { data = (float)(0.2 * data + 0.5); data = vsi_nn_clamp(data, 0, 1); @@ -66,14 +66,14 @@ static float hsigmoid_eval(float data) return data; } -static float soft_plus_eval(float data) +static float soft_plus_eval(float data, float alpha) { - return log_eval(exp_eval(data) + 1); + return log_eval(exp_eval(data, alpha) + 1, alpha); } -static float mish_eval(float data) +static float mish_eval(float data, float alpha) { - data = (float)(data * tanh(soft_plus_eval(data))); + data = (float)(data * tanh(soft_plus_eval(data, alpha))); return data; } @@ -96,7 +96,7 @@ static int32_t _lut_comparator(const void *pa, const void *pb) return 0; } -static void _set_unary_table_lookup(float func(float), float *index, float *value) +static void _set_unary_table_lookup(float func(float, float), float *index, float *value, float alpha) { #define VSI_NN_MAX_LUT_SIZE (1024) #define FLT16_MAX (57344) @@ -108,25 +108,25 @@ static void _set_unary_table_lookup(float func(float), float *index, float *valu { int16_t val = (int16_t)(i << 6); lut[i].index = fp16_to_fp32(val); - lut[i].val = func(lut[i].index); + lut[i].val = func(lut[i].index, alpha); } for (i = 0x0; i < 0x10; i++) { lut[i].index = 0; - lut[i].val = func(lut[i].index); + lut[i].val = func(lut[i].index, alpha); } for (i = 0x1F0; i < 0x200; i++) { lut[i].index = FLT16_MAX; - lut[i].val = func(lut[i].index); + lut[i].val = func(lut[i].index, alpha); } for (i = 0x3F0; i < 0x400; i++) { lut[i].index = FLT16_MIN; - lut[i].val = func(lut[i].index); + lut[i].val = func(lut[i].index, alpha); } qsort(lut, VSI_NN_MAX_LUT_SIZE, sizeof(sort_lut), _lut_comparator); @@ -154,13 +154,14 @@ static vsi_nn_kernel_node_t _setup size_t output_num, const vsi_nn_kernel_param_t * params, vsi_nn_kernel_t * kernel, - float func(float) + float func(float, float) ) { #ifdef VX_USER_LOOKUP_TABLE_SUPPORT vx_lut lut1 = NULL; vx_lut lut2 = NULL; vx_node node = NULL; + float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" ); float index[1024] = {0}; float value[1024] = {0}; @@ -172,7 +173,7 @@ static vsi_nn_kernel_node_t _setup return NULL; } - _set_unary_table_lookup(func, index, value); + _set_unary_table_lookup(func, index, value, alpha); lut1 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024); lut2 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024); diff --git a/src/tim/vx/internal/src/libnnext/ops/cl/eltwise_unary.cl b/src/tim/vx/internal/src/libnnext/ops/cl/eltwise_unary.cl index c702951..68febfb 100644 --- a/src/tim/vx/internal/src/libnnext/ops/cl/eltwise_unary.cl +++ b/src/tim/vx/internal/src/libnnext/ops/cl/eltwise_unary.cl @@ -1,12 +1,12 @@ -float4 eltwise_unary_sin(float4 x) +float4 eltwise_unary_sin(float4 x, float alpha) { return native_sin(x); } #define logE (1.44269502f) #define twoLogE (logE * 2.0f) -float4 eltwise_unary_exp(float4 x) +float4 eltwise_unary_exp(float4 x, float alpha) { x *= logE; x = exp2(x); @@ -14,33 +14,33 @@ float4 eltwise_unary_exp(float4 x) } #define rlogE (0.693147182f) -float4 eltwise_unary_log(float4 x) +float4 eltwise_unary_log(float4 x, float alpha) { x = log2(x); return x * rlogE; } -float4 eltwise_unary_elu(float4 val) +float4 eltwise_unary_elu(float4 val, float alpha) { float4 x = val * logE; - x = exp2(x) - 1; + x = exp2(x) * alpha - alpha; return 
val < 0 ? x : val; } -float4 eltwise_unary_neg(float4 x) +float4 eltwise_unary_neg(float4 x, float alpha) { return x * -1; } -float4 eltwise_unary_hard_sigmoid(float4 x) +float4 eltwise_unary_hard_sigmoid(float4 x, float alpha) { x = 0.2 * x + 0.5; x = clamp(x, 0, 1); return x; } -float4 _softrelu(float4 x) +float4 _softrelu(float4 x, float alpha) { x *= logE; x = exp2(x); @@ -49,7 +49,7 @@ float4 _softrelu(float4 x) return x * rlogE; } -float4 _tanh(float4 x) +float4 _tanh(float4 x, float alpha) { x *= -twoLogE; x = 1 + exp2(x); @@ -57,10 +57,10 @@ float4 _tanh(float4 x) return (2 * x - 1); } -float4 eltwise_unary_mish(float4 x) +float4 eltwise_unary_mish(float4 x, float alpha) { - float4 y = _softrelu(x); - x = x * _tanh(y); + float4 y = _softrelu(x, alpha); + x = x * _tanh(y, alpha); return x; } @@ -72,14 +72,15 @@ __kernel void func_name##_F32toF32 \ float inputScale, \ float inputTail, \ float outputScale, \ - float outputZP \ + float outputZP, \ + float alpha \ ) \ { \ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \ \ float4 src = read_imagef(input, coord); \ \ - float4 dst = eltwise_unary_##func_name(src); \ + float4 dst = eltwise_unary_##func_name(src, alpha); \ \ write_imagef(output, coord, dst); \ } @@ -99,14 +100,15 @@ __kernel void func_name##_F32toF32_2D \ float inputScale, \ float inputTail, \ float outputScale, \ - float outputZP \ + float outputZP, \ + float alpha \ ) \ { \ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \ \ float4 src = read_imagef(input, coord); \ \ - float4 dst = eltwise_unary_##func_name(src); \ + float4 dst = eltwise_unary_##func_name(src, alpha); \ \ write_imagef(output, coord, dst); \ } @@ -126,7 +128,8 @@ __kernel void func_name##_U8toU8 \ float inputScale, \ float inputTail, \ float outputScale, \ - float outputZP \ + float outputZP, \ + float alpha \ ) \ { \ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \ @@ -134,7 +137,7 @@ __kernel void func_name##_U8toU8 \ uint4 src = read_imageui(input, coord); \ float4 data = convert_float4(src) * inputScale - inputTail; \ \ - data = eltwise_unary_##func_name(data); \ + data = eltwise_unary_##func_name(data, alpha); \ uint4 dst = convert_uint4(data * outputScale + outputZP); \ \ write_imageui(output, coord, dst); \ @@ -155,7 +158,8 @@ __kernel void func_name##_U8toU8_2D \ float inputScale, \ float inputTail, \ float outputScale, \ - float outputZP \ + float outputZP, \ + float alpha \ ) \ { \ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \ @@ -163,7 +167,7 @@ __kernel void func_name##_U8toU8_2D \ uint4 src = read_imageui(input, coord); \ float4 data = convert_float4(src) * inputScale - inputTail; \ \ - data = eltwise_unary_##func_name(data); \ + data = eltwise_unary_##func_name(data, alpha); \ uint4 dst = convert_uint4(data * outputScale + outputZP); \ \ write_imageui(output, coord, dst); \ @@ -184,7 +188,8 @@ __kernel void neg_I32toI32 float inputScale, float inputTail, float outputScale, - float outputZP + float outputZP, + float alpha ) { int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); @@ -202,7 +207,8 @@ __kernel void neg_I32toI32_2D float inputScale, float inputTail, float outputScale, - float outputZP + float outputZP, + float alpha ) { int2 coord = (int2)(get_global_id(0), get_global_id(1)); diff --git a/src/tim/vx/internal/src/libnnext/ops/cl/resize_1d_bilinear.cl b/src/tim/vx/internal/src/libnnext/ops/cl/resize_1d_bilinear.cl new file mode 100644 index 0000000..c7cbde2 --- /dev/null +++ 
b/src/tim/vx/internal/src/libnnext/ops/cl/resize_1d_bilinear.cl @@ -0,0 +1,57 @@ +__kernel void resize_1d_bilinear_F32toF32( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + float scale_x, + float half_pixel_value + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value; + float left_x_f = floor(in_x); + float x_lerp = in_x - left_x_f; + int left_x_idx = convert_int(left_x_f); + int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0); + float4 top_l, top_r, top, bottom, dst; + + top_l = read_imagef(input, coord_in); + coord_in.x++; + top_r = read_imagef(input, coord_in); + + top_r = top_r - top_l; + dst = top_l + x_lerp * top_r; + + write_imagef(output, coord_out, dst); + +} + + +__kernel void resize_1d_bilinear_U8toU8( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + float scale_x, + float half_pixel_value, + float in_scale, + float in_tail, + float out_scale, + float out_tail + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value; + float left_x_f = floor(in_x); + float x_lerp = in_x - left_x_f; + int left_x_idx = convert_int(left_x_f); + int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0); + float4 top_l, top_r, top; + uint4 dst; + + top_l = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail; + coord_in.x++; + top_r = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail; + + top_r = top_r - top_l; + top = top_l + x_lerp * top_r; + dst = convert_uint4(top * out_scale + out_tail); + + write_imageui(output, coord_out, dst); +} diff --git a/src/tim/vx/internal/src/libnnext/ops/cl/resize_1d_nearest.cl b/src/tim/vx/internal/src/libnnext/ops/cl/resize_1d_nearest.cl new file mode 100644 index 0000000..f6ac8b9 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/cl/resize_1d_nearest.cl @@ -0,0 +1,36 @@ + +#define NEAREST_INDEX_PROCESS() \ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \ + float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x + round_value; \ + int in_x_idx = convert_int(in_x); \ + +__kernel void resize_1d_nearest_F32toF32( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + float scale_x, + float half_pixel_value, + float round_value) +{ + NEAREST_INDEX_PROCESS() + int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0); + float4 dst; + dst = read_imagef(input, coord_in); + write_imagef(output, coord_out, dst); +} + + +__kernel void resize_1d_nearest_U8toU8( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + float scale_x, + float half_pixel_value, + float round_value, + float output_scale, + float output_tail) +{ + NEAREST_INDEX_PROCESS() + int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0); + uint4 dst; + dst = convert_uint4(convert_float4(read_imageui(input, coord_in)) * output_scale + output_tail); + write_imageui(output, coord_out, dst); +} diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_2d.vx b/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_2d.vx index bc3b6c4..8b03b5c 100644 --- a/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_2d.vx +++ b/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_2d.vx @@ -1,5 +1,7 @@ #include "cl_viv_vx_ext.h" +_viv_uniform float 
alpha; + float4 eltwise_unary_sin(float4 x) { return native_sin(x); @@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x) float4 eltwise_unary_elu(float4 val) { float4 x = val * logE; - x = exp2(x) - 1; + x = exp2(x) * alpha - alpha; return val < 0 ? x : val; } @@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4; __kernel void func_name##_##src_type_name##to##dst_type_name##_2D( \ __read_only image2d_array_t input, \ __write_only image2d_array_t output, \ - int type \ + int type, \ + float _alpha \ ) \ { \ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \ @@ -194,7 +197,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8; __kernel void func_name##_BF16toBF16_2D( \ __read_only image2d_array_t input, \ __write_only image2d_array_t output, \ - int type \ + int type, \ + float _alpha \ ) \ { \ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \ diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_3d.vx b/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_3d.vx index 832c948..f452849 100644 --- a/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_3d.vx +++ b/src/tim/vx/internal/src/libnnext/ops/vx/eltwise_unary_3d.vx @@ -1,5 +1,7 @@ #include "cl_viv_vx_ext.h" +_viv_uniform float alpha; + float4 eltwise_unary_sin(float4 x) { return native_sin(x); @@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x) float4 eltwise_unary_elu(float4 val) { float4 x = val * logE; - x = exp2(x) - 1; + x = exp2(x) * alpha - alpha; return val < 0 ? x : val; } @@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4; __kernel void func_name##_##src_type_name##to##dst_type_name( \ __read_only image2d_array_t input, \ __write_only image2d_array_t output, \ - int type \ + int type, \ + float _alpha \ ) \ { \ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \ @@ -192,7 +195,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8; __kernel void func_name##_BF16toBF16( \ __read_only image2d_array_t input, \ __write_only image2d_array_t output, \ - int type \ + int type, \ + float _alpha \ ) \ { \ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \ diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_BF16.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_BF16.vx new file mode 100644 index 0000000..cfbae00 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_BF16.vx @@ -0,0 +1,148 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform float scale_x; +_viv_uniform int out_height; +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8; +_viv_uniform VXC_512Bits uniGetMaskShift_2x8; +_viv_uniform VXC_512Bits uniConvBF16toF32_odd_2x8; +_viv_uniform VXC_512Bits uniConvBF16toF32_even_2x8; +_viv_uniform VXC_512Bits uniConvBF16toF32_Part0_2x8; +_viv_uniform VXC_512Bits uniConvBF16toF32_Part1_2x8; +_viv_uniform float half_pixel_value; + +__kernel void resize_1d_bilinear_BF16toBF16_DOWN + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + + vxc_short8 top; + vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0); + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, 
coord_out.z, 0); + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 src; + float4 left4; + float4 right4; + float4 dst4; + + VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_even_2x8); + _viv_asm(COPY, right4, src, 16); + VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_odd_2x8); + _viv_asm(COPY, left4, src, 16); + right4 -= left4; + dst4 = right4 * x_lerp + left4; + vxc_ushort8 tmp, dst; + _viv_asm(COPY, tmp, dst4, 16); + dst.s0123 = tmp.s1357; + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); +} + +__kernel void resize_1d_bilinear_BF16toBF16_UP + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 right_x_f = ceil(in_x); + int4 right_x_idx = convert_int4(right_x_f); + + vxc_ushort8 src0, src1, dst0; + vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0); + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 bitextract_p0; + vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8); + vxc_ushort8 constData = 16; + VXC_DP2x8(maskShift, bitextract_p0, constData, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + coord_in.y ++; + VXC_OP4(img_load_3d, src0, input, 
coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 dst_tmp; + float4 left4; + float4 right4; + + VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part0_2x8); + _viv_asm(COPY, left4, dst_tmp, 16); + VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part1_2x8); + _viv_asm(COPY, right4, dst_tmp, 16); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + + vxc_ushort8 tmp, dst; + _viv_asm(COPY, tmp, dst4, 16); + dst.s0123 = tmp.s1357; + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_out.y++; + } while (coord_out.y < out_height); +} diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_DOWN_NX.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_DOWN_NX.vx new file mode 100644 index 0000000..e8cc06c --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_DOWN_NX.vx @@ -0,0 +1,136 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniResizeNxDown_2x8; +_viv_uniform int out_height; + +#define RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \ + read_type read_data, save_data; \ + data_type in0, result; \ + \ + int8 input_desc; \ + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \ + _viv_asm(MOV, coord_in.w, baseAddr); \ + \ + int8 output_desc; \ + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \ + _viv_asm(MOV, coord_out.w, baseAddr); \ + \ + while (coord_out.y < out_height) \ + { \ + VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); \ + _viv_asm(COPY, in0, read_data, 16); \ + VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \ + _viv_asm(COPY, save_data, result, 16); \ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \ + VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \ + coord_in.y++; \ + coord_out.y++; \ + } \ + +#define RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x << 1; \ + RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(I8, I8, vxc_char16, vxc_char16) + + + +#define RESIZE_1D_2X_DOWN_8BIT_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x << 1; \ + RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_2X_DOWN_8BIT_SAME(U8, U8, vxc_uchar16, 
vxc_uchar16) +RESIZE_1D_2X_DOWN_8BIT_SAME(I8, I8, vxc_char16, vxc_char16) + + +#define RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \ + read_type read_data, read_data1, save_data; \ + data_type in0, in1, result; \ + \ + int8 input_desc; \ + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \ + _viv_asm(MOV, coord_in.w, baseAddr); \ + \ + int8 output_desc; \ + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \ + _viv_asm(MOV, coord_out.w, baseAddr); \ + \ + while (coord_out.y < out_height) \ + { \ + VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \ + _viv_asm(COPY, in0, read_data, 16); \ + VXC_OP4(img_load_3d, read_data1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \ + _viv_asm(COPY, in1, read_data1, 16); \ + VXC_DP2x8(result, in0, in1, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \ + _viv_asm(COPY, save_data, result, 16); \ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \ + VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \ + coord_in.y++; \ + coord_out.y++; \ + } \ + +#define RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x << 1; \ + RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(F16, F16, vxc_short8, vxc_half8) + + + +#define RESIZE_1D_2X_DOWN_16BIT_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x << 1; \ + RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_2X_DOWN_16BIT_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_2X_DOWN_16BIT_SAME(F16, F16, vxc_short8, vxc_half8) + + diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_F16.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_F16.vx new file mode 100644 index 0000000..3487679 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_F16.vx @@ -0,0 +1,216 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniExtact8Bit_2x8; +_viv_uniform VXC_512Bits uniFp16toFp32_4x4; +_viv_uniform VXC_512Bits uniRightSubLeft_4x4; +_viv_uniform VXC_512Bits uniExtactHalf8_2x8; +_viv_uniform float scale_x; +_viv_uniform int out_height; +_viv_uniform float uint8Scale; +_viv_uniform float output_ZP; +_viv_uniform VXC_512Bits uniFp16toFp32_part1_4x4; +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8; +_viv_uniform VXC_512Bits uniGetMaskShift_2x8; +_viv_uniform float half_pixel_value; +_viv_uniform VXC_512Bits uniConvertFp2FP32_left_4x4; +_viv_uniform VXC_512Bits uniConvertFp2FP32_right_4x4; + +__kernel 
void resize_1d_bilinear_F16toF16_DOWN + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 left4; + float4 right4; + + vxc_ushort8 src, result; + vxc_half8 src_half, dst; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + _viv_asm(COPY, src_half, src, 16); + + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4); + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + + half4 tmp; + _viv_asm(CONV, tmp, dst4); + VXC_DP2x8(dst, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8); + _viv_asm(COPY, result, dst, 16); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); + +} + +__kernel void resize_1d_bilinear_F16toU8_DOWN + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 left4; + float4 right4; + + vxc_ushort8 src; + vxc_uchar8 result; + vxc_half8 src_half, dst; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, src, 
input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + _viv_asm(COPY, src_half, src, 16); + + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4); + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + + dst4 = dst4 * uint8Scale + output_ZP; + int4 dst = convert_int4_rte(dst4); + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); +} + +__kernel void resize_1d_bilinear_F16toF16_UP + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 right_x_f = ceil(in_x); + int4 right_x_idx = convert_int4(right_x_f); + + vxc_ushort8 src0, src1, dst0; + vxc_half8 top; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 bitextract_p0; + vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8); + vxc_ushort8 constData = 16; + VXC_DP2x8(maskShift, bitextract_p0, constData, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + _viv_asm(COPY, top, dst0, 16); + + coord_in.y ++; + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + float4 left4; + float4 right4; + + VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_4x4); + VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_part1_4x4); + float4 dst4 = right4 * x_lerp + left4; + + half4 tmp; + _viv_asm(CONV, tmp, dst4); + VXC_DP2x8(top, tmp, tmp, VXC_MODIFIER(0, 3, 0, 
VXC_RM_TowardZero, 0), uniExtactHalf8_2x8); + _viv_asm(COPY, dst0, top, 16); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst0, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_out.y++; + } while (coord_out.y < out_height); +} diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_I16.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_I16.vx new file mode 100644 index 0000000..956dc62 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_I16.vx @@ -0,0 +1,147 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniExtact8Bit_2x8; +_viv_uniform float scale_x; +_viv_uniform int out_height; +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8; +_viv_uniform VXC_512Bits uniGetMaskShift_2x8; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4; +_viv_uniform float dfpScale; +_viv_uniform float half_pixel_value; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4; + +__kernel void resize_1d_bilinear_I16toI16_UP + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 right_x_f = ceil(in_x); + int4 right_x_idx = convert_int4(right_x_f); + + vxc_ushort8 src0, src1, dst0; + + vxc_short8 top; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 bitextract_p0; + vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8); + vxc_ushort8 constData = 16; + VXC_DP2x8(maskShift, bitextract_p0, constData, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + _viv_asm(COPY, top, dst0, 16); + + float4 left4; + float4 right4; + + coord_in.y ++; + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4); + VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4); + float4 dst4 = right4 * x_lerp + left4; + dst4 = dst4 * dfpScale; + int4 dst = convert_int4_rte(dst4); + VXC_DP2x8(top, dst, dst, 
VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_out.y ++; + } while (coord_out.y < out_height); +} + +__kernel void resize_1d_bilinear_I16toI16_DOWN + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + vxc_short8 src; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + float4 left4; + float4 right4; + vxc_short8 result; + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + + do + { + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4); + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + dst4 = dst4 * dfpScale; + + int4 dst = convert_int4_rte(dst4); + + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); +} + diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_I8.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_I8.vx new file mode 100644 index 0000000..e25071c --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_I8.vx @@ -0,0 +1,148 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniExtact8Bit_2x8; +_viv_uniform float scale_x; +_viv_uniform int out_height; +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8; +_viv_uniform VXC_512Bits uniGetMaskShift_2x8; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4; +_viv_uniform float dfpScale; +_viv_uniform float half_pixel_value; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4; +_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4; + +__kernel void resize_1d_bilinear_I8toI8_UP + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 
2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 right_x_f = ceil(in_x); + int4 right_x_idx = convert_int4(right_x_f); + + vxc_uchar16 src0, dst0; + + vxc_char16 top; + + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 bitextract_p0; + vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8); + vxc_ushort8 constData = 8; + VXC_DP2x8(maskShift, bitextract_p0, constData, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_BitExtract(dst0, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + _viv_asm(COPY, top, dst0, 16); + + coord_in.y++; + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + float4 left4; + float4 right4; + + VXC_DP4x4(left4, top, top, \ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4); + VXC_DP4x4(right4, top, top, \ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4); + + float4 dst4 = right4 * x_lerp + left4; + + dst4 = dst4 * dfpScale; + int4 dst = convert_int4_rte(dst4); + VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + + coord_out.y ++; + } while (coord_out.y < out_height); +} + +__kernel void resize_1d_bilinear_I8toI8_DOWN + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + vxc_char16 src; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + float4 left4; + float4 right4; + vxc_char16 result; + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + + do + { + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, src, input, 
coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4); + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + dst4 = dst4 * dfpScale; + + int4 dst = convert_int4_rte(dst4); + + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); + +} diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_U8.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_U8.vx new file mode 100644 index 0000000..b25fba9 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_U8.vx @@ -0,0 +1,212 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_4x4; +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_left_4x4; +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_right_4x4; +_viv_uniform VXC_512Bits uniExtact8Bit_2x8; +_viv_uniform float scale_x; +_viv_uniform int out_height; +_viv_uniform int input_ZP; +_viv_uniform float uint8Scale; +_viv_uniform float output_ZP; +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_part1_4x4; +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8; +_viv_uniform VXC_512Bits uniGetMaskShift_2x8; +_viv_uniform float half_pixel_value; + +__kernel void resize_1d_bilinear_U8toF16_DOWN + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + vxc_uchar16 src; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + float4 left4; + float4 right4; + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + unsigned char inputZP; + _viv_asm(COPY, inputZP, input_ZP, 4); + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + + do + { + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4); + VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 
3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + dst4 *= uint8Scale; + half4 dst; + _viv_asm(CONV, dst, dst4); + vxc_short8 dst_short; + _viv_asm(COPY, dst_short, dst, 16); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst_short, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); +} + +__kernel void resize_1d_bilinear_U8toU8_UP + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + float4 right_x_f = ceil(in_x); + int4 right_x_idx = convert_int4(right_x_f); + + + vxc_uchar16 src0, src1; + + vxc_uchar16 top; + + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 bitextract_p0; + vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8); + vxc_ushort8 constData = 8; + VXC_DP2x8(maskShift, bitextract_p0, constData, \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_BitExtract(top, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + coord_in.y++; + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, + VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + float4 left4; + float4 right4; + + unsigned char inputZP; + _viv_asm(COPY, inputZP, input_ZP, 4); + VXC_DP4x4(left4, top, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_4x4); + VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_part1_4x4); + + float4 dst4 = right4 * x_lerp + left4; + dst4 = dst4 * uint8Scale + output_ZP; + int4 dst = convert_int4_rte(dst4); + VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + + coord_out.y ++; + } while (coord_out.y < out_height); +} + +__kernel void resize_1d_bilinear_U8toU8_DOWN + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + float4 left_x_f = floor(in_x); + float4 x_lerp = in_x - left_x_f; + int4 left_x_idx = convert_int4(left_x_f); + vxc_uchar16 src; + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + 
float4 left4; + float4 right4; + vxc_uchar16 result; + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + unsigned char inputZP; + _viv_asm(COPY, inputZP, input_ZP, 4); + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + + do + { + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0)); + coord_in.x = left_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0)); + + VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4); + VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4); + right4 -= left4; + float4 dst4 = right4 * x_lerp + left4; + dst4 = dst4 * uint8Scale + output_ZP; + int4 dst = convert_int4_rte(dst4); + + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + coord_in.y++; + coord_out.y ++; + } while (coord_out.y < out_height); + +} diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_U8_opt.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_U8_opt.vx new file mode 100644 index 0000000..ab7e74b --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_U8_opt.vx @@ -0,0 +1,78 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform float scale_x; +_viv_uniform int out_height; +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8; +_viv_uniform VXC_512Bits uniGetMaskShift_2x8; +_viv_uniform VXC_512Bits uniBilinear_4x4; +_viv_uniform float half_pixel_value; + +__kernel void resize_1d_bilinear_U8toU8_UP_opt + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers, + __read_only image2d_array_t scale + ) +{ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(2), 0); + + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value; + + float4 left_x_f = floor(in_x); + int4 left_x_idx = convert_int4(left_x_f); + int4 right_x_idx = left_x_idx + 1; + + vxc_uchar16 src0; + + vxc_uchar16 src_mask; + + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 bitextract_p0; + vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8); + vxc_ushort8 constData = 8; + VXC_DP2x8(maskShift, 
bitextract_p0, constData, + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8); + + vxc_ushort8 lerp_0; + vxc_half8 lerp; + + int2 coord = (int2)(coord_out.x * 2, 0); + VXC_ReadImage(lerp_0, scale, coord, VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + _viv_asm(COPY, lerp, lerp_0, 16); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + do + { + VXC_BitExtract(src_mask, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + coord_in.y++; + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, + VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + vxc_uchar16 dst; + VXC_DP4x4(dst, src_mask, lerp, + VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniBilinear_4x4); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); + + coord_out.y ++; + } while (coord_out.y < out_height); +} diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_UP_NX.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_UP_NX.vx new file mode 100644 index 0000000..3ddd305 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_bilinear_UP_NX.vx @@ -0,0 +1,155 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniResizeNxUp_2x8; +_viv_uniform int out_height; + +#define RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ + read_type read_data, save_data; \ + data_type in0, result; \ + \ + int8 input_desc; \ + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \ + _viv_asm(MOV, coord_in.w, baseAddr); \ + \ + int8 output_desc; \ + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \ + _viv_asm(MOV, coord_out.w, baseAddr); \ + \ + while (coord_out.y < out_height) \ + { \ + VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \ + _viv_asm(COPY, in0, read_data, 16); \ + VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxUp_2x8); \ + _viv_asm(COPY, save_data, result, 16); \ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \ + VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \ + coord_in.y++; \ + coord_out.y++; \ + } \ + +#define RESIZE_1D_2X_HALF_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_HALF_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = (coord_out.x * 2 - 1) >> 2; \ + coord_in.x = coord_out.x == 0 ? 
-1 : coord_in.x; \ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_2X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_2X_HALF_SAME(I8, I8, vxc_char16, vxc_char16) +RESIZE_1D_2X_HALF_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_2X_HALF_SAME(F16, F16, vxc_short8, vxc_half8) + + +#define RESIZE_1D_2X_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x >> 1; \ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_2X_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_2X_SAME(I8, I8, vxc_char16, vxc_char16) +RESIZE_1D_2X_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_2X_SAME(F16, F16, vxc_short8, vxc_half8) + + +#define RESIZE_1D_4X_HALF_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_HALF_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = (coord_out.x * 2 - 3) >> 3; \ + coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_4X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_4X_HALF_SAME(I8, I8, vxc_char16, vxc_char16) +RESIZE_1D_4X_HALF_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_4X_HALF_SAME(F16, F16, vxc_short8, vxc_half8) + + +#define RESIZE_1D_4X_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x >> 2; \ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_4X_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_4X_SAME(I8, I8, vxc_char16, vxc_char16) +RESIZE_1D_4X_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_4X_SAME(F16, F16, vxc_short8, vxc_half8) + + +#define RESIZE_1D_8X_HALF_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_HALF_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = (coord_out.x * 2 - 7) >> 4; \ + coord_in.x = coord_out.x == 0 ? 
-1 : coord_in.x; \ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_8X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_8X_HALF_SAME(I8, I8, vxc_char16, vxc_char16) +RESIZE_1D_8X_HALF_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_8X_HALF_SAME(F16, F16, vxc_short8, vxc_half8) + + +#define RESIZE_1D_8X_SAME(name0, name1, read_type, data_type) \ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_SAME \ + ( \ + __read_only image2d_array_t input, \ + __write_only image2d_array_t output, \ + int scale_type \ + ) \ +{ \ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \ + coord_in.x = coord_out.x >> 3; \ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \ +} + +RESIZE_1D_8X_SAME(U8, U8, vxc_uchar16, vxc_uchar16) +RESIZE_1D_8X_SAME(I8, I8, vxc_char16, vxc_char16) +RESIZE_1D_8X_SAME(I16, I16, vxc_short8, vxc_short8) +RESIZE_1D_8X_SAME(F16, F16, vxc_short8, vxc_half8) + + diff --git a/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_nearest.vx b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_nearest.vx new file mode 100644 index 0000000..75d0c47 --- /dev/null +++ b/src/tim/vx/internal/src/libnnext/ops/vx/resize_1d_nearest.vx @@ -0,0 +1,337 @@ +#include "cl_viv_vx_ext.h" + +_viv_uniform VXC_512Bits uniMultiplyAndPostShift_2x8; +_viv_uniform float scale_x; +_viv_uniform float half_pixel_value; +_viv_uniform float round_value; +_viv_uniform int2 multAndoutZP;//[0:15] multiplier, [31:63] output zp + +#define NEAREST_INDEX_PROCESS() \ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); \ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x + round_value; \ + int4 in_x_idx = convert_int4(in_x); \ + + +__kernel void resize_1d_nearest_F16toF16 + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_short8 src; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0)); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +_viv_uniform VXC_512Bits uniGetExtractData_2x8; +__kernel void resize_1d_nearest_F16toF16_op + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_ushort8 src0, src1, dst; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + 
int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16); + vxc_ushort8 input_idx; + _viv_asm(COPY, input_idx, in_x_idx, 16); + VXC_DP2x8(mask, input_idx, input_idx, \ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8); + VXC_BitExtract(dst, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0)); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +_viv_uniform VXC_512Bits uniConvertI8toI8_2x8; +__kernel void resize_1d_nearest_I8toI8 + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_char16 src; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0)); + VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +__kernel void resize_1d_nearest_I8toI8_op + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_uchar16 src0, dst0; + vxc_char16 dst; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8); + vxc_ushort8 input_idx; + _viv_asm(COPY, input_idx, in_x_idx, 16); + 
VXC_DP2x8(mask, input_idx, input_idx, \ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8); + VXC_BitExtract(dst0, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0)); + _viv_asm(COPY, dst, dst0, 8); + VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8); + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +__kernel void resize_1d_nearest_U8toU8 + ( + image2d_array_t input, + image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_uchar16 src; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0)); + + vxc_ushort8 multiplier; + _viv_asm(COPY, multiplier, multAndoutZP, 16); + VXC_DP2x8(src, src, multiplier, \ + VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +__kernel void resize_1d_nearest_U8toU8_op + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_uchar16 src0, dst; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); + + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8); + vxc_ushort8 input_idx; + _viv_asm(COPY, input_idx, in_x_idx, 16); + VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8); + VXC_BitExtract(dst, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0)); + vxc_ushort8 multiplier; + _viv_asm(COPY, multiplier, multAndoutZP, 16); + VXC_DP2x8(dst, dst, multiplier, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +__kernel void resize_1d_nearest_I16toI16 + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int 
half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_short8 src; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.y; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.z; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0)); + coord_in.x = in_x_idx.w; + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0)); + + VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} + +__kernel void resize_1d_nearest_I16toI16_op + ( + __read_only image2d_array_t input, + __write_only image2d_array_t output, + int align_corners, + int half_pixel_centers + ) +{ + NEAREST_INDEX_PROCESS() + + vxc_ushort8 src0, src1, dst0; + vxc_short8 dst; + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0); + + int8 input_desc; + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; + _viv_asm(MOV, coord_in.w, baseAddr); + + int8 output_desc; + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; + _viv_asm(MOV, coord_out.w, baseAddr); + + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); + + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16); + vxc_ushort8 input_idx; + _viv_asm(COPY, input_idx, in_x_idx, 16); + VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8); + VXC_BitExtract(dst0, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0)); + _viv_asm(COPY, dst, dst0, 8); + VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8); + + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst, + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0)); +} diff --git a/src/tim/vx/internal/src/libnnext/vsi_nn_libnnext_resource.c b/src/tim/vx/internal/src/libnnext/vsi_nn_libnnext_resource.c index 1d65b5e..366041c 100644 --- a/src/tim/vx/internal/src/libnnext/vsi_nn_libnnext_resource.c +++ b/src/tim/vx/internal/src/libnnext/vsi_nn_libnnext_resource.c @@ -3177,6 +3177,8 @@ __kernel void detect_post_box_U8_U8toF32(\n\ static const char eltwise_unary_2d_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ \n\ +_viv_uniform float alpha;\n\ +\n\ float4 eltwise_unary_sin(float4 x)\n\ {\n\ return native_sin(x);\n\ @@ -3201,7 +3203,7 @@ float4 eltwise_unary_log(float4 x)\n\ float4 eltwise_unary_elu(float4 val)\n\ {\n\ float4 x = val * logE;\n\ - x = exp2(x) - 1;\n\ + x = exp2(x) 
* alpha - alpha;\n\ \n\ return val < 0 ? x : val;\n\ }\n\ @@ -3255,7 +3257,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;\n\ __kernel void func_name##_##src_type_name##to##dst_type_name##_2D( \\\n\ __read_only image2d_array_t input, \\\n\ __write_only image2d_array_t output, \\\n\ - int type \\\n\ + int type, \\\n\ + float _alpha \\\n\ ) \\\n\ { \\\n\ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \\\n\ @@ -3371,7 +3374,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;\n\ __kernel void func_name##_BF16toBF16_2D( \\\n\ __read_only image2d_array_t input, \\\n\ __write_only image2d_array_t output, \\\n\ - int type \\\n\ + int type, \\\n\ + float _alpha \\\n\ ) \\\n\ { \\\n\ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \\\n\ @@ -3412,6 +3416,8 @@ ELTSISE_UNARY_BF16_2D(hard_sigmoid)\n\ static const char eltwise_unary_3d_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ \n\ +_viv_uniform float alpha;\n\ +\n\ float4 eltwise_unary_sin(float4 x)\n\ {\n\ return native_sin(x);\n\ @@ -3436,7 +3442,7 @@ float4 eltwise_unary_log(float4 x)\n\ float4 eltwise_unary_elu(float4 val)\n\ {\n\ float4 x = val * logE;\n\ - x = exp2(x) - 1;\n\ + x = exp2(x) * alpha - alpha;\n\ \n\ return val < 0 ? x : val;\n\ }\n\ @@ -3490,7 +3496,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;\n\ __kernel void func_name##_##src_type_name##to##dst_type_name( \\\n\ __read_only image2d_array_t input, \\\n\ __write_only image2d_array_t output, \\\n\ - int type \\\n\ + int type, \\\n\ + float _alpha \\\n\ ) \\\n\ { \\\n\ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \\\n\ @@ -3604,7 +3611,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;\n\ __kernel void func_name##_BF16toBF16( \\\n\ __read_only image2d_array_t input, \\\n\ __write_only image2d_array_t output, \\\n\ - int type \\\n\ + int type, \\\n\ + float _alpha \\\n\ ) \\\n\ { \\\n\ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \\\n\ @@ -28076,6 +28084,1601 @@ TENSOR_KERAS_RELU(U8, U8, _2D, Image, U8toF32, F32toU8, vxc_uchar TENSOR_KERAS_RELU(U8, F16, _2D, Image, U8toF32, F32toF16, vxc_uchar8, vxc_ushort8)\n\ "; /* end of relu_keras_vx*/ +static const char resize_1d_bilinear_BF16_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform int out_height;\n\ +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;\n\ +_viv_uniform VXC_512Bits uniGetMaskShift_2x8;\n\ +_viv_uniform VXC_512Bits uniConvBF16toF32_odd_2x8;\n\ +_viv_uniform VXC_512Bits uniConvBF16toF32_even_2x8;\n\ +_viv_uniform VXC_512Bits uniConvBF16toF32_Part0_2x8;\n\ +_viv_uniform VXC_512Bits uniConvBF16toF32_Part1_2x8;\n\ +_viv_uniform float half_pixel_value;\n\ +\n\ +__kernel void resize_1d_bilinear_BF16toBF16_DOWN\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ +\n\ + vxc_short8 top;\n\ + vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * 
input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 src;\n\ + float4 left4;\n\ + float4 right4;\n\ + float4 dst4;\n\ +\n\ + VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_even_2x8);\n\ + _viv_asm(COPY, right4, src, 16);\n\ + VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_odd_2x8);\n\ + _viv_asm(COPY, left4, src, 16);\n\ + right4 -= left4;\n\ + dst4 = right4 * x_lerp + left4;\n\ + vxc_ushort8 tmp, dst;\n\ + _viv_asm(COPY, tmp, dst4, 16);\n\ + dst.s0123 = tmp.s1357;\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +__kernel void resize_1d_bilinear_BF16toBF16_UP\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 right_x_f = ceil(in_x);\n\ + int4 right_x_idx = convert_int4(right_x_f);\n\ +\n\ + vxc_ushort8 src0, src1, dst0;\n\ + vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 bitextract_p0;\n\ + vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};\n\ + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);\n\ + vxc_ushort8 constData = 16;\n\ + VXC_DP2x8(maskShift, bitextract_p0, constData, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + 
VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + coord_in.y ++;\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 dst_tmp;\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part0_2x8);\n\ + _viv_asm(COPY, left4, dst_tmp, 16);\n\ + VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part1_2x8);\n\ + _viv_asm(COPY, right4, dst_tmp, 16);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ +\n\ + vxc_ushort8 tmp, dst;\n\ + _viv_asm(COPY, tmp, dst4, 16);\n\ + dst.s0123 = tmp.s1357;\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_out.y++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +"; /* end of resize_1d_bilinear_BF16_vx*/ + +static const char resize_1d_bilinear_DOWN_NX_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniResizeNxDown_2x8;\n\ +_viv_uniform int out_height;\n\ +\n\ +#define RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \\\n\ + read_type read_data, save_data; \\\n\ + data_type in0, result; \\\n\ + \\\n\ + int8 input_desc; \\\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \\\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \\\n\ + _viv_asm(MOV, coord_in.w, baseAddr); \\\n\ + \\\n\ + int8 output_desc; \\\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \\\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \\\n\ + _viv_asm(MOV, coord_out.w, baseAddr); \\\n\ + \\\n\ + while (coord_out.y < out_height) \\\n\ + { \\\n\ + VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \\\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); \\\n\ + _viv_asm(COPY, in0, read_data, 16); \\\n\ + VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \\\n\ + _viv_asm(COPY, save_data, result, 16); \\\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \\\n\ + VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \\\n\ + coord_in.y++; \\\n\ + coord_out.y++; \\\n\ + } \\\n\ +\n\ +#define RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x << 1; \\\n\ + RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +\n\ +\n\ +\n\ +#define RESIZE_1D_2X_DOWN_8BIT_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, 
get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x << 1; \\\n\ + RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_2X_DOWN_8BIT_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_2X_DOWN_8BIT_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +\n\ +\n\ +#define RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \\\n\ + read_type read_data, read_data1, save_data; \\\n\ + data_type in0, in1, result; \\\n\ + \\\n\ + int8 input_desc; \\\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \\\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \\\n\ + _viv_asm(MOV, coord_in.w, baseAddr); \\\n\ + \\\n\ + int8 output_desc; \\\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \\\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \\\n\ + _viv_asm(MOV, coord_out.w, baseAddr); \\\n\ + \\\n\ + while (coord_out.y < out_height) \\\n\ + { \\\n\ + VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \\\n\ + _viv_asm(COPY, in0, read_data, 16); \\\n\ + VXC_OP4(img_load_3d, read_data1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \\\n\ + _viv_asm(COPY, in1, read_data1, 16); \\\n\ + VXC_DP2x8(result, in0, in1, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \\\n\ + _viv_asm(COPY, save_data, result, 16); \\\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \\\n\ + VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \\\n\ + coord_in.y++; \\\n\ + coord_out.y++; \\\n\ + } \\\n\ +\n\ +#define RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x << 1; \\\n\ + RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +\n\ +#define RESIZE_1D_2X_DOWN_16BIT_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x << 1; \\\n\ + RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_2X_DOWN_16BIT_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_2X_DOWN_16BIT_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +"; /* end of resize_1d_bilinear_DOWN_NX_vx*/ + +static const char resize_1d_bilinear_F16_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniExtact8Bit_2x8;\n\ +_viv_uniform VXC_512Bits uniFp16toFp32_4x4;\n\ +_viv_uniform VXC_512Bits uniRightSubLeft_4x4;\n\ +_viv_uniform VXC_512Bits uniExtactHalf8_2x8;\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform int out_height;\n\ 
+_viv_uniform float uint8Scale;\n\ +_viv_uniform float output_ZP;\n\ +_viv_uniform VXC_512Bits uniFp16toFp32_part1_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;\n\ +_viv_uniform VXC_512Bits uniGetMaskShift_2x8;\n\ +_viv_uniform float half_pixel_value;\n\ +_viv_uniform VXC_512Bits uniConvertFp2FP32_left_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertFp2FP32_right_4x4;\n\ +\n\ +__kernel void resize_1d_bilinear_F16toF16_DOWN\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + vxc_ushort8 src, result;\n\ + vxc_half8 src_half, dst;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + _viv_asm(COPY, src_half, src, 16);\n\ +\n\ + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);\n\ + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ +\n\ + half4 tmp;\n\ + _viv_asm(CONV, tmp, dst4);\n\ + VXC_DP2x8(dst, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);\n\ + _viv_asm(COPY, result, dst, 16);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +\n\ +}\n\ +\n\ +__kernel void resize_1d_bilinear_F16toU8_DOWN\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + vxc_ushort8 src;\n\ + vxc_uchar8 result;\n\ + vxc_half8 
src_half, dst;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + _viv_asm(COPY, src_half, src, 16);\n\ +\n\ + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);\n\ + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ +\n\ + dst4 = dst4 * uint8Scale + output_ZP;\n\ + int4 dst = convert_int4_rte(dst4);\n\ + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +__kernel void resize_1d_bilinear_F16toF16_UP\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 right_x_f = ceil(in_x);\n\ + int4 right_x_idx = convert_int4(right_x_f);\n\ +\n\ + vxc_ushort8 src0, src1, dst0;\n\ + vxc_half8 top;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 bitextract_p0;\n\ + vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};\n\ + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);\n\ + vxc_ushort8 constData = 16;\n\ + VXC_DP2x8(maskShift, bitextract_p0, constData, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, 
output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + _viv_asm(COPY, top, dst0, 16);\n\ +\n\ + coord_in.y ++;\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_4x4);\n\ + VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_part1_4x4);\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ +\n\ + half4 tmp;\n\ + _viv_asm(CONV, tmp, dst4);\n\ + VXC_DP2x8(top, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);\n\ + _viv_asm(COPY, dst0, top, 16);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst0,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_out.y++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +"; /* end of resize_1d_bilinear_F16_vx*/ + +static const char resize_1d_bilinear_I16_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniExtact8Bit_2x8;\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform int out_height;\n\ +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;\n\ +_viv_uniform VXC_512Bits uniGetMaskShift_2x8;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;\n\ +_viv_uniform float dfpScale;\n\ +_viv_uniform float half_pixel_value;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;\n\ +\n\ +__kernel void resize_1d_bilinear_I16toI16_UP\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ +\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ +\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 right_x_f = ceil(in_x);\n\ + int4 right_x_idx = convert_int4(right_x_f);\n\ +\n\ + vxc_ushort8 src0, src1, dst0;\n\ +\n\ + vxc_short8 top;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 bitextract_p0;\n\ + vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};\n\ + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);\n\ + vxc_ushort8 constData = 16;\n\ + VXC_DP2x8(maskShift, bitextract_p0, constData, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);\n\ 
+\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + _viv_asm(COPY, top, dst0, 16);\n\ +\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + coord_in.y ++;\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);\n\ + VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ + dst4 = dst4 * dfpScale;\n\ + int4 dst = convert_int4_rte(dst4);\n\ + VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +__kernel void resize_1d_bilinear_I16toI16_DOWN\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + vxc_short8 src;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ + float4 left4;\n\ + float4 right4;\n\ + vxc_short8 result;\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);\n\ + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ + dst4 = dst4 * dfpScale;\n\ +\n\ + int4 dst = convert_int4_rte(dst4);\n\ +\n\ + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, 
result,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +"; /* end of resize_1d_bilinear_I16_vx*/ + +static const char resize_1d_bilinear_I8_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniExtact8Bit_2x8;\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform int out_height;\n\ +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;\n\ +_viv_uniform VXC_512Bits uniGetMaskShift_2x8;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;\n\ +_viv_uniform float dfpScale;\n\ +_viv_uniform float half_pixel_value;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;\n\ +\n\ +__kernel void resize_1d_bilinear_I8toI8_UP\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ +\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ +\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 right_x_f = ceil(in_x);\n\ + int4 right_x_idx = convert_int4(right_x_f);\n\ +\n\ + vxc_uchar16 src0, dst0;\n\ +\n\ + vxc_char16 top;\n\ +\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 bitextract_p0;\n\ + vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};\n\ + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);\n\ + vxc_ushort8 constData = 8;\n\ + VXC_DP2x8(maskShift, bitextract_p0, constData, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_BitExtract(dst0, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + _viv_asm(COPY, top, dst0, 16);\n\ +\n\ + coord_in.y++;\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + VXC_DP4x4(left4, top, top, \\\n\ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);\n\ + VXC_DP4x4(right4, top, top, \\\n\ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);\n\ +\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ +\n\ + dst4 = dst4 * dfpScale;\n\ + int4 dst = convert_int4_rte(dst4);\n\ + VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +__kernel void 
resize_1d_bilinear_I8toI8_DOWN\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + vxc_char16 src;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ + float4 left4;\n\ + float4 right4;\n\ + vxc_char16 result;\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);\n\ + VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ + dst4 = dst4 * dfpScale;\n\ +\n\ + int4 dst = convert_int4_rte(dst4);\n\ +\n\ + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +\n\ +}\n\ +"; /* end of resize_1d_bilinear_I8_vx*/ + +static const char resize_1d_bilinear_U8_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_4x4;\n\ +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_left_4x4;\n\ +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_right_4x4;\n\ +_viv_uniform VXC_512Bits uniExtact8Bit_2x8;\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform int out_height;\n\ +_viv_uniform int input_ZP;\n\ +_viv_uniform float uint8Scale;\n\ +_viv_uniform float output_ZP;\n\ +_viv_uniform VXC_512Bits uniU8SubZPtoFp32_part1_4x4;\n\ +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;\n\ +_viv_uniform VXC_512Bits uniGetMaskShift_2x8;\n\ +_viv_uniform float half_pixel_value;\n\ +\n\ +__kernel void resize_1d_bilinear_U8toF16_DOWN\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 
left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + vxc_uchar16 src;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + unsigned char inputZP;\n\ + _viv_asm(COPY, inputZP, input_ZP, 4);\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);\n\ + VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ + dst4 *= uint8Scale;\n\ + half4 dst;\n\ + _viv_asm(CONV, dst, dst4);\n\ + vxc_short8 dst_short;\n\ + _viv_asm(COPY, dst_short, dst, 16);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst_short,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +__kernel void resize_1d_bilinear_U8toU8_UP\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ +\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ +\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + float4 right_x_f = ceil(in_x);\n\ + int4 right_x_idx = convert_int4(right_x_f);\n\ +\n\ +\n\ + vxc_uchar16 src0, src1;\n\ +\n\ + vxc_uchar16 top;\n\ +\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 bitextract_p0;\n\ + vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};\n\ + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);\n\ + vxc_ushort8 constData = 8;\n\ + VXC_DP2x8(maskShift, bitextract_p0, constData, \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, 
output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_BitExtract(top, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww,\n\ + VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + float4 left4;\n\ + float4 right4;\n\ +\n\ + unsigned char inputZP;\n\ + _viv_asm(COPY, inputZP, input_ZP, 4);\n\ + VXC_DP4x4(left4, top, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_4x4);\n\ + VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_part1_4x4);\n\ +\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ + dst4 = dst4 * uint8Scale + output_ZP;\n\ + int4 dst = convert_int4_rte(dst4);\n\ + VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +\n\ +__kernel void resize_1d_bilinear_U8toU8_DOWN\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float4 left_x_f = floor(in_x);\n\ + float4 x_lerp = in_x - left_x_f;\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + vxc_uchar16 src;\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ + float4 left4;\n\ + float4 right4;\n\ + vxc_uchar16 result;\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + unsigned char inputZP;\n\ + _viv_asm(COPY, inputZP, input_ZP, 4);\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ +\n\ + do\n\ + {\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = left_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);\n\ + VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);\n\ + right4 -= left4;\n\ + float4 dst4 = right4 * x_lerp + left4;\n\ + dst4 = dst4 * uint8Scale + output_ZP;\n\ + int4 dst = convert_int4_rte(dst4);\n\ +\n\ + VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,\n\ + 
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ + coord_in.y++;\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +\n\ +}\n\ +"; /* end of resize_1d_bilinear_U8_vx*/ + +static const char resize_1d_bilinear_U8_opt_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform int out_height;\n\ +_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;\n\ +_viv_uniform VXC_512Bits uniGetMaskShift_2x8;\n\ +_viv_uniform VXC_512Bits uniBilinear_4x4;\n\ +_viv_uniform float half_pixel_value;\n\ +\n\ +__kernel void resize_1d_bilinear_U8toU8_UP_opt\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers,\n\ + __read_only image2d_array_t scale\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(2), 0);\n\ +\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;\n\ +\n\ + float4 left_x_f = floor(in_x);\n\ + int4 left_x_idx = convert_int4(left_x_f);\n\ + int4 right_x_idx = left_x_idx + 1;\n\ +\n\ + vxc_uchar16 src0;\n\ +\n\ + vxc_uchar16 src_mask;\n\ +\n\ + int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 bitextract_p0;\n\ + vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};\n\ + VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx,\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);\n\ + vxc_ushort8 constData = 8;\n\ + VXC_DP2x8(maskShift, bitextract_p0, constData,\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);\n\ +\n\ + vxc_ushort8 lerp_0;\n\ + vxc_half8 lerp;\n\ +\n\ + int2 coord = (int2)(coord_out.x * 2, 0);\n\ + VXC_ReadImage(lerp_0, scale, coord, VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + _viv_asm(COPY, lerp, lerp_0, 16);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + do\n\ + {\n\ + VXC_BitExtract(src_mask, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + coord_in.y++;\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww,\n\ + VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_uchar16 dst;\n\ + VXC_DP4x4(dst, src_mask, lerp,\n\ + VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniBilinear_4x4);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +\n\ + coord_out.y ++;\n\ + } while (coord_out.y < out_height);\n\ +}\n\ +"; /* end of resize_1d_bilinear_U8_opt_vx*/ + +static const char resize_1d_bilinear_UP_NX_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniResizeNxUp_2x8;\n\ +_viv_uniform int out_height;\n\ +\n\ +#define RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ + read_type read_data, save_data; \\\n\ + data_type in0, result; \\\n\ + \\\n\ + int8 input_desc; \\\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \\\n\ + int baseAddr = (int)coord_in.z * 
input_desc.s4 + input_desc.s0; \\\n\ + _viv_asm(MOV, coord_in.w, baseAddr); \\\n\ + \\\n\ + int8 output_desc; \\\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \\\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \\\n\ + _viv_asm(MOV, coord_out.w, baseAddr); \\\n\ + \\\n\ + while (coord_out.y < out_height) \\\n\ + { \\\n\ + VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \\\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \\\n\ + _viv_asm(COPY, in0, read_data, 16); \\\n\ + VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxUp_2x8); \\\n\ + _viv_asm(COPY, save_data, result, 16); \\\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \\\n\ + VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \\\n\ + coord_in.y++; \\\n\ + coord_out.y++; \\\n\ + } \\\n\ +\n\ +#define RESIZE_1D_2X_HALF_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_HALF_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = (coord_out.x * 2 - 1) >> 2; \\\n\ + coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \\\n\ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_2X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_2X_HALF_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +RESIZE_1D_2X_HALF_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_2X_HALF_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +#define RESIZE_1D_2X_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x >> 1; \\\n\ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_2X_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_2X_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +RESIZE_1D_2X_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_2X_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +#define RESIZE_1D_4X_HALF_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_HALF_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = (coord_out.x * 2 - 3) >> 3; \\\n\ + coord_in.x = coord_out.x == 0 ? 
-1 : coord_in.x; \\\n\ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_4X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_4X_HALF_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +RESIZE_1D_4X_HALF_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_4X_HALF_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +#define RESIZE_1D_4X_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x >> 2; \\\n\ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_4X_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_4X_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +RESIZE_1D_4X_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_4X_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +#define RESIZE_1D_8X_HALF_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_HALF_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = (coord_out.x * 2 - 7) >> 4; \\\n\ + coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \\\n\ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_8X_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_8X_HALF_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +RESIZE_1D_8X_HALF_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_8X_HALF_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +#define RESIZE_1D_8X_SAME(name0, name1, read_type, data_type) \\\n\ +__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_SAME \\\n\ + ( \\\n\ + __read_only image2d_array_t input, \\\n\ + __write_only image2d_array_t output, \\\n\ + int scale_type \\\n\ + ) \\\n\ +{ \\\n\ + int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \\\n\ + coord_in.x = coord_out.x >> 3; \\\n\ + RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \\\n\ +}\n\ +\n\ +RESIZE_1D_8X_SAME(U8, U8, vxc_uchar16, vxc_uchar16)\n\ +RESIZE_1D_8X_SAME(I8, I8, vxc_char16, vxc_char16)\n\ +RESIZE_1D_8X_SAME(I16, I16, vxc_short8, vxc_short8)\n\ +RESIZE_1D_8X_SAME(F16, F16, vxc_short8, vxc_half8)\n\ +\n\ +\n\ +"; /* end of resize_1d_bilinear_UP_NX_vx*/ + +static const char resize_1d_nearest_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ +\n\ +_viv_uniform VXC_512Bits uniMultiplyAndPostShift_2x8;\n\ +_viv_uniform float scale_x;\n\ +_viv_uniform float half_pixel_value;\n\ +_viv_uniform float round_value;\n\ +_viv_uniform int2 multAndoutZP;//[0:15] multiplier, [31:63] output zp\n\ +\n\ +#define NEAREST_INDEX_PROCESS() \\\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \\\n\ + int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); \\\n\ + float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x + round_value; \\\n\ + int4 in_x_idx = convert_int4(in_x); \\\n\ +\n\ +\n\ +__kernel void resize_1d_nearest_F16toF16\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only 
image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_short8 src;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +_viv_uniform VXC_512Bits uniGetExtractData_2x8;\n\ +__kernel void resize_1d_nearest_F16toF16_op\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_ushort8 src0, src1, dst;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);\n\ + vxc_ushort8 input_idx;\n\ + _viv_asm(COPY, input_idx, in_x_idx, 16);\n\ + VXC_DP2x8(mask, input_idx, input_idx, \\\n\ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);\n\ + VXC_BitExtract(dst, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +_viv_uniform VXC_512Bits uniConvertI8toI8_2x8;\n\ +__kernel void resize_1d_nearest_I8toI8\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_char16 src;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, 
output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));\n\ + VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +__kernel void resize_1d_nearest_I8toI8_op\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_uchar16 src0, dst0;\n\ + vxc_char16 dst;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);\n\ + vxc_ushort8 input_idx;\n\ + _viv_asm(COPY, input_idx, in_x_idx, 16);\n\ + VXC_DP2x8(mask, input_idx, input_idx, \\\n\ + VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);\n\ + VXC_BitExtract(dst0, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));\n\ + _viv_asm(COPY, dst, dst0, 8);\n\ + VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +__kernel void resize_1d_nearest_U8toU8\n\ + (\n\ + image2d_array_t input,\n\ + image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_uchar16 src;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, 
input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_ushort8 multiplier;\n\ + _viv_asm(COPY, multiplier, multAndoutZP, 16);\n\ + VXC_DP2x8(src, src, multiplier, \\\n\ + VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +__kernel void resize_1d_nearest_U8toU8_op\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_uchar16 src0, dst;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);\n\ + vxc_ushort8 input_idx;\n\ + _viv_asm(COPY, input_idx, in_x_idx, 16);\n\ + VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);\n\ + VXC_BitExtract(dst, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));\n\ + vxc_ushort8 multiplier;\n\ + _viv_asm(COPY, multiplier, multAndoutZP, 16);\n\ + VXC_DP2x8(dst, dst, multiplier, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +__kernel void resize_1d_nearest_I16toI16\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_short8 src;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.y;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.z;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));\n\ + coord_in.x = in_x_idx.w;\n\ + VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 3, 0, 
VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +\n\ +__kernel void resize_1d_nearest_I16toI16_op\n\ + (\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + int align_corners,\n\ + int half_pixel_centers\n\ + )\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ +\n\ + vxc_ushort8 src0, src1, dst0;\n\ + vxc_short8 dst;\n\ + int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);\n\ +\n\ + int8 input_desc;\n\ + _viv_asm(COPY, input_desc, input, sizeof(input_desc));\n\ + int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;\n\ + _viv_asm(MOV, coord_in.w, baseAddr);\n\ +\n\ + int8 output_desc;\n\ + _viv_asm(COPY, output_desc, output, sizeof(output_desc));\n\ + baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;\n\ + _viv_asm(MOV, coord_out.w, baseAddr);\n\ +\n\ + VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ + VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),\n\ + VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));\n\ +\n\ + vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);\n\ + vxc_ushort8 input_idx;\n\ + _viv_asm(COPY, input_idx, in_x_idx, 16);\n\ + VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);\n\ + VXC_BitExtract(dst0, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));\n\ + _viv_asm(COPY, dst, dst0, 8);\n\ + VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);\n\ +\n\ + VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,\n\ + VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));\n\ +}\n\ +"; /* end of resize_1d_nearest_vx*/ + static const char resize_bilinear_BF16_vx[] = "#include \"cl_viv_vx_ext.h\"\n\ \n\ _viv_uniform float2 scale_xy;\n\ @@ -36151,14 +37754,14 @@ static const char eltwise_ops_helper_cl[] = "#pragma OPENCL EXTENSION CL_VIV_asm "; /* end of eltwise_ops_helper_cl*/ static const char eltwise_unary_cl[] = "\n\ -float4 eltwise_unary_sin(float4 x)\n\ +float4 eltwise_unary_sin(float4 x, float alpha)\n\ {\n\ return native_sin(x);\n\ }\n\ \n\ #define logE (1.44269502f)\n\ #define twoLogE (logE * 2.0f)\n\ -float4 eltwise_unary_exp(float4 x)\n\ +float4 eltwise_unary_exp(float4 x, float alpha)\n\ {\n\ x *= logE;\n\ x = exp2(x);\n\ @@ -36166,33 +37769,33 @@ float4 eltwise_unary_exp(float4 x)\n\ }\n\ \n\ #define rlogE (0.693147182f)\n\ -float4 eltwise_unary_log(float4 x)\n\ +float4 eltwise_unary_log(float4 x, float alpha)\n\ {\n\ x = log2(x);\n\ return x * rlogE;\n\ }\n\ \n\ -float4 eltwise_unary_elu(float4 val)\n\ +float4 eltwise_unary_elu(float4 val, float alpha)\n\ {\n\ float4 x = val * logE;\n\ - x = exp2(x) - 1;\n\ + x = exp2(x) * alpha - alpha;\n\ \n\ return val < 0 ? 
x : val;\n\ }\n\ \n\ -float4 eltwise_unary_neg(float4 x)\n\ +float4 eltwise_unary_neg(float4 x, float alpha)\n\ {\n\ return x * -1;\n\ }\n\ \n\ -float4 eltwise_unary_hard_sigmoid(float4 x)\n\ +float4 eltwise_unary_hard_sigmoid(float4 x, float alpha)\n\ {\n\ x = 0.2 * x + 0.5;\n\ x = clamp(x, 0, 1);\n\ return x;\n\ }\n\ \n\ -float4 _softrelu(float4 x)\n\ +float4 _softrelu(float4 x, float alpha)\n\ {\n\ x *= logE;\n\ x = exp2(x);\n\ @@ -36201,7 +37804,7 @@ float4 _softrelu(float4 x)\n\ return x * rlogE;\n\ }\n\ \n\ -float4 _tanh(float4 x)\n\ +float4 _tanh(float4 x, float alpha)\n\ {\n\ x *= -twoLogE;\n\ x = 1 + exp2(x);\n\ @@ -36209,10 +37812,10 @@ float4 _tanh(float4 x)\n\ return (2 * x - 1);\n\ }\n\ \n\ -float4 eltwise_unary_mish(float4 x)\n\ +float4 eltwise_unary_mish(float4 x, float alpha)\n\ {\n\ - float4 y = _softrelu(x);\n\ - x = x * _tanh(y);\n\ + float4 y = _softrelu(x, alpha);\n\ + x = x * _tanh(y, alpha);\n\ return x;\n\ }\n\ \n\ @@ -36224,14 +37827,15 @@ __kernel void func_name##_F32toF32 \\\n\ float inputScale, \\\n\ float inputTail, \\\n\ float outputScale, \\\n\ - float outputZP \\\n\ + float outputZP, \\\n\ + float alpha \\\n\ ) \\\n\ { \\\n\ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \\\n\ \\\n\ float4 src = read_imagef(input, coord); \\\n\ \\\n\ - float4 dst = eltwise_unary_##func_name(src); \\\n\ + float4 dst = eltwise_unary_##func_name(src, alpha); \\\n\ \\\n\ write_imagef(output, coord, dst); \\\n\ }\n\ @@ -36251,14 +37855,15 @@ __kernel void func_name##_F32toF32_2D \\\n\ float inputScale, \\\n\ float inputTail, \\\n\ float outputScale, \\\n\ - float outputZP \\\n\ + float outputZP, \\\n\ + float alpha \\\n\ ) \\\n\ { \\\n\ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \\\n\ \\\n\ float4 src = read_imagef(input, coord); \\\n\ \\\n\ - float4 dst = eltwise_unary_##func_name(src); \\\n\ + float4 dst = eltwise_unary_##func_name(src, alpha); \\\n\ \\\n\ write_imagef(output, coord, dst); \\\n\ }\n\ @@ -36278,7 +37883,8 @@ __kernel void func_name##_U8toU8 \\\n\ float inputScale, \\\n\ float inputTail, \\\n\ float outputScale, \\\n\ - float outputZP \\\n\ + float outputZP, \\\n\ + float alpha \\\n\ ) \\\n\ { \\\n\ int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \\\n\ @@ -36286,7 +37892,7 @@ __kernel void func_name##_U8toU8 \\\n\ uint4 src = read_imageui(input, coord); \\\n\ float4 data = convert_float4(src) * inputScale - inputTail; \\\n\ \\\n\ - data = eltwise_unary_##func_name(data); \\\n\ + data = eltwise_unary_##func_name(data, alpha); \\\n\ uint4 dst = convert_uint4(data * outputScale + outputZP); \\\n\ \\\n\ write_imageui(output, coord, dst); \\\n\ @@ -36307,7 +37913,8 @@ __kernel void func_name##_U8toU8_2D \\\n\ float inputScale, \\\n\ float inputTail, \\\n\ float outputScale, \\\n\ - float outputZP \\\n\ + float outputZP, \\\n\ + float alpha \\\n\ ) \\\n\ { \\\n\ int2 coord = (int2)(get_global_id(0), get_global_id(1)); \\\n\ @@ -36315,7 +37922,7 @@ __kernel void func_name##_U8toU8_2D \\\n\ uint4 src = read_imageui(input, coord); \\\n\ float4 data = convert_float4(src) * inputScale - inputTail; \\\n\ \\\n\ - data = eltwise_unary_##func_name(data); \\\n\ + data = eltwise_unary_##func_name(data, alpha); \\\n\ uint4 dst = convert_uint4(data * outputScale + outputZP); \\\n\ \\\n\ write_imageui(output, coord, dst); \\\n\ @@ -36336,7 +37943,8 @@ __kernel void neg_I32toI32\n\ float inputScale,\n\ float inputTail,\n\ float outputScale,\n\ - float outputZP\n\ + float outputZP,\n\ + float alpha\n\ )\n\ {\n\ 
int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ @@ -36354,7 +37962,8 @@ __kernel void neg_I32toI32_2D\n\ float inputScale,\n\ float inputTail,\n\ float outputScale,\n\ - float outputZP\n\ + float outputZP,\n\ + float alpha\n\ )\n\ {\n\ int2 coord = (int2)(get_global_id(0), get_global_id(1));\n\ @@ -45352,6 +46961,103 @@ __kernel void relu_keras_U8toF32_2D(\n\ write_imagef(output, coord, dst);\n\ }"; /* end of relu_keras_cl*/ +static const char resize_1d_bilinear_cl[] = "__kernel void resize_1d_bilinear_F32toF32(\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + float scale_x,\n\ + float half_pixel_value\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float left_x_f = floor(in_x);\n\ + float x_lerp = in_x - left_x_f;\n\ + int left_x_idx = convert_int(left_x_f);\n\ + int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);\n\ + float4 top_l, top_r, top, bottom, dst;\n\ +\n\ + top_l = read_imagef(input, coord_in);\n\ + coord_in.x++;\n\ + top_r = read_imagef(input, coord_in);\n\ +\n\ + top_r = top_r - top_l;\n\ + dst = top_l + x_lerp * top_r;\n\ +\n\ + write_imagef(output, coord_out, dst);\n\ +\n\ +}\n\ +\n\ +\n\ +__kernel void resize_1d_bilinear_U8toU8(\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + float scale_x,\n\ + float half_pixel_value,\n\ + float in_scale,\n\ + float in_tail,\n\ + float out_scale,\n\ + float out_tail\n\ + )\n\ +{\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);\n\ + float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;\n\ + float left_x_f = floor(in_x);\n\ + float x_lerp = in_x - left_x_f;\n\ + int left_x_idx = convert_int(left_x_f);\n\ + int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);\n\ + float4 top_l, top_r, top;\n\ + uint4 dst;\n\ +\n\ + top_l = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;\n\ + coord_in.x++;\n\ + top_r = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;\n\ +\n\ + top_r = top_r - top_l;\n\ + top = top_l + x_lerp * top_r;\n\ + dst = convert_uint4(top * out_scale + out_tail);\n\ +\n\ + write_imageui(output, coord_out, dst);\n\ +}\n\ +"; /* end of resize_1d_bilinear_cl*/ + +static const char resize_1d_nearest_cl[] = "\n\ +#define NEAREST_INDEX_PROCESS() \\\n\ + int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \\\n\ + float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x + round_value; \\\n\ + int in_x_idx = convert_int(in_x); \\\n\ +\n\ +__kernel void resize_1d_nearest_F32toF32(\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + float scale_x,\n\ + float half_pixel_value,\n\ + float round_value)\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ + int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);\n\ + float4 dst;\n\ + dst = read_imagef(input, coord_in);\n\ + write_imagef(output, coord_out, dst);\n\ +}\n\ +\n\ +\n\ +__kernel void resize_1d_nearest_U8toU8(\n\ + __read_only image2d_array_t input,\n\ + __write_only image2d_array_t output,\n\ + float scale_x,\n\ + float half_pixel_value,\n\ + float round_value,\n\ + float output_scale,\n\ + float output_tail)\n\ +{\n\ + NEAREST_INDEX_PROCESS()\n\ + int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);\n\ + uint4 
dst;\n\ + dst = convert_uint4(convert_float4(read_imageui(input, coord_in)) * output_scale + output_tail);\n\ + write_imageui(output, coord_out, dst);\n\ +}\n\ +"; /* end of resize_1d_nearest_cl*/ + static const char resize_bilinear_cl[] = "__kernel void resize_bilinear_F32toF32(\n\ __read_only image2d_array_t input,\n\ __write_only image2d_array_t output,\n\ @@ -46418,6 +48124,15 @@ static const source_map_t evis_resource[] = {"relational_ops_2d_vx", relational_ops_2d_vx}, {"relational_ops_3d_vx", relational_ops_3d_vx}, {"relu_keras_vx", relu_keras_vx}, + {"resize_1d_bilinear_BF16_vx", resize_1d_bilinear_BF16_vx}, + {"resize_1d_bilinear_DOWN_NX_vx", resize_1d_bilinear_DOWN_NX_vx}, + {"resize_1d_bilinear_F16_vx", resize_1d_bilinear_F16_vx}, + {"resize_1d_bilinear_I16_vx", resize_1d_bilinear_I16_vx}, + {"resize_1d_bilinear_I8_vx", resize_1d_bilinear_I8_vx}, + {"resize_1d_bilinear_U8_vx", resize_1d_bilinear_U8_vx}, + {"resize_1d_bilinear_U8_opt_vx", resize_1d_bilinear_U8_opt_vx}, + {"resize_1d_bilinear_UP_NX_vx", resize_1d_bilinear_UP_NX_vx}, + {"resize_1d_nearest_vx", resize_1d_nearest_vx}, {"resize_bilinear_BF16_vx", resize_bilinear_BF16_vx}, {"resize_bilinear_F16_vx", resize_bilinear_F16_vx}, {"resize_bilinear_I16_vx", resize_bilinear_I16_vx}, @@ -46552,6 +48267,8 @@ static const source_map_t cl_resource[] = {"reduceprod_internal_axis2_cl", reduceprod_internal_axis2_cl}, {"relational_ops_cl", relational_ops_cl}, {"relu_keras_cl", relu_keras_cl}, + {"resize_1d_bilinear_cl", resize_1d_bilinear_cl}, + {"resize_1d_nearest_cl", resize_1d_nearest_cl}, {"resize_bilinear_cl", resize_bilinear_cl}, {"resize_nearest_cl", resize_nearest_cl}, {"scatter_nd_cl", scatter_nd_cl}, diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_batch_norm.c b/src/tim/vx/internal/src/ops/vsi_nn_op_batch_norm.c index c760898..64b94ee 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_batch_norm.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_batch_norm.c @@ -176,8 +176,8 @@ static vsi_status op_optimize reshape 3d input (xcn) --> 4d input (whcn) reshape 3d output(xcn) --> 4d output(whcn) */ - shape[0] = inputs[0]->attr.size[0]; - shape[1] = 1; + shape[0] = 1; + shape[1] = inputs[0]->attr.size[0]; shape[2] = inputs[0]->attr.size[1]; shape[3] = inputs[0]->attr.size[2]; dim = 4; diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_cast.c b/src/tim/vx/internal/src/ops/vsi_nn_op_cast.c index 86e6ae2..25a8787 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_cast.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_cast.c @@ -136,6 +136,7 @@ static vsi_bool op_check IO_TYPE(D_I32, D_F32) IO_TYPE(D_I32, D_I32) IO_TYPE(D_I32, D_U32) + IO_TYPE(D_I32, D_F16) IO_TYPE(D_I32, D_BOOL8) IO_TYPE(D_U32, D_F32) IO_TYPE(D_U32, D_I32) @@ -176,6 +177,7 @@ static vsi_bool op_check IO_TYPE(D_U8|Q_ASYM, D_F32) IO_TYPE(D_U8|Q_ASYM, D_I32) IO_TYPE(D_BF16, D_BF16) + IO_TYPE(D_U8, D_F16) END_IO_TYPE_DECL(CAST) if(!VALIDATE_OP_IO_TYPES(CAST, self, inputs, self->input.num, outputs, self->output.num)) { char* desc = generate_op_io_types_desc(inputs, diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_concat.c b/src/tim/vx/internal/src/ops/vsi_nn_op_concat.c index 6ee4172..d36cf41 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_concat.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_concat.c @@ -37,6 +37,29 @@ #include "utils/vsi_nn_dtype_util.h" #include "utils/vsi_nn_constraint_check.h" +static vsi_bool _enable_concat_optimize() +{ + char *envctrl; + static int32_t enableOptimize = -1; + + if (enableOptimize == -1) + { + enableOptimize = 1; + envctrl = 
getenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE"); + if (envctrl) + { + enableOptimize = atoi(envctrl); + } + } + + if (enableOptimize == 1) + { + return TRUE; + } + + return FALSE; +} + static int32_t _get_input_num ( vsi_nn_node_t * self, @@ -243,7 +266,8 @@ static vsi_status op_compute status = VSI_SUCCESS; self->n = NULL; - if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs)) + if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs) + && _enable_concat_optimize()) { iter = self->nn_param.concat.lcl_data; while( NULL != iter ) @@ -397,7 +421,8 @@ static vsi_status op_optimize status = VSI_SUCCESS; /* we don't create tensor view if the axis is not the highest dimension */ if (_is_highest_dimension(self, outputs) == FALSE || - _is_same_quant(self, inputs, outputs) == FALSE) + _is_same_quant(self, inputs, outputs) == FALSE || + _enable_concat_optimize() == FALSE) { return status; } diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_dataconvert.c b/src/tim/vx/internal/src/ops/vsi_nn_op_dataconvert.c index 8d8de45..ca6b3db 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_dataconvert.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_dataconvert.c @@ -194,6 +194,7 @@ static vsi_bool op_check IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP) IO_TYPE(D_I16|Q_DFP, D_I8|Q_DFP) IO_TYPE(D_I16|Q_DFP, D_U8|Q_ASYM) + IO_TYPE(D_I16|Q_DFP, D_F16) IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP) IO_TYPE(D_I8|Q_DFP, D_I8|Q_ASYM) IO_TYPE(D_I8|Q_ASYM, D_I8|Q_ASYM) diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_depthwise_conv1d.c b/src/tim/vx/internal/src/ops/vsi_nn_op_depthwise_conv1d.c index 5cbb9d6..4835ab7 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_depthwise_conv1d.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_depthwise_conv1d.c @@ -31,6 +31,7 @@ #include "vsi_nn_prv.h" #include "vsi_nn_ops.h" #include "vsi_nn_tensor.h" +#include "vsi_nn_tensor_util.h" #include "utils/vsi_nn_util.h" #include "kernel/vsi_nn_kernel.h" /* @@ -73,6 +74,7 @@ static vsi_status op_compute { status = VSI_SUCCESS; } + vsi_nn_kernel_param_release( ¶m ); return status; } /* op_compute() */ @@ -119,7 +121,7 @@ static vsi_bool op_setup VSI_NN_ROUND_FLOOR ); - outputs[0]->attr.size[1] = inputs[1]->attr.size[2]; + outputs[0]->attr.size[1] = inputs[0]->attr.size[1] * p->multiplier; outputs[0]->attr.size[2] = inputs[0]->attr.size[2]; outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; } diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_eltwise_unary.c b/src/tim/vx/internal/src/ops/vsi_nn_op_eltwise_unary.c index c4192d3..c74da7a 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_eltwise_unary.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_eltwise_unary.c @@ -45,22 +45,30 @@ static vsi_status _eltwise_unary_op_compute ) { vsi_status status = VSI_FAILURE; + float alpha = 0; + vsi_nn_kernel_param_t * param = NULL; if( NULL == self ) { return status; } + param = vsi_nn_kernel_param_create(); + + alpha = self->nn_param.elu.alpha; + vsi_nn_kernel_param_add_float32( param, "alpha", alpha ); // TODO: This optimzie is a hack for gpu path, // it should be moved to gpu kernel setup. 
self->n = (vx_node)vsi_nn_kernel_selector( self->graph, - kernel_name, inputs, 1, outputs, 1, NULL ); + kernel_name, inputs, 1, outputs, 1, param ); if( self->n ) { status = VSI_SUCCESS; } + vsi_nn_kernel_param_release( ¶m ); + return status; } /* _eltwise_op_compute() */ @@ -152,6 +160,19 @@ static vsi_bool op_check return TRUE; } /* op_check() */ +static vsi_status op_init + ( + vsi_nn_node_t * self + ) +{ + if (vsi_nn_compareVersion(self->graph, 1, 1, 29) == -1) + { + self->nn_param.elu.alpha = 1; + } + + return VSI_SUCCESS; +} /* op_init() */ + #ifdef __cplusplus extern "C" { #endif @@ -166,7 +187,7 @@ extern "C" { { \ return _eltwise_unary_op_compute( ""#kernel_name, self, inputs, outputs ); \ } \ -DEF_OP_REG(name, NULL, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1) +DEF_OP_REG(name, op_init, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1) DEF_ELEMENT_WISE_UNARY_OP( SIN, sin ); DEF_ELEMENT_WISE_UNARY_OP( EXP, exp ); diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_hashtable_lookup.c b/src/tim/vx/internal/src/ops/vsi_nn_op_hashtable_lookup.c index a0d0395..dbec838 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_hashtable_lookup.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_hashtable_lookup.c @@ -73,6 +73,7 @@ static vsi_bool op_check IO_TYPE(D_I32, D_I32, D_I32, D_F16, D_I32) IO_TYPE(D_I32, D_I32, D_F32, D_F16, D_F32) IO_TYPE(D_I32, D_I32, D_U8|Q_ASYM, D_F16, D_U8|Q_ASYM) + IO_TYPE(D_I32, D_I32, D_F32, D_F32, D_U8|Q_ASYM) END_IO_TYPE_DECL(HASHTABLE_LOOKUP) if (!VALIDATE_OP_IO_TYPES(HASHTABLE_LOOKUP, self, inputs, self->input.num, outputs, self->output.num)) { diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_interp.c b/src/tim/vx/internal/src/ops/vsi_nn_op_interp.c new file mode 100644 index 0000000..cc38677 --- /dev/null +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_interp.c @@ -0,0 +1,298 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include +#include + +#include "vsi_nn_types.h" +#include "vsi_nn_log.h" +#include "vsi_nn_node.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_ops.h" +#include "vsi_nn_tensor.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" + +/* + Declare number of input and output. 
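+ INTERP takes two inputs: inputs[0] is the data tensor, while inputs[1] is only
+ consulted when neither shrink/zoom factors nor an explicit width/height are set,
+ in which case its shape supplies the output width and height (see op_setup).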
+ */ +#define _INPUT_NUM (2) +#define _OUTPUT_NUM (1) + +static vsi_status op_compute + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + vsi_status status = VSI_SUCCESS; + + status = vsi_nn_internal_compute_node( self ); + + return status; +} /* op_compute() */ + +static vsi_bool op_check + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + vsi_nn_interp_param *p = NULL; + + p = &self->nn_param.interp; + + if ((p->pad_beg > 0) || (p->pad_end > 0)) + { + VSILOGE("Only supports non-positive padding (cropping) for now "); + return FALSE; + } + + return TRUE; +} /* op_check() */ + + +static vsi_bool op_setup + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + vsi_nn_interp_param *p = NULL; + int32_t height_in_eff_, width_in_eff_; + int32_t height_out, width_out; + vsi_nn_internal_node_t* curr = NULL; + vsi_nn_internal_tensor_t *crop_tensor = NULL; + vsi_nn_tensor_t *crop_in_tensor = NULL; + float factor = 1.0f; + int32_t pad_beg = 0; + int32_t pad_end = 0; + + if ( NULL == self ) + { + return FALSE; + } + + p = &self->nn_param.interp; + pad_beg = -p->pad_beg; + pad_end = -p->pad_end; + width_in_eff_ = inputs[0]->attr.size[0] + p->pad_beg + p->pad_end; + height_in_eff_ = inputs[0]->attr.size[1] + p->pad_beg + p->pad_end; + + if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num ) + { + outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; + memcpy( outputs[0]->attr.size, inputs[0]->attr.size, + VSI_NN_MAX_DIM_NUM * sizeof( uint32_t ) ); + if ((p->shrink_factor > 0) && (p->zoom_factor <= 0)) + { + width_out = (width_in_eff_ - 1) / p->shrink_factor + 1; + height_out = (height_in_eff_ - 1) / p->shrink_factor + 1; + } + else if ((p->zoom_factor > 0) && (p->shrink_factor <= 0)) + { + width_out = (width_in_eff_ - 1) * (p->zoom_factor - 1) + width_in_eff_; + height_out = (height_in_eff_ - 1) * (p->zoom_factor - 1) + height_in_eff_; + } + else if ((p->height > 0) && (p->width > 0)) + { + width_out = p->width; + height_out = p->height; + } + else if ((p->zoom_factor > 0) && (p->shrink_factor > 0)) + { + width_out = (width_in_eff_ - 1) / p->shrink_factor + 1; + height_out = (height_in_eff_ - 1) / p->shrink_factor + 1; + width_out = (width_out - 1) * (p->zoom_factor - 1) + width_out; + height_out = (height_out - 1) * (p->zoom_factor - 1) + height_out; + } + else if (NULL != inputs[1]) + { + width_out = inputs[1]->attr.size[0]; + height_out = inputs[1]->attr.size[1]; + } + else + { + VSILOGE("Unsupported params "); + return FALSE; + } + + if ((width_out < 0) || (height_out < 0) || (width_in_eff_ < 0) || (height_in_eff_ < 0)) + { + VSILOGE("value should be positive: width_out %d height_out %d width_in_eff_ %d height_in_eff_ %d ", + width_out, height_out, width_in_eff_, height_in_eff_); + return FALSE; + } + + outputs[0]->attr.size[0] = width_out; + outputs[0]->attr.size[1] = height_out; + } + + factor = (float)(outputs[0]->attr.size[0]) / (float)(width_in_eff_); + + if ((pad_beg > 0) || (pad_end > 0)) + { + vsi_nn_tensor_attr_t attr; + int32_t use_virtual_tensor = 1; + int32_t *begin_dims; + int32_t *end_dims; + int32_t *stride_dims; + uint32_t i; + memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t)); + vsi_nn_internal_init_tensor_attr(&attr, &inputs[0]->attr.dtype, use_virtual_tensor); + crop_tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f ); + crop_in_tensor = crop_tensor->t; + curr = vsi_nn_internal_new_node( self, VSI_NN_OP_STRIDED_SLICE, 1, 1 ); + 
curr->node->nn_param.strided_slice.begin_dims_num = inputs[0]->attr.dim_num; + curr->node->nn_param.strided_slice.end_dims_num = inputs[0]->attr.dim_num; + curr->node->nn_param.strided_slice.stride_dims_num = inputs[0]->attr.dim_num; + curr->node->nn_param.strided_slice.begin_mask = 0; + curr->node->nn_param.strided_slice.end_mask = 0; + curr->node->nn_param.strided_slice.shrink_axis_mask = 0; + begin_dims = (int32_t *)vsi_nn_internal_new_node_param(curr, + VSI_NN_MAX_DIM_NUM * sizeof(uint32_t)); + end_dims = (int32_t *)vsi_nn_internal_new_node_param(curr, + VSI_NN_MAX_DIM_NUM * sizeof(uint32_t)); + stride_dims = (int32_t *)vsi_nn_internal_new_node_param(curr, + VSI_NN_MAX_DIM_NUM * sizeof(uint32_t)); + for (i = 0; i < inputs[0]->attr.dim_num; i++) + { + stride_dims[i] = 1; + } + + begin_dims[0] = pad_beg; + begin_dims[1] = pad_beg; + end_dims[0] = inputs[0]->attr.size[0] - pad_end; + end_dims[1] = inputs[0]->attr.size[1] - pad_end; + + if (inputs[0]->attr.dim_num > 2) + { + for (i = 2 ; i < inputs[0]->attr.dim_num; i++) + { + begin_dims[i] = 0; + end_dims[i] = inputs[0]->attr.size[i]; + } + } + curr->node->nn_param.strided_slice.begin_dims = begin_dims; + curr->node->nn_param.strided_slice.end_dims = end_dims; + curr->node->nn_param.strided_slice.stride_dims = stride_dims; + curr->inputs[0] = inputs[0]; + curr->outputs[0] = crop_in_tensor; + vsi_nn_internal_setup_node(self, curr); + } + else + { + crop_in_tensor = inputs[0]; + } + + if ((width_in_eff_ == (int32_t)outputs[0]->attr.size[0]) && (height_in_eff_ == (int32_t)outputs[0]->attr.size[1])) + { + curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 1, 1 ); + curr->inputs[0] = crop_in_tensor; + curr->outputs[0] = outputs[0]; + vsi_nn_internal_setup_node(self, curr); + } + else + { + curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_INTERNAL, 1, 1 ); + curr->node->nn_param.resize_internal.align_corners = vx_true_e; + curr->node->nn_param.resize_internal.factor = factor; + curr->node->nn_param.resize_internal.half_pixel_centers = vx_false_e; + curr->inputs[0] = crop_in_tensor; + curr->outputs[0] = outputs[0]; + vsi_nn_internal_setup_node(self, curr); + } + + return TRUE; +} /* op_setup() */ + +static vsi_status op_optimize + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs, + vsi_nn_opt_direction_e direction + ) +{ + vsi_status status; + + status = VSI_SUCCESS; + vsi_nn_internal_optimize_node( self, direction ); + + return status; +} /* op_optimize() */ + +static vsi_status op_init + ( + vsi_nn_node_t* self + ) +{ + vsi_status status = VSI_SUCCESS; + + status = vsi_nn_internal_init_node_wksp(self); + self->nn_param.interp.height = 0; + self->nn_param.interp.width = 0; + self->nn_param.interp.pad_beg = 0; + self->nn_param.interp.pad_end = 0; + self->nn_param.interp.shrink_factor = 0; + self->nn_param.interp.zoom_factor = 0; + + return status; +} /* op_init() */ + +static vsi_status op_deinit + ( + vsi_nn_node_t* self + ) +{ + vsi_status status = VSI_SUCCESS; + + vsi_nn_internal_deinit_node_wksp(self); + status = vsi_nn_op_common_deinit(self); + + return status; +} /* op_deinit() */ + +__BEGIN_DECLS + +/* Registrar */ +DEF_OP_REG + ( + /* op_name */ INTERP, + /* init */ op_init, + /* compute */ op_compute, + /* deinit */ op_deinit, + /* check */ op_check, + /* setup */ op_setup, + /* optimize */ op_optimize, + /* input_num */ _INPUT_NUM, + /* output_num */ _OUTPUT_NUM + ); + +__END_DECLS + diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_permute.c 
b/src/tim/vx/internal/src/ops/vsi_nn_op_permute.c index cd0a9db..a1a825a 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_permute.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_permute.c @@ -155,11 +155,13 @@ static vsi_bool op_check BEGIN_IO_TYPE_DECL(PERMUTE, 1, 1) IO_TYPE(D_F16, D_F16) IO_TYPE(D_F16, D_F32) + IO_TYPE(D_I16, D_I16) IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP) IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP) IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM) IO_TYPE(D_U8|Q_ASYM, D_F16) IO_TYPE(D_U8|Q_ASYM, D_F32) + IO_TYPE(D_I8|Q_SYM_PC, D_I8|Q_SYM_PC) IO_TYPE(D_BOOL8, D_BOOL8) IO_TYPE(D_BOOL8, D_I8|Q_DFP) IO_TYPE(D_F32, D_F32) diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d.c b/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d.c new file mode 100644 index 0000000..235d0c3 --- /dev/null +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d.c @@ -0,0 +1,207 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include +#include + +#include "vsi_nn_types.h" +#include "vsi_nn_log.h" +#include "vsi_nn_node.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_ops.h" +#include "vsi_nn_tensor.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" + +/* + Declare number of input and output. 
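+ RESIZE_1D is expanded into internal nodes in op_setup: a DATACONVERT when the
+ output shape equals the input shape, otherwise RESIZE_1D_BILINEAR_INTERNAL or
+ RESIZE_1D_NEAREST_INTERNAL depending on nn_param.resize_1d.type.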
+ */ +#define _INPUT_NUM (1) +#define _OUTPUT_NUM (1) + +static vsi_bool _is_same_shape + ( + vsi_nn_tensor_t * inputs, + uint32_t *sizes, + uint32_t dims + ) +{ + uint32_t i = 0; + + if (inputs->attr.dim_num != dims) + return FALSE; + + for (i = 0; i < dims; i++) + { + if (sizes[i] != inputs->attr.size[i]) + return FALSE; + } + + return TRUE; +} + +static vsi_status op_compute + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + vsi_status status = VSI_FAILURE; + + status = vsi_nn_internal_compute_node( self ); + + return status; +} /* op_compute() */ + + +static vsi_status op_optimize + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs, + vsi_nn_opt_direction_e direction + ) +{ + if ( _is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num) ) + { + return vsi_nn_internal_optimize_node(self, direction ); + } + else + { + return VSI_SUCCESS; + } +} /* op_optimize() */ + +static vsi_bool op_check + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + return TRUE; +} /* op_check() */ + +static vsi_bool op_setup + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + float factor = self->nn_param.resize_1d.factor; + vsi_nn_internal_node_t* curr = NULL; + + if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num ) + { + outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; + if (factor != 0) + { + outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor); + } + else + { + outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0]; + } + outputs[0]->attr.size[1] = inputs[0]->attr.size[1]; + outputs[0]->attr.size[2] = inputs[0]->attr.size[2]; + outputs[0]->attr.size[3] = inputs[0]->attr.size[3]; + } + + if (_is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num)) + { + vsi_nn_internal_init_node_wksp( self ); + curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 0, 0 ); + curr->inputs[0] = inputs[0]; + curr->outputs[0] = outputs[0]; + vsi_nn_internal_setup_node(self, curr); + } + else if (VSI_NN_INTERPOLATION_BILINEAR == self->nn_param.resize_1d.type) + { + vsi_nn_internal_init_node_wksp( self ); + curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL, 0, 0 ); + curr->node->nn_param.resize_1d_bilinear_internal.align_corners = self->nn_param.resize_1d.align_corners; + curr->node->nn_param.resize_1d_bilinear_internal.factor = self->nn_param.resize_1d.factor; + curr->node->nn_param.resize_1d_bilinear_internal.half_pixel_centers = \ + self->nn_param.resize_1d.half_pixel_centers; + curr->inputs[0] = inputs[0]; + curr->outputs[0] = outputs[0]; + vsi_nn_internal_setup_node(self, curr); + } + else if (VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR == self->nn_param.resize_1d.type) + { + vsi_nn_internal_init_node_wksp( self ); + curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL, 0, 0 ); + curr->node->nn_param.resize_1d_nearest_internal.align_corners = self->nn_param.resize_1d.align_corners; + curr->node->nn_param.resize_1d_nearest_internal.factor = self->nn_param.resize_1d.factor; + curr->node->nn_param.resize_1d_nearest_internal.half_pixel_centers = \ + self->nn_param.resize_1d.half_pixel_centers; + curr->inputs[0] = inputs[0]; + curr->outputs[0] = outputs[0]; + vsi_nn_internal_setup_node(self, curr); + } + + return TRUE; +} /* op_setup() */ + +static vsi_status op_init + ( + vsi_nn_node_t* self + ) +{ + return VSI_SUCCESS; +} /* op_init() */ + +static 
vsi_status op_deinit + ( + vsi_nn_node_t* self + ) +{ + vsi_status status = VSI_SUCCESS; + + status = vsi_nn_internal_deinit_node_wksp(self); + + return status; +} /* op_deinit() */ + +__BEGIN_DECLS + +/* Registrar */ +DEF_OP_REG + ( + /* op_name */ RESIZE_1D, + /* init */ op_init, + /* compute */ op_compute, + /* deinit */ op_deinit, + /* check */ op_check, + /* setup */ op_setup, + /* optimize */ op_optimize, + /* input_num */ _INPUT_NUM, + /* output_num */ _OUTPUT_NUM + ); + +__END_DECLS + diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d_bilinear_internal.c b/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d_bilinear_internal.c new file mode 100644 index 0000000..66ea066 --- /dev/null +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d_bilinear_internal.c @@ -0,0 +1,171 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include +#include + +#include "vsi_nn_types.h" +#include "vsi_nn_log.h" +#include "vsi_nn_node.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_ops.h" +#include "vsi_nn_tensor.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "utils/vsi_nn_constraint_check.h" + +/* + Declare number of input and output. 
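+ This internal op binds the "resize_1d_bilinear" kernel, forwarding align_corners
+ and half_pixel_centers as kernel params; the output width is the input width
+ scaled by 'factor', or taken from resize_1d.size[0] when factor is 0.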
+ */ +#define _INPUT_NUM (1) +#define _OUTPUT_NUM (1) + +static vsi_status op_compute + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + vsi_status status = VSI_FAILURE; + int32_t align_corners = self->nn_param.resize_1d_bilinear_internal.align_corners; + int32_t half_pixel_centers = self->nn_param.resize_1d_bilinear_internal.half_pixel_centers; + vsi_nn_kernel_param_t * param = NULL; + + param = vsi_nn_kernel_param_create(); + + vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners ); + vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers ); + + self->n = (vx_node)vsi_nn_kernel_selector( self->graph, + "resize_1d_bilinear", + &inputs[0], 1, + &outputs[0], 1, param ); + + if ( self->n ) + { + status = VSI_SUCCESS; + } + + vsi_nn_kernel_param_release( ¶m ); + + return status; +} /* op_compute() */ + +static vsi_bool op_check + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + BEGIN_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL, 1, 1) + IO_TYPE(D_F16, D_U8|Q_ASYM) + IO_TYPE(D_F16, D_F16) + IO_TYPE(D_BF16, D_BF16) + IO_TYPE(D_F32, D_F32) + IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM) + IO_TYPE(D_U8|Q_ASYM, D_F16) + IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP) + IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP) + END_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL) + if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_BILINEAR_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) { + char* desc = generate_op_io_types_desc(inputs, + self->input.num, outputs, self->output.num); + VSILOGE("Inputs/Outputs data type not support: %s", desc); + destroy_op_io_types_desc(desc); + return FALSE; + } + + return TRUE; +} /* op_check() */ + +static vsi_bool op_setup + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + float factor = self->nn_param.resize_1d_bilinear_internal.factor; + + if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num ) + { + outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; + if (factor != 0) + { + outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor); + } + else + { + outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0]; + } + outputs[0]->attr.size[1] = inputs[0]->attr.size[1]; + outputs[0]->attr.size[2] = inputs[0]->attr.size[2]; + outputs[0]->attr.size[3] = inputs[0]->attr.size[3]; + } + + return TRUE; +} /* op_setup() */ + +static vsi_status op_init + ( + vsi_nn_node_t* self + ) +{ + + return VSI_SUCCESS; +} /* op_init() */ + +static vsi_status op_deinit + ( + vsi_nn_node_t* self + ) +{ + vsi_status status = VSI_SUCCESS; + + status = vsi_nn_op_common_deinit(self); + + return status; +} /* op_deinit() */ + +__BEGIN_DECLS + +/* Registrar */ +DEF_OP_REG + ( + /* op_name */ RESIZE_1D_BILINEAR_INTERNAL, + /* init */ op_init, + /* compute */ op_compute, + /* deinit */ op_deinit, + /* check */ op_check, + /* setup */ op_setup, + /* optimize */ NULL, + /* input_num */ _INPUT_NUM, + /* output_num */ _OUTPUT_NUM + ); + +__END_DECLS + diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d_nearest_internal.c b/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d_nearest_internal.c new file mode 100644 index 0000000..edddc1a --- /dev/null +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_resize_1d_nearest_internal.c @@ -0,0 +1,170 @@ +/**************************************************************************** +* +* Copyright (c) 2020 Vivante Corporation +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and 
associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +* +*****************************************************************************/ + + +#include +#include + +#include "vsi_nn_types.h" +#include "vsi_nn_log.h" +#include "vsi_nn_node.h" +#include "vsi_nn_prv.h" +#include "vsi_nn_ops.h" +#include "vsi_nn_tensor.h" +#include "utils/vsi_nn_util.h" +#include "kernel/vsi_nn_kernel.h" +#include "utils/vsi_nn_constraint_check.h" + +/* + Declare number of input and output. + */ +#define _INPUT_NUM (1) +#define _OUTPUT_NUM (1) + +static vsi_status op_compute + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + vsi_status status = VSI_FAILURE; + int32_t align_corners = self->nn_param.resize_1d_nearest_internal.align_corners; + int32_t half_pixel_centers = self->nn_param.resize_1d_nearest_internal.half_pixel_centers; + vsi_nn_kernel_param_t * param = NULL; + + param = vsi_nn_kernel_param_create(); + + vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners ); + vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers ); + + self->n = (vx_node)vsi_nn_kernel_selector( self->graph, + "resize_1d_nearest", + &inputs[0], 1, + &outputs[0], 1, param ); + + if ( self->n ) + { + status = VSI_SUCCESS; + } + + vsi_nn_kernel_param_release( ¶m ); + + return status; +} /* op_compute() */ + +static vsi_bool op_check + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + BEGIN_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL, 1, 1) + IO_TYPE(D_F16, D_F16) + IO_TYPE(D_BF16, D_BF16) + IO_TYPE(D_F32, D_F32) + IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM) + IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP) + IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP) + END_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL) + if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_NEAREST_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) { + char* desc = generate_op_io_types_desc(inputs, + self->input.num, outputs, self->output.num); + VSILOGE("Inputs/Outputs data type not support: %s", desc); + destroy_op_io_types_desc(desc); + + return FALSE; + } + + return TRUE; +} /* op_check() */ + +static vsi_bool op_setup + ( + vsi_nn_node_t * self, + vsi_nn_tensor_t ** inputs, + vsi_nn_tensor_t ** outputs + ) +{ + float factor = self->nn_param.resize_1d_nearest_internal.factor; + + if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num ) + { + outputs[0]->attr.dim_num = inputs[0]->attr.dim_num; + if (factor != 0) + { + outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor); + } + else + { + outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0]; + } + 
outputs[0]->attr.size[1] = inputs[0]->attr.size[1]; + outputs[0]->attr.size[2] = inputs[0]->attr.size[2]; + outputs[0]->attr.size[3] = inputs[0]->attr.size[3]; + } + + return TRUE; +} /* op_setup() */ + +static vsi_status op_init + ( + vsi_nn_node_t* self + ) +{ + + return VSI_SUCCESS; +} /* op_init() */ + +static vsi_status op_deinit + ( + vsi_nn_node_t* self + ) +{ + vsi_status status = VSI_SUCCESS; + + status = vsi_nn_op_common_deinit(self); + + return status; +} /* op_deinit() */ + +__BEGIN_DECLS + +/* Registrar */ +DEF_OP_REG + ( + /* op_name */ RESIZE_1D_NEAREST_INTERNAL, + /* init */ op_init, + /* compute */ op_compute, + /* deinit */ op_deinit, + /* check */ op_check, + /* setup */ op_setup, + /* optimize */ NULL, + /* input_num */ _INPUT_NUM, + /* output_num */ _OUTPUT_NUM + ); + +__END_DECLS + diff --git a/src/tim/vx/internal/src/ops/vsi_nn_op_svdf.c b/src/tim/vx/internal/src/ops/vsi_nn_op_svdf.c index e55d1f8..94fa617 100644 --- a/src/tim/vx/internal/src/ops/vsi_nn_op_svdf.c +++ b/src/tim/vx/internal/src/ops/vsi_nn_op_svdf.c @@ -163,12 +163,17 @@ static vsi_bool op_check ret = FALSE; } + if(ret) { BEGIN_IO_TYPE_DECL(SVDF, 5, 2) IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_F16) IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_F16) IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_F16) IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_F32) + IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_NONE) + IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_NONE) + IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_NONE) + IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_NONE) END_IO_TYPE_DECL(SVDF) if(!VALIDATE_OP_IO_TYPES(SVDF, self, inputs, self->input.num, outputs, self->output.num)) { char* desc = generate_op_io_types_desc(inputs, diff --git a/src/tim/vx/internal/src/utils/vsi_nn_code_generator.c b/src/tim/vx/internal/src/utils/vsi_nn_code_generator.c index 4a898bc..518b099 100644 --- a/src/tim/vx/internal/src/utils/vsi_nn_code_generator.c +++ b/src/tim/vx/internal/src/utils/vsi_nn_code_generator.c @@ -117,8 +117,7 @@ static void _try_pack_tensor_data { *p_sz = (uint64_t)bytes; } - free( data ); - data = NULL; + vsi_nn_safe_free( data ); } } } /* _pack_tensor_data() */ @@ -417,6 +416,8 @@ static _op_param_gen_t s_op_gen[] = /* PRE_PROCESS_NV12 */ NULL, /* SCATTER_ND */ NULL, /* DECONVOLUTION1D */ NULL, + /* INTERP */ NULL, + /* RESIZE_1D */ NULL, }; _compiler_assert( _cnt_of_array(s_op_gen) == VSI_NN_OP_NUM, vsi_nn_code_generator_c ); diff --git a/src/tim/vx/internal/src/utils/vsi_nn_tensor_op.c b/src/tim/vx/internal/src/utils/vsi_nn_tensor_op.c index b40e755..febd192 100644 --- a/src/tim/vx/internal/src/utils/vsi_nn_tensor_op.c +++ b/src/tim/vx/internal/src/utils/vsi_nn_tensor_op.c @@ -149,7 +149,7 @@ vsi_nn_tensor_t* vsi_nn_Concat src = j; memcpy( &buffer[dst * type_bytes], &tmp[src * type_bytes], type_bytes ); } - free(tmp); + vsi_nn_safe_free( tmp ); offset += strides[axis] * tensors[i]->attr.size[axis]; } tensor_out = vsi_nn_CreateTensorFromData( graph, buffer, &output_attr ); @@ -221,11 +221,7 @@ vsi_nn_tensor_t* vsi_nn_ConvertTensorDtype } } - if( src_buf ) - { - free( src_buf ); - src_buf = NULL; - } + vsi_nn_safe_free( src_buf ); if( dst_buf ) { free( dst_buf ); @@ -333,10 +329,7 @@ vsi_nn_tensor_t* vsi_nn_TensorAdd error: for ( i = 0; i < tensor_num; i++ ) { - if ( buffer[i] ) - { - free(buffer[i]); - } + vsi_nn_safe_free( buffer[i] ); } if( tmp ) { diff --git a/src/tim/vx/internal/src/utils/vsi_nn_util.c b/src/tim/vx/internal/src/utils/vsi_nn_util.c 
index 1482fc1..9ac442d 100644 --- a/src/tim/vx/internal/src/utils/vsi_nn_util.c +++ b/src/tim/vx/internal/src/utils/vsi_nn_util.c @@ -710,33 +710,6 @@ vsi_bool vsi_nn_CheckFilePath return FALSE; } /* vsi_nn_CheckFilePath() */ -void vsi_nn_GetFP32MultiAndPostShift - ( - vx_float32 mult, - vx_uint16 *M0, - vx_int8 *N - ) -{ - vx_uint32 uintMult = *((vx_uint32*)(&mult)); - vx_uint32 tmpMultiply = 0; - vx_int32 exp = 0; - vx_uint32 postShiftBit6to5 = 0; - vx_uint32 postShift = 0; - vx_int8 tmpPostShift = 0; - - tmpMultiply = (uintMult & 0x7FFFFF) >> 8; - *M0 = (vx_uint16)((1U << 15) + tmpMultiply); - - exp = (uintMult & 0x7F800000) >> 23; /* postShift is Scale's exp*/ - tmpPostShift = 15 - ((vx_int8)exp - 127); - postShift = tmpPostShift & 0x1F; - tmpPostShift = tmpPostShift >> 5; - postShiftBit6to5 = tmpPostShift & 3; - - *N = (vx_int8)(((postShiftBit6to5 << 5) | (postShift & 0x1F))); - *N = (((vx_int32)*N << 25) >> 25); -}/* vsi_nn_GetFP32MultiAndPostShift() */ - typedef struct { uint8_t* raw_addr; diff --git a/src/tim/vx/internal/src/vsi_nn_graph_optimization.c b/src/tim/vx/internal/src/vsi_nn_graph_optimization.c index 9732a50..1cde801 100644 --- a/src/tim/vx/internal/src/vsi_nn_graph_optimization.c +++ b/src/tim/vx/internal/src/vsi_nn_graph_optimization.c @@ -520,6 +520,7 @@ static vx_tensor _create_const_raw_tensor vx_tensor tensor = NULL; vx_tensor_create_params_t params; float * scales = NULL; + int32_t * zeroPoints = NULL; memset( ¶ms, 0, sizeof( vx_tensor_create_params_t ) ); params.num_of_dims = attr.dim_num; @@ -539,12 +540,14 @@ static vx_tensor _create_const_raw_tensor #ifdef VSI_PERCHANNEL_QUANTIZATION_SUPPORT // This is a hack that driver doesn't support const scale scales = (float *)malloc(sizeof(float) * attr.dtype.scale_dim); + zeroPoints = (int32_t *)malloc(sizeof(int32_t) * attr.dtype.zero_points_dim); memcpy(scales, attr.dtype.scales, attr.dtype.scale_dim * sizeof(float)); + memcpy(zeroPoints, attr.dtype.zero_points, attr.dtype.zero_points_dim * sizeof(float)); params.quant_data.affinePerChannel.channelDim = attr.dtype.channel_dim; params.quant_data.affinePerChannel.scaleCount = attr.dtype.scale_dim; params.quant_data.affinePerChannel.scales = scales; - params.quant_data.affinePerChannel.zeroPoint = NULL; - params.quant_data.affinePerChannel.zeroPointCount = 0; + params.quant_data.affinePerChannel.zeroPoint = zeroPoints; + params.quant_data.affinePerChannel.zeroPointCount = attr.dtype.zero_points_dim; break; #else VSILOGE( "can't support qnt_type VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC." 
); @@ -580,6 +583,10 @@ static vx_tensor _create_const_raw_tensor { free( scales ); } + if (zeroPoints) + { + free( zeroPoints ); + } return NULL; } } @@ -620,6 +627,10 @@ static vx_tensor _create_const_raw_tensor { free( scales ); } + if (zeroPoints) + { + free( zeroPoints ); + } return tensor; } /* _create_const_raw_tensor() */ @@ -689,6 +700,8 @@ static void _convert_const_I8toU8 if ( tensor->t ) vxReleaseTensor(&tensor->t); tensor->t = vsi_nn_CreateRawTensorFromData(graph, data, attr); + + vsi_nn_safe_free( data ); }/* _convert_const_I8toU8() */ static vsi_status _convert_graph_const_tensor diff --git a/src/tim/vx/internal/src/vsi_nn_log.c b/src/tim/vx/internal/src/vsi_nn_log.c index c818463..9ee1114 100644 --- a/src/tim/vx/internal/src/vsi_nn_log.c +++ b/src/tim/vx/internal/src/vsi_nn_log.c @@ -29,25 +29,49 @@ #include "vsi_nn_log.h" #include "vsi_nn_types.h" +#ifdef __ANDROID__ +#if ANDROID_SDK_VERSION >= 30 +static const char* ENV_LOG_LEVEL = "vendor.VSI_NN_LOG_LEVEL"; +#else +static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL"; +#endif +#else +static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL"; +#endif + +int get_env_as_int(const char* env, int default_value) { + + int value = default_value; + #ifdef __ANDROID__ + { + char value_str[100]; + int status = __system_property_get(env, value_str); + if (status) { + value = atoi(value_str); + } + } + #else + { + char* env_s = getenv(env); + if (env_s) { + value = atoi(env_s); + } + } + #endif + + return value; +} + static vsi_bool _check_log_level ( vsi_nn_log_level_e level ) { - char *env_level_s; static vsi_nn_log_level_e env_level = VSI_NN_LOG_UNINIT; if(env_level == VSI_NN_LOG_UNINIT) { - env_level_s = getenv("VSI_NN_LOG_LEVEL"); - if(env_level_s) - { - env_level = (vsi_nn_log_level_e)atoi(env_level_s); - } - else - { - env_level = VSI_NN_LOG_WARN; - } + env_level = (vsi_nn_log_level_e)get_env_as_int(ENV_LOG_LEVEL, VSI_NN_LOG_WARN); } if(env_level >= level) diff --git a/src/tim/vx/internal/src/vsi_nn_tensor.c b/src/tim/vx/internal/src/vsi_nn_tensor.c index d9d0158..0af8be5 100644 --- a/src/tim/vx/internal/src/vsi_nn_tensor.c +++ b/src/tim/vx/internal/src/vsi_nn_tensor.c @@ -844,7 +844,7 @@ float * vsi_nn_ConvertTensorToFloat32Data if( !tensor->attr.is_created_from_handle ) { - if(tensor_data)free(tensor_data); + vsi_nn_safe_free( tensor_data ); } return data; } /* vsi_nn_ConvertTensorToFloat32Data() */ @@ -1095,7 +1095,7 @@ void vsi_nn_SaveTensorToTextByFp32 } fwrite( buf, count, 1, fp ); fclose( fp ); - free( data ); + vsi_nn_safe_free( data ); } /* vsi_nn_SaveTensorToTextByFp32() */ void vsi_nn_SaveTensorToText @@ -1124,7 +1124,7 @@ void vsi_nn_SaveTensorToText sz = vsi_nn_GetElementNum( tensor ); vsi_nn_SaveDataToText( filename, data, sz, tensor->attr.dtype.vx_type, seperator ); - free( data ); + vsi_nn_safe_free( data ); } /* vsi_nn_SaveTensorToText() */ void vsi_nn_SaveDataToText @@ -1219,7 +1219,7 @@ void vsi_nn_SaveTensorToBinary } fwrite( data, sz, 1, fp ); fclose( fp ); - free( data ); + vsi_nn_safe_free( data ); } /* vsi_nn_SaveTensorToBinary() */ vsi_nn_tensor_t * vsi_nn_CreateTensorFromData @@ -1539,7 +1539,7 @@ void vsi_nn_TransposeTensor VSILOGE( "Copy transpose data fail with code %#x.", status ); } - free( buf ); + vsi_nn_safe_free( buf ); free( dst ); } /* vsi_nn_TransposeTensor() */ @@ -1588,7 +1588,7 @@ void vsi_nn_PermuteTensor if( perm[i] >= dim_num ) { VSILOGW( "Incorrect perm %d", perm[i] ); - if( buf ) { free(buf); buf = NULL; } + vsi_nn_safe_free( buf ); if( dst ) { free(dst); dst = NULL; } return; } @@ 
-1603,7 +1603,7 @@ void vsi_nn_PermuteTensor VSILOGE( "Copy permute data fail with code %#x.", status ); } - if( buf ) { free(buf); buf = NULL; } + vsi_nn_safe_free( buf ); if( dst ) { free(dst); dst = NULL; } } /* vsi_nn_PermuteTensor() */ @@ -2241,7 +2241,7 @@ void vsi_nn_reshuffle_weight_data } vsi_nn_CopyDataToTensor( graph, weights, weight_data ); vsi_nn_Free( buffer ); - vsi_nn_Free( weight_data ); + vsi_nn_safe_free( weight_data ); } vsi_nn_tensor_t* vsi_nn_ConcatTensor_impl