Update internal to REL/v1.1.30
Commit: 6ccb425e Signed-off-by: Jiang Bo <bo.jiang@verisilicon.com>
parent 62898a4419
commit def53f4b5c
@@ -0,0 +1,335 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore

# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates

*-[Dd]ebug/
*-[Dd]ebugPublic/
*-[Rr]elease/
*-[Rr]eleases/

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Build results
*.o
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
NNApi0.3/
NNApi0.4/
OpenVX1.2/
bazel-bin
bazel-genfiles
bazel-out
bazel-ovxlib
bazel-testlogs

# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

# VS code
.vscode

# Visual Studio 2017 auto generated files
Generated\ Files/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUNIT
*.VisualState.xml
TestResult.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# Benchmark Results
BenchmarkDotNet.Artifacts/

# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
**/Properties/launchSettings.json

# StyleCop
StyleCopReport.xml

# Files built by Visual Studio
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb

# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap

# Visual Studio Trace Files
*.e2e

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# JustCode is a .NET coding add-in
.JustCode

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json

# Visual Studio code coverage results
*.coverage
*.coveragexml

# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj

# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/

# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets

# Microsoft Azure Build Output
csx/
*.build.csdef

# Microsoft Azure Emulator
ecf/
rcf/

# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/

# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs

# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
*.mdf
*.ldf
*.ndf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/

# TypeScript v1 declaration files
typings/

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# JetBrains Rider
.idea/
*.sln.iml

# CodeRush
.cr/

# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc

# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config

# Tabs Studio
*.tss

# Telerik's JustMock configuration file
*.jmconfig

# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs

# OpenCover UI analysis results
OpenCover/

# Azure Stream Analytics local run output
ASALocalRun/

# IDE
.settings/
@@ -144,3 +144,5 @@ DEF_OP(PRE_PROCESS_YUV444)
 DEF_OP(PRE_PROCESS_NV12)
 DEF_OP(SCATTER_ND)
 DEF_OP(DECONVOLUTION1D)
+DEF_OP(INTERP)
+DEF_OP(RESIZE_1D)
@@ -14,3 +14,5 @@ DEF_OP(RESIZE_NEAREST_INTERNAL)
 DEF_OP(DEPTH2SPACE_INTERNAL)
 DEF_OP(GRUCELL_ACTIVATION_INTERNAL)
 DEF_OP(GRUCELL_ACTIVATION_INTERNAL_SMA)
+DEF_OP(RESIZE_1D_BILINEAR_INTERNAL)
+DEF_OP(RESIZE_1D_NEAREST_INTERNAL)
@@ -44,6 +44,7 @@ typedef struct _vsi_nn_elu_param
 {
     /* elu layer local data structure */
     vsi_nn_elu_lcl_data local;
+    float alpha;
 } vsi_nn_elu_param;

 #ifdef __cplusplus
@@ -0,0 +1,44 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_INTERP_H
#define _VSI_NN_OP_INTERP_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_interp_param
{
    struct _interp_local_data_t* local;
    int32_t height;        // height of output
    int32_t width;         // width of output
    int32_t zoom_factor;   // zoom factor
    int32_t shrink_factor; // shrink factor
    int32_t pad_beg;       // padding at begin of input
    int32_t pad_end;       // padding at end of input
} vsi_nn_interp_param;

#endif
@@ -0,0 +1,44 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_RESIZE_1D_H
#define _VSI_NN_OP_RESIZE_1D_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_resize_1d_param
{
    struct _resize_1d_local_data_t* local;
    vsi_enum type;
    float factor;
    int32_t size[2];
    vsi_bool align_corners;
    vsi_bool half_pixel_centers;
} vsi_nn_resize_1d_param;

_compiler_assert(offsetof(vsi_nn_resize_1d_param, local) == 0, \
    vsi_nn_resize_1d_h );

#endif
@@ -0,0 +1,42 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H
#define _VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_resize_1d_bilinear_internal_param
{
    struct _resize_1d_bilinear_internal_local_data_t* local;
    vsi_bool align_corners;
    vsi_bool half_pixel_centers;
    float factor;
} vsi_nn_resize_1d_bilinear_internal_param;

_compiler_assert(offsetof(vsi_nn_resize_1d_bilinear_internal_param, local) == 0, \
    vsi_nn_resize_1d_bilinear_internal_h );

#endif
@@ -0,0 +1,42 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#ifndef _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H
#define _VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL_H

#include "vsi_nn_types.h"

typedef struct _vsi_nn_resize_1d_nearest_internal_param
{
    struct _resize_1d_nearest_internal_local_data_t* local;
    vsi_bool align_corners;
    vsi_bool half_pixel_centers;
    float factor;
} vsi_nn_resize_1d_nearest_internal_param;

_compiler_assert(offsetof(vsi_nn_resize_1d_nearest_internal_param, local) == 0, \
    vsi_nn_resize_1d_nearest_internal_h );

#endif
@@ -195,13 +195,6 @@ OVXLIB_API vsi_bool vsi_nn_CheckFilePath
     const char *path
     );

-OVXLIB_API void vsi_nn_GetFP32MultiAndPostShift
-    (
-    vx_float32 mult,
-    vx_uint16 *M0,
-    vx_int8 *N
-    );
-
 /**
  * Malloc aligned buffer
  * Malloc address and size aligned buffer.
@@ -32,6 +32,13 @@
 extern "C" {
 #endif

+vx_tensor vsi_nn_CreateRawTensorFromData
+    (
+    vsi_nn_graph_t * graph,
+    uint8_t * data,
+    vsi_nn_tensor_attr_t * attr
+    );
+
 vsi_status vsi_nn_OptimizeGraph
     (
     vsi_nn_graph_t* graph,
@@ -158,6 +158,10 @@
 #include "ops/vsi_nn_op_squeeze.h"
 #include "ops/vsi_nn_op_expand_broadcast.h"
 #include "ops/vsi_nn_op_deconvolution1d.h"
+#include "ops/vsi_nn_op_interp.h"
+#include "ops/vsi_nn_op_resize_1d.h"
+#include "ops/vsi_nn_op_resize_1d_bilinear_internal.h"
+#include "ops/vsi_nn_op_resize_1d_nearest_internal.h"
 /* custom node head define */
 #include "custom/vsi_nn_custom_node_type.h"
@@ -302,6 +306,10 @@ typedef union _vsi_nn_nn_param
     vsi_nn_squeeze_param squeeze;
     vsi_nn_expand_broadcast_param expand_broadcast;
     vsi_nn_deconvolution1d_param deconvolution1d;
+    vsi_nn_interp_param interp;
+    vsi_nn_resize_1d_param resize_1d;
+    vsi_nn_resize_1d_bilinear_internal_param resize_1d_bilinear_internal;
+    vsi_nn_resize_1d_nearest_internal_param resize_1d_nearest_internal;
     uint8_t client_param[128];

     /* custom node data struct define */
@@ -33,7 +33,7 @@ extern "C"{

 #define VSI_NN_VERSION_MAJOR 1
 #define VSI_NN_VERSION_MINOR 1
-#define VSI_NN_VERSION_PATCH 28
+#define VSI_NN_VERSION_PATCH 30
 #define VSI_NN_VERSION \
     (VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)

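A worked example of this version encoding: for this release, 1 * 10000 + 1 * 100 + 30 = 10130, so downstream code can gate on the integer if needed. An illustrative sketch, not part of the commit:

    #if VSI_NN_VERSION >= 10130
        /* RESIZE_1D / INTERP support from REL/v1.1.30 is available. */
    #endif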
@@ -117,6 +117,7 @@ LOCAL_C_INCLUDES += \
 LOCAL_CFLAGS := \
     -DLINUX \
     -D'OVXLIB_API=__attribute__((visibility("default")))' \
+    -DANDROID_SDK_VERSION=$(PLATFORM_SDK_VERSION)\
     -Wno-sign-compare \
     -Wno-implicit-function-declaration \
     -Wno-sometimes-uninitialized \
@@ -168,12 +168,14 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };

 #define SCALAR_INPUT_SCALE  (2)
 #define SCALAR_INPUT_TAIL   (3)
 #define SCALAR_OUTPUT_SCALE (4)
 #define SCALAR_OUTPUT_ZP    (5)
+#define SCALAR_ALPHA        (6)
 #define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)

 /*
@@ -293,6 +295,7 @@ static vsi_nn_kernel_node_t _setup
     float inputTail = (float)inputs[0]->attr.dtype.zero_point * inputScale;
     float outputScale = outputs[0]->attr.dtype.scale;
     float outputZP = (float)outputs[0]->attr.dtype.zero_point + 0.5f;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

     ret = vsi_nn_kernel_optimize_element_shape(
             (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,
@@ -331,6 +334,8 @@ static vsi_nn_kernel_node_t _setup
                 graph, F32, &outputScale );
         node_params[SCALAR_OUTPUT_ZP] = vsi_nn_kernel_scalar_create(
                 graph, F32, &outputZP );
+        node_params[SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+                graph, F32, &alpha );

         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );
@@ -369,6 +374,11 @@ OnError:
         vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_ZP] );
     }

+    if (node_params[SCALAR_ALPHA])
+    {
+        vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALPHA] );
+    }
+
     return node;
 } /* _setup() */

@@ -356,7 +356,8 @@ static vsi_nn_kernel_node_t _setup
     int32_t out_shape[VSI_NN_MAX_DIM_NUM] = {0};
     int32_t out_rs_flg = 0;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     int32_t keep_dim = vsi_nn_kernel_param_get_int32( params, "keep_dim" );
     int32_t first_axis = axis[0];
     int32_t i = 0;
@@ -369,6 +370,8 @@ static vsi_nn_kernel_node_t _setup
     float input_scale = inputs[0]->attr.dtype.scale;
     float dim_ratio = (float)1.0 / (float)(width * height);

+    axis_num = (int32_t)axis_num_temp;
+
     if(inputs[0]->attr.dtype.qnt_type == VSI_NN_QNT_TYPE_DFP)
     {
         if (inputs[0]->attr.dtype.fl > 0)
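The two hunks above fix a latent type-punning bug (the same change reappears in a CPU kernel later in this diff): vsi_nn_kernel_param_get_buffer reports the element count through a size_t pointer, so casting the address of a 32-bit int32_t to size_t* lets a 64-bit store overrun the variable on LP64 targets. A minimal sketch of the safe pattern, with hypothetical names:

    size_t   count_tmp = 0;   /* matches the callee's size_t out-parameter */
    int32_t *axis = (int32_t *)get_buffer( params, "axis", &count_tmp );
    int32_t  axis_num = (int32_t)count_tmp;   /* narrow explicitly after the call */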
@@ -0,0 +1,305 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

#define _RESIZE_1D_BILINEAR_KERNEL_SOURCE() "resize_1d_bilinear"

#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
        (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) )

#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_BILINEAR_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
          CVIVANTE_NAMESPACE("cl.resize_1d_bilinear_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
          _RESIZE_1D_BILINEAR_KERNEL_SOURCE() }

typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _resize_1d_bilinear_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP( F32, F32),
    PACK_KERNEL_MAP( U8,  U8),
};

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
{
    {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )

#define SCALAR_SCALE_X      (2)
#define SCALAR_HALF_PIXEL   (3)
#define SCALAR_INPUT_SCALE  (4)
#define SCALAR_INPUT_TAIL   (5)
#define SCALAR_OUTPUT_SCALE (6)
#define SCALAR_OUTPUT_TAIL  (7)

#define RESIZE_1D_BILINEAR_NUM       4
#define RESIZE_1D_BILINEAR_QUANT_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )

/*
 * Kernel initializer
 */
DEF_KERNEL_INITIALIZER(_resize_1d_bilinear_initializer)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    gpu_param_t gpu_param = {
        3,
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0}
        };
    vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
    vsi_int_array_t * out_shape = NULL;

    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

    out_shape = output_attr->shape;

    gpu_param.global_scale[0] = 1;
    gpu_param.global_scale[1] = 1;
    gpu_param.global_scale[2] = 1;

    gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
    gpu_param.global_size[0] = gpu_align_p2(
            (out_shape->data[0] + gpu_param.global_scale[0] - 1)
            / gpu_param.global_scale[0], 4);
    gpu_param.global_size[1] = (
            (out_shape->data[1] + gpu_param.global_scale[1] - 1)
            / gpu_param.global_scale[1]);
    gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
    status = vsi_nn_kernel_gpu_config( node, &gpu_param );

final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
    SAFE_FREE_TENSOR_ATTR(output_attr);
    return status;
} /* _resize_1d_bilinear_initializer() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs,
    vsi_bool *is_use_u8_kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_dtype_e in_dtype  = F16;
    vsi_nn_kernel_dtype_e out_dtype = F16;
    const _kernel_map_type * kernel_map = _resize_1d_bilinear_kernel_map;
    size_t kernel_map_size = _cnt_of_array( _resize_1d_bilinear_kernel_map );
    vx_param_description_t * param_def = _resize_1d_bilinear_kernel_param_def;
    size_t param_def_size = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
    vx_kernel_initialize_f initializer = _resize_1d_bilinear_initializer;

    uint32_t key = 0;
    uint32_t i = 0;

    in_dtype  = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
    out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

    if (F16 == in_dtype)
    {
        in_dtype = F32;
    }
    if (F16 == out_dtype)
    {
        out_dtype = F32;
    }

    if ((U8 == in_dtype) || (U8 == out_dtype))
    {
        param_def_size = RESIZE_1D_BILINEAR_QUANT_NUM;
        *is_use_u8_kernel = TRUE;
    }
    else
    {
        param_def_size = RESIZE_1D_BILINEAR_NUM;
        *is_use_u8_kernel = FALSE;
    }

    key = RESIZE_1D_BILINEAR_HASH_KEY( in_dtype, out_dtype );

    for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
    {
        if ( kernel_map[i].key == key )
        {
            break;
        }
    }
    if ( i < (uint32_t)kernel_map_size )
    {
        snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
        kernel->info.parameters  = param_def;
        kernel->info.numParams   = (uint32_t)param_def_size;
        kernel->info.initialize  = initializer;
        // Register code source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
                kernel_map[i].source_name );
        // Register binary source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
                kernel_map[i].source_name );
        status = VSI_SUCCESS;
    }
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[RESIZE_1D_BILINEAR_QUANT_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
    int32_t in_width  = inputs[0]->attr.size[0];
    int32_t out_width = outputs[0]->attr.size[0];
    float input_zp = (float)inputs[0]->attr.dtype.zero_point;
    float input_scale = inputs[0]->attr.dtype.scale;
    float input_tail = -(input_zp * input_scale);
    float output_zp = (float)outputs[0]->attr.dtype.zero_point;
    float output_scale = (0 == outputs[0]->attr.dtype.scale) ? 1.0f : 1.0f / outputs[0]->attr.dtype.scale;
    float half_pixel_value = 0.0f;
    float scale_factor_x = 0.0f;
    vsi_bool is_use_u8_kernel = FALSE;

    if (align_corners && out_width > 1)
    {
        scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1);
    }
    else
    {
        scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width;
    }

    if (half_pixel_centers)
    {
        half_pixel_value = 0.5f;
    }
    else
    {
        half_pixel_value = 0.0f;
    }

    status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            size_t node_params_num = RESIZE_1D_BILINEAR_NUM;
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, RESIZE_1D_BILINEAR_QUANT_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_SCALE_X]    = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x );
            node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value );
            if (is_use_u8_kernel)
            {
                node_params[SCALAR_INPUT_SCALE]  = vsi_nn_kernel_scalar_create( graph, F32, &input_scale );
                node_params[SCALAR_INPUT_TAIL]   = vsi_nn_kernel_scalar_create( graph, F32, &input_tail );
                node_params[SCALAR_OUTPUT_SCALE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
                node_params[SCALAR_OUTPUT_TAIL]  = vsi_nn_kernel_scalar_create( graph, F32, &output_zp );
                node_params_num = RESIZE_1D_BILINEAR_QUANT_NUM;
            }
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
            if (is_use_u8_kernel)
            {
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_SCALE] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_INPUT_TAIL] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_SCALE] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_OUTPUT_TAIL] );
            }
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CL( resize_1d_bilinear, _setup )
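For orientation, scale_factor_x in _setup above follows the usual align_corners convention for resize ops: with align_corners set (and out_width > 1) the endpoints map exactly via (in_width - 1) / (out_width - 1), otherwise the plain width ratio is used. With illustrative numbers, in_width = 4 and out_width = 8 give 3/7 ≈ 0.429 and 4/8 = 0.5 respectively; with half_pixel_centers a 0.5 offset is passed to the kernel via SCALAR_HALF_PIXEL, applied as (w + 0.5) * scale - 0.5 in the CPU reference implementation later in this diff.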
@@ -0,0 +1,312 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
typedef enum
{
    INTERNAL_KERNEL_RESIZE_1D_NEAREST,
} _internal_kernel_e;

#define _RESIZE_1D_NEAREST_KERNEL_SOURCE "resize_1d_nearest"

#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
        (( IN_DTYPE << 8 ) | ( OUT_DTYPE ))

#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE ), \
          CVIVANTE_NAMESPACE("cl.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
          _RESIZE_1D_NEAREST_KERNEL_SOURCE }

typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _resize_1d_nearest_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP( F32, F32),
    PACK_KERNEL_MAP( U8,  U8),
};

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
    {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM  5
#define _RESIZE_1D_NEAREST_QUANT_NUM  _cnt_of_array( _resize_1d_nearest_kernel_param_def )

#define SCALAR_SCALE_X      (2)
#define SCALAR_HALF_PIXEL   (3)
#define SCALAR_ROUND_VALUE  (4)
#define SCALAR_SCALE_VALUE  (5)
#define SCALAR_TAIL_VALUE   (6)

/*
 * Kernel initializer
 */
DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    gpu_param_t gpu_param = {
        3,
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0}
        };
    vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
    vsi_int_array_t * out_shape = NULL;

    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

    out_shape = output_attr->shape;

    gpu_param.global_scale[0] = 1;
    gpu_param.global_scale[1] = 1;
    gpu_param.global_scale[2] = 1;

    gpu_param.dim = (out_shape->size < 3 || 1 == out_shape->data[2]) ? 2 : 3;
    gpu_param.global_size[0] = gpu_align_p2(
            (out_shape->data[0] + gpu_param.global_scale[0] - 1)
            / gpu_param.global_scale[0], 4);
    gpu_param.global_size[1] = (
            (out_shape->data[1] + gpu_param.global_scale[1] - 1)
            / gpu_param.global_scale[1]);
    gpu_param.global_size[2] = out_shape->size > 2 ? out_shape->data[2] : 1;
    status = vsi_nn_kernel_gpu_config( node, &gpu_param );

final:
#define SAFE_FREE_TENSOR_ATTR(_PTR) if( _PTR ) { vsi_nn_kernel_tensor_attr_release( &_PTR ); _PTR = NULL; }
    SAFE_FREE_TENSOR_ATTR(output_attr);
    return status;
} /* _resize_1d_nearest_initializer() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs,
    vsi_bool *is_use_u8_kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_dtype_e in_dtype  = F16;
    vsi_nn_kernel_dtype_e out_dtype = F16;
    const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map;
    size_t kernel_map_size = _cnt_of_array( _resize_1d_nearest_kernel_map );
    vx_param_description_t * param_def = _resize_1d_nearest_kernel_param_def;
    size_t param_def_size = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
    vx_kernel_initialize_f initializer = _resize_1d_nearest_initializer;

    uint32_t key = 0;
    uint32_t i = 0;

    in_dtype  = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
    out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

    if (F16 == in_dtype)
    {
        in_dtype = F32;
    }
    if (F16 == out_dtype)
    {
        out_dtype = F32;
    }

    if ((U8 == in_dtype) || (U8 == out_dtype))
    {
        param_def_size = _RESIZE_1D_NEAREST_QUANT_NUM;
        *is_use_u8_kernel = TRUE;
    }
    else
    {
        param_def_size = _RESIZE_1D_NEAREST_PARAM_NUM;
        *is_use_u8_kernel = FALSE;
    }

    key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype );

    for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
    {
        if ( kernel_map[i].key == key )
        {
            break;
        }
    }
    if ( i < (uint32_t)kernel_map_size )
    {
        snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
        kernel->info.parameters  = param_def;
        kernel->info.numParams   = (uint32_t)param_def_size;
        kernel->info.initialize  = initializer;
        // Register code source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 1,
                kernel_map[i].source_name );
        // Register binary source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
                kernel_map[i].source_name );
        status = VSI_SUCCESS;
    }
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_QUANT_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );
    int32_t in_width  = inputs[0]->attr.size[0];
    int32_t out_width = outputs[0]->attr.size[0];
    float input_zp = (float)inputs[0]->attr.dtype.zero_point;
    float input_scale = inputs[0]->attr.dtype.scale;
    float output_scale = (0 == outputs[0]->attr.dtype.scale) ? \
            input_scale : input_scale / outputs[0]->attr.dtype.scale;
    float output_tail = (float)outputs[0]->attr.dtype.zero_point - input_zp * output_scale;
    float half_pixel_value = 0.0f;
    float round_value = 0.0f;
    float scale_factor_x = 0.0f;
    vsi_bool is_use_u8_kernel = FALSE;

    if (align_corners && out_width > 1)
    {
        scale_factor_x = ((vx_float32)(in_width - 1) * 1.0f) / (vx_float32)(out_width - 1);
    }
    else
    {
        scale_factor_x = ((vx_float32)in_width * 1.0f) / (vx_float32)out_width;
    }

    if (align_corners)
    {
        round_value = 0.5f;
    }
    else
    {
        round_value = 0.0f;
    }

    if (half_pixel_centers)
    {
        half_pixel_value = 0.5f;
    }
    else
    {
        half_pixel_value = 0.0f;
    }

    status = _query_kernel( kernel, inputs, outputs, &is_use_u8_kernel );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            size_t node_params_num = _RESIZE_1D_NEAREST_PARAM_NUM;
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_SCALE_X]     = vsi_nn_kernel_scalar_create( graph, F32, &scale_factor_x );
            node_params[SCALAR_HALF_PIXEL]  = vsi_nn_kernel_scalar_create( graph, F32, &half_pixel_value );
            node_params[SCALAR_ROUND_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &round_value );
            if (is_use_u8_kernel)
            {
                node_params[SCALAR_SCALE_VALUE] = vsi_nn_kernel_scalar_create( graph, F32, &output_scale );
                node_params[SCALAR_TAIL_VALUE]  = vsi_nn_kernel_scalar_create( graph, F32, &output_tail );
                node_params_num = _RESIZE_1D_NEAREST_QUANT_NUM;
            }
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, node_params_num );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_X] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ROUND_VALUE] );
            if (is_use_u8_kernel)
            {
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_SCALE_VALUE] );
                vsi_nn_kernel_scalar_release( &node_params[SCALAR_TAIL_VALUE] );
            }
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CL( resize_1d_nearest, _setup )
@@ -49,7 +49,7 @@ typedef enum
 } unary_type_e;


-#define _CPU_ARG_NUM        (1)
+#define _CPU_ARG_NUM        (2)
 #define _CPU_INPUT_NUM      (1)
 #define _CPU_OUTPUT_NUM     (1)
 #define _CPU_IO_NUM         (_CPU_INPUT_NUM + _CPU_OUTPUT_NUM)
@@ -71,9 +71,9 @@ static float log_eval(float data)
     return logf(data);
 }

-static float elu_eval(float data)
+static float elu_eval(float data, float alpha)
 {
-    return data >=0 ? data : expf(data) - 1;
+    return data >=0 ? data : expf(data) * alpha - alpha;
 }

 static float neg_eval(float data)
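The new body is the standard parameterized ELU, f(x) = x for x >= 0 and alpha * (exp(x) - 1) otherwise, with the product written out as expf(data) * alpha - alpha. A quick numeric check: for alpha = 2 and x = -1, the result is 2 * exp(-1) - 2 ≈ -1.264, while the old hard-coded body gave exp(-1) - 1 ≈ -0.632, i.e. only the alpha = 1 case.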
@@ -114,6 +114,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
     size_t out_elements = 0;
     vsi_nn_kernel_tensor_attr_t * attr[_CPU_IO_NUM] = { NULL };
     int32_t i;
+    float alpha = 0;
     int32_t unary_type = 0;

     tensors[0] = (vsi_nn_kernel_tensor_t)param[0];
@@ -126,6 +127,8 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)

     status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &unary_type);
     CHECK_STATUS_FAIL_GOTO(status, final );
+    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[3], &alpha);
+    CHECK_STATUS_FAIL_GOTO(status, final );

     buffer[0] = (float*)vsi_nn_kernel_tensor_create_buffer( tensors[0], attr[0], TRUE );
     CHECK_PTR_FAIL_GOTO( buffer[0], "Create input buffer fail.", final );
@@ -151,7 +154,7 @@ DEF_KERNEL_EXECUTOR(_eltwise_unary_exec)
             data = log_eval(data);
             break;
         case UNARY_ELU:
-            data = elu_eval(data);
+            data = elu_eval(data, alpha);
             break;
         case UNARY_NEG:
             data = neg_eval(data);
@@ -193,9 +196,11 @@ static vx_param_description_t kernel_param_def[] =
     {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
     {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
+    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
 };

 #define INPUT_FUNC_TYPE     (2)
+#define INPUT_SCALAR_ALPHA  (3)

 static const vx_kernel_description_t _kernel_info =
 {
@@ -237,6 +242,7 @@ static vsi_nn_kernel_node_t _setup
     vsi_status status = VSI_SUCCESS;
     vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

     status = _query_kernel( inputs, outputs, kernel );
     if( VSI_SUCCESS == status)
@@ -249,10 +255,13 @@ static vsi_nn_kernel_node_t _setup
                 inputs, _CPU_INPUT_NUM, outputs, _CPU_OUTPUT_NUM );
         backend_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
                 graph, I32, &unary_type );
+        backend_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
+                graph, F32, &alpha );
         /* Pass parameters to node. */
         status = vsi_nn_kernel_node_pass_param( node, backend_params, _CPU_PARAM_NUM );

         vsi_nn_kernel_scalar_release( &backend_params[INPUT_FUNC_TYPE] );
+        vsi_nn_kernel_scalar_release( &backend_params[INPUT_SCALAR_ALPHA] );
     }
     else
     {
@@ -258,11 +258,14 @@ static vsi_nn_kernel_node_t _setup
     vsi_nn_kernel_node_param_t backend_params[_CPU_PARAM_NUM] = {NULL};
     vsi_nn_kernel_node_t node = NULL;
     int32_t axis_num = 0;
-    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
+    size_t axis_num_temp = 0;
+    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
     vsi_bool is_continue_axis = TRUE;
     uint32_t mask = 0;
     int32_t i = 0;

+    axis_num = (int32_t)axis_num_temp;
+
     for ( i = 1; i < axis_num; i++)
     {
         if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)
@@ -0,0 +1,271 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)
#define _KERNEL_NAME        CVIVANTE_NAMESPACE("cpu.resize_1d_bilinear")

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_bilinear_kernel_param_def[] =
{
    {VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT,  VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_BILINEAR_PARAM_NUM _cnt_of_array( _resize_1d_bilinear_kernel_param_def )

#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL    (3)

/*
 * Kernel function
 */
DEF_KERNEL_EXECUTOR(_compute)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
    float *f32_in_buffer[_INPUT_NUM] = {NULL};
    float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
    vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
    size_t out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
    size_t out_elements[_OUTPUT_NUM] = {0};
    size_t out_bytes[_OUTPUT_NUM] = {0};
    uint32_t i = 0;
    int32_t align_corners = 0;
    int32_t half_pixel_centers = 0;
    float width_scale = 1.0f;
    uint32_t input_width = 0, output_width = 0;
    uint32_t w = 0, out = 0;
    uint32_t output_dims = 0;
    float data00 = .0f, data01 = .0f, interpolation = .0f;
    uint32_t index = 0;
    uint32_t outer = 0;

    /* prepare data */
    for (i = 0; i < _INPUT_NUM; i ++)
    {
        input[i] = (vsi_nn_kernel_tensor_t)param[i];
        in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
        f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );
    }
    for (i = 0; i < _OUTPUT_NUM; i ++)
    {
        output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
        out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
        vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
        out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
        out_bytes[i] = out_elements[i] * sizeof(float);
        f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
        CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
        memset( f32_out_buffer[i], 0, out_bytes[i] );
    }

    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
    input_width = in_attr[0]->shape->data[0];
    output_width = out_attr[0]->shape->data[0];
    output_dims = (uint32_t)out_attr[0]->shape->size;

    if (align_corners && output_width > 1)
    {
        width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
    }
    else
    {
        width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
    }

    outer = 1;

    for (i = 1; i < output_dims; i++)
    {
        outer = outer * out_attr[0]->shape->data[i];
    }

    for (out = 0; out < outer; out++)
    {
        vx_int32 input_base = out * input_width;
        vx_int32 output_base = out * output_width;
        for (w = 0; w < output_width; w ++)
        {
            vx_float32 input_w;
            vx_int32 w0;
            vx_int32 w1;
            if (half_pixel_centers)
            {
                input_w = ((vx_float32)w + 0.5f) * width_scale - 0.5f;
            }
            else
            {
                input_w = w * width_scale;
            }
            w0 = (vx_int32)input_w;
            w1 = input_w < 0 ? 0 : vsi_nn_min(w0 + 1, (vx_int32)(input_width - 1));
            index = input_base + w0;
            data00 = f32_in_buffer[0][index];
            index = input_base + w1;
            data01 = f32_in_buffer[0][index];

            interpolation = data00 * (1 - (input_w - w0)) +
                            data01 * (input_w - w0);
            index = output_base + w;
            f32_out_buffer[0][index] = interpolation;
        }
    }

    /* save data */
    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
                f32_out_buffer[i], out_elements[i] );
        CHECK_STATUS_FAIL_GOTO( status, final );
    }

final:
    for (i = 0; i < _INPUT_NUM; i++)
    {
        if (f32_in_buffer[i])
        {
            free(f32_in_buffer[i]);
            f32_in_buffer[i] = NULL;
        }
        if (in_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
        }
    }
    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        if (f32_out_buffer[i])
        {
            free(f32_out_buffer[i]);
            f32_out_buffer[i] = NULL;
        }
        if (out_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
        }
    }

    return status;
} /* _compute() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs
    /* Add extra params */
    )
{
    vsi_status status = VSI_FAILURE;
    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
    kernel->info.function    = _compute;
    kernel->info.parameters  = _resize_1d_bilinear_kernel_param_def;
    kernel->info.numParams   = _cnt_of_array( _resize_1d_bilinear_kernel_param_def );
    status = VSI_SUCCESS;
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_BILINEAR_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );

    status = _query_kernel( kernel, inputs, outputs );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_BILINEAR_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
            node_params[SCALAR_HALF_PIXEL]    = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_BILINEAR_PARAM_NUM );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CPU( resize_1d_bilinear, _setup )
@ -0,0 +1,271 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)
#define _KERNEL_NAME        CVIVANTE_NAMESPACE("cpu.resize_1d_nearest")

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM  _cnt_of_array( _resize_1d_nearest_kernel_param_def )

#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL    (3)
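/* The two scalar parameters occupy slots 2 and 3, after the input and output tensors. */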

/*
 * Kernel function
 */
DEF_KERNEL_EXECUTOR(_compute)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_tensor_t input[_INPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_t output[_OUTPUT_NUM] = {NULL};
    float *f32_in_buffer[_INPUT_NUM] = {NULL};
    float *f32_out_buffer[_OUTPUT_NUM] = {NULL};
    vsi_nn_kernel_tensor_attr_t *in_attr[_INPUT_NUM];
    vsi_nn_kernel_tensor_attr_t *out_attr[_OUTPUT_NUM];
    size_t   out_stride_size[_OUTPUT_NUM][VSI_NN_MAX_DIM_NUM] = {{1}};
    size_t   out_elements[_OUTPUT_NUM] = {0};
    size_t   out_bytes[_OUTPUT_NUM] = {0};
    uint32_t i = 0;
    int32_t align_corners = 0;
    int32_t half_pixel_centers = 0;
    float   width_scale = 1.0f;
    uint32_t input_width = 0, output_width = 0;
    uint32_t w = 0, out = 0;
    uint32_t output_dims = 0;
    uint32_t outer = 0;
    /* prepare data */
    for (i = 0; i < _INPUT_NUM; i ++)
    {
        input[i] = (vsi_nn_kernel_tensor_t)param[i];
        in_attr[i] = vsi_nn_kernel_tensor_attr_create( input[i] );
        f32_in_buffer[i] = (float*)vsi_nn_kernel_tensor_create_buffer( input[i], in_attr[i], TRUE );
        CHECK_PTR_FAIL_GOTO( f32_in_buffer[i], "Create input0 buffer fail.", final );

    }
    for (i = 0; i < _OUTPUT_NUM; i ++)
    {
        output[i] = (vsi_nn_kernel_tensor_t)param[i + _INPUT_NUM];
        out_attr[i] = vsi_nn_kernel_tensor_attr_create( output[i] );
        vsi_nn_kernel_tensor_attr_get_stride( out_attr[i], out_stride_size[i] );
        out_elements[i] = vsi_nn_kernel_tensor_attr_get_size( out_attr[i] );
        out_bytes[i] = out_elements[i] * sizeof(float);
        f32_out_buffer[i] = (float *)malloc( out_bytes[i] );
        CHECK_PTR_FAIL_GOTO( f32_out_buffer[i], "Create output buffer fail.", final );
        memset( f32_out_buffer[i], 0, out_bytes[i] );
    }

    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_ALIGN_CORNERS], &(align_corners));
    vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[SCALAR_HALF_PIXEL], &(half_pixel_centers));
    input_width = in_attr[0]->shape->data[0];
    output_width = out_attr[0]->shape->data[0];
    output_dims = (uint32_t)out_attr[0]->shape->size;

    if (align_corners && output_width > 1)
    {
        width_scale = ((vx_float32)(input_width - 1) * 1.0f) / (vx_float32)(output_width - 1);
    }
    else
    {
        width_scale = ((vx_float32)input_width * 1.0f) / (vx_float32)output_width;
    }

    outer = 1;

    for (i = 1; i < output_dims; i++)
    {
        outer = outer * out_attr[0]->shape->data[i];
    }

    for (out = 0; out < outer; out++)
    {
        vx_int32 input_base = out * input_width;
        vx_int32 output_base = out * output_width;

        for (w = 0; w < output_width; w ++)
        {
            float input_w;
            uint32_t in_x;
            int32_t in_index;
            int32_t out_index;

            if (half_pixel_centers)
            {
                input_w = ((float)w + 0.5f) * width_scale;
            }
            else
            {
                input_w = w * width_scale;
            }
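            /* align_corners snaps to the nearest source index via rounding;
             * otherwise floor() selects the left sample. Both are clamped to the row. */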
            if (align_corners)
            {
                in_x = vsi_nn_min((uint32_t)simple_round(input_w), input_width - 1);
            }
            else
            {
                in_x = vsi_nn_min((uint32_t)floorf(input_w), input_width - 1);
            }
            in_index = in_x + input_base;
            out_index = w + output_base;
            f32_out_buffer[0][out_index] = f32_in_buffer[0][in_index];
        }
    }

    /* save data */
    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        status = vsi_nn_kernel_tensor_write_from_float( output[i], out_attr[i],
                f32_out_buffer[i], out_elements[i] );
        CHECK_STATUS_FAIL_GOTO( status, final );
    }

final:
    for (i = 0; i < _INPUT_NUM; i++)
    {
        if (f32_in_buffer[i])
        {
            free(f32_in_buffer[i]);
            f32_in_buffer[i] = NULL;
        }
        if (in_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &in_attr[i] );
        }
    }

    for (i = 0; i < _OUTPUT_NUM; i++)
    {
        if (f32_out_buffer[i])
        {
            free(f32_out_buffer[i]);
            f32_out_buffer[i] = NULL;
        }
        if (out_attr[i])
        {
            vsi_nn_kernel_tensor_attr_release( &out_attr[i] );
        }
    }

    return status;
} /* _compute() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs
    /* Add extra params */
    )
{
    vsi_status status = VSI_FAILURE;
    snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", _KERNEL_NAME );
    kernel->info.function    = _compute;
    kernel->info.parameters  = _resize_1d_nearest_kernel_param_def;
    kernel->info.numParams   = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
    status = VSI_SUCCESS;
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM];
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );

    status = _query_kernel( kernel, inputs, outputs );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
            node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_CPU( resize_1d_nearest, _setup )

@ -279,10 +279,10 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)

    if( U8 == input_dtype && F16 == output_dtype )
    {
        vx_uint16 M0 = 0;
        vx_int8 postShift = 0;
        vx_uint32 multAndoutZP0[2] = {0};
        vx_uint32 multAndoutZP1[2] = {0};
        uint16_t M0 = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP0[2] = {0};
        uint32_t multAndoutZP1[2] = {0};

        gpu_dp_inst_t uniU8MulAndPostShift_0_Lo_2x8 = {{
            0xdddddddd, // TCfg

@ -305,12 +305,12 @@ DEF_KERNEL_INITIALIZER(_add_mean_std_norm_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift);
        gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift);
        multAndoutZP0[0] = (vx_uint32)(M0);
        multAndoutZP0[1] = (vx_uint32)((output_ZP << postShift) - input_ZP * M0);
        uniU8MulAndPostShift_0_Lo_2x8.data[7] |= (postShift & 0x1F);

        vsi_nn_GetFP32MultiAndPostShift(scaleIn1 / scaleOut, &M0, &postShift);
        gpu_quantize_multiplier_16bit(scaleIn1 / scaleOut, &M0, &postShift);
        multAndoutZP1[0] = (vx_uint32)(M0);
        multAndoutZP1[1] = (vx_uint32)((output_ZP << postShift) - input_ZP1 * M0);
        uniU8MulAndPostShift_1_Lo_2x8.data[7] |= (postShift & 0x1F);

@ -268,7 +268,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
    {
        uint32_t multAndoutZP[2] = {0};
        uint16_t M0 = 0;
        int8_t postShift = 0;
        int32_t postShift = 0;
        gpu_dp_inst_t uniDataMulAndPostShift_2x8 = {{
            0xdddddddd, // TCfg
            0x44444444, // ASelt

@ -279,7 +279,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};
        vsi_nn_GetFP32MultiAndPostShift(scaleIn / scaleOut, &M0, &postShift);
        gpu_quantize_multiplier_16bit(scaleIn / scaleOut, &M0, &postShift);

        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)(output_ZP << postShift );

@ -434,7 +434,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
        int32_t packedMaxData[4];
        float uint8Scale = scaleIn / scaleOut;
        uint16_t M0 = 0;
        int8_t postShift = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP[2] = {0};
        gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{
            0xdddddddd, // TCfg

@ -457,7 +457,7 @@ DEF_KERNEL_INITIALIZER(_clip_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift);
        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0);

@ -702,27 +702,66 @@ static vsi_nn_kernel_node_t _setup
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM];
    vsi_nn_kernel_node_param_t node_params[_DEPTHWISE_CONV1D_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t weight_pad_front[VSI_NN_MAX_DIM_NUM] = {0};
    int32_t weight_pad_end[VSI_NN_MAX_DIM_NUM] = {0};
    vsi_nn_tensor_t * weights = NULL;
    vsi_nn_tensor_t * biases = NULL;
    vsi_nn_tensor_t *temp_tensor[3] = {NULL};
    vsi_nn_tensor_t* reshape_tensors[3] = { NULL };
    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
    int32_t new_rank = 2;
    uint32_t i = 0;
    int32_t stride = vsi_nn_kernel_param_get_int32( params, "stride" );
    int32_t pad_front = vsi_nn_kernel_param_get_int32( params, "pad_front" );
    int32_t pad_end = vsi_nn_kernel_param_get_int32( params, "pad_end" );
    int32_t dilation = vsi_nn_kernel_param_get_int32( params, "dilation" );
    _internal_kernel_size_e ks = KN;

    weight_pad_end[0] = gpu_align_np2_safe(inputs[1]->attr.size[0], 8) - inputs[1]->attr.size[0];
    if (!((VSI_NN_TYPE_UINT8 == inputs[0]->attr.dtype.vx_type)
        && (VSI_NN_TYPE_UINT8 == inputs[1]->attr.dtype.vx_type)
        && (NULL == inputs[2] || VSI_NN_TYPE_INT32 == inputs[2]->attr.dtype.vx_type)
        && (VSI_NN_TYPE_UINT8 == outputs[0]->attr.dtype.vx_type)))
    {
        return NULL;
    }

    weights = vsi_nn_pad_tensor(graph, inputs[1], weight_pad_front, weight_pad_end, inputs[1]->attr.dim_num,
        VSI_NN_PAD_MODE_CONSTANT, 0);
    reshape_tensors[0] = inputs[0];

    biases = vsi_nn_merge_input_zeropoint_to_bias(graph, inputs[0], inputs[1], inputs[2]);
    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        shape[0] = inputs[1]->attr.size[0];
        shape[1] = 1;
        for (i = 1; i < inputs[1]->attr.dim_num; i++)
        {
            shape[1] *= inputs[1]->attr.size[i];
        }
        reshape_tensors[1] = vsi_nn_reshape_tensor( graph,
            inputs[1], (uint32_t*)shape, new_rank );
    }
    else
    {
        reshape_tensors[1] = inputs[1];
    }

    temp_tensor[0] = inputs[0];
    if (inputs[2] && inputs[2]->attr.dim_num == 1)
    {
        shape[0] = inputs[2]->attr.size[0];
        shape[1] = 1;
        new_rank = 2;
        reshape_tensors[2] = vsi_nn_reshape_tensor( graph,
            inputs[2], (uint32_t*)shape, new_rank );
    }

    weight_pad_end[0] = gpu_align_np2_safe(reshape_tensors[1]->attr.size[0], 8) - reshape_tensors[1]->attr.size[0];

    weights = vsi_nn_pad_tensor(graph, reshape_tensors[1], weight_pad_front, weight_pad_end,
        reshape_tensors[1]->attr.dim_num, VSI_NN_PAD_MODE_CONSTANT, 0);

    biases = vsi_nn_merge_input_zeropoint_to_bias(graph, reshape_tensors[0], reshape_tensors[1], reshape_tensors[2]);

    temp_tensor[0] = reshape_tensors[0];
    temp_tensor[1] = weights;
    temp_tensor[2] = biases;

@ -760,6 +799,16 @@ static vsi_nn_kernel_node_t _setup
        }
    }

    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        vsi_nn_ReleaseTensor( &reshape_tensors[1] );
    }

    if (inputs[2] && inputs[2]->attr.dim_num == 1)
    {
        vsi_nn_ReleaseTensor( &reshape_tensors[2] );
    }

    if (weights)
    {
        vsi_nn_ReleaseTensor(&weights);

@ -164,9 +164,9 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
    else if ((U8 == input_attr->dtype) || (U8 == input1_attr->dtype))
    {
        uint16_t M0 = 0;
        int8_t postShift0 = 0;
        int32_t postShift0 = 0;
        uint16_t M1 = 0;
        int8_t postShift1 = 0;
        int32_t postShift1 = 0;
        uint32_t i = 0;
        gpu_dp_inst_t uniU8SubZptoF32Conv0_4x4 = {{
            0x09090909, // TCfg

@ -188,8 +188,8 @@ DEF_KERNEL_INITIALIZER(_detect_post_box_initializer)
            0x00010001, 0x00000000, 0x00010001, 0x00000000,
            0x00010001, 0x00000000, 0x00010001, 0x00000000 // Constant
        }, GPU_DP_TYPE_16 };
        vsi_nn_GetFP32MultiAndPostShift(scaleIn0, &M0, &postShift0);
        vsi_nn_GetFP32MultiAndPostShift(scaleIn1, &M1, &postShift1);
        gpu_quantize_multiplier_16bit(scaleIn0, &M0, &postShift0);
        gpu_quantize_multiplier_16bit(scaleIn1, &M1, &postShift1);
        uniU8SubZptoF32Conv0_4x4.data[7] |= (postShift0 & 0x1F);
        uniU8SubZptoF32Conv1_4x4.data[7] |= (postShift1 & 0x1F);
        for ( i = 0; i < 8; i++ )

@ -266,9 +266,11 @@ static vx_param_description_t kernel_param_def[] =
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};

#define INPUT_FUNC_TYPE    (2)
#define INPUT_SCALAR_ALPHA (3)
#define _CL_PARAM_NUM _cnt_of_array(kernel_param_def)

/*

@ -296,6 +298,7 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
    float    inputTail = 0;
    float    outputScale = 1.0f;
    float    outputZP = 0;
    float    alpha = 0;
    uint32_t pack_key;

    attr[0] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );

@ -303,7 +306,9 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
    attr[1] = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( attr[1], "Create tensor attr buffer fail.", final );

    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &type);
    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[INPUT_FUNC_TYPE], &type);
    CHECK_STATUS_FAIL_GOTO(status, final );
    status = vsi_nn_kernel_scalar_read_float32((vsi_nn_kernel_scalar_t)param[INPUT_SCALAR_ALPHA], &alpha);
    CHECK_STATUS_FAIL_GOTO(status, final );

    out_shape = attr[1]->shape;

@ -408,6 +413,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
                "uniConvBF16toF32_Part1_2x8", &uniConvBF16toF32_Part1_2x8 );
            status |= vsi_nn_kernel_gpu_add_param( node,
                "uniExtractOddData_2x8", &uniExtractOddData_2x8 );
            status |= vsi_nn_kernel_gpu_add_param( node,
                "alpha", &alpha );
            CHECK_STATUS_FAIL_GOTO(status, final );
        }
        break;

@ -466,6 +473,8 @@ DEF_KERNEL_INITIALIZER(_eltwise_unary_initializer)
            "outputScale", &outputScale );
        status |= vsi_nn_kernel_gpu_add_param( node,
            "outputZP", &outputZP );
        status |= vsi_nn_kernel_gpu_add_param( node,
            "alpha", &alpha );

        if (attr[1]->dtype == F16)
        {

@ -555,7 +564,8 @@ static vsi_nn_kernel_node_t _setup
    vsi_nn_tensor_t* rs_tensors[2] = { NULL };
    int32_t shape[VSI_NN_MAX_DIM_NUM] = { 0 };
    int32_t new_rank = 0;
    vsi_bool ret;
    vsi_bool ret = FALSE;
    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );

    ret = vsi_nn_kernel_optimize_element_shape(
            (int32_t *)inputs[0]->attr.size, inputs[0]->attr.dim_num,

@ -586,6 +596,8 @@ static vsi_nn_kernel_node_t _setup
                rs_tensors, 1, &rs_tensors[1], 1 );
        node_params[INPUT_FUNC_TYPE] = vsi_nn_kernel_scalar_create(
                graph, I32, &unary_type );
        node_params[INPUT_SCALAR_ALPHA] = vsi_nn_kernel_scalar_create(
                graph, F32, &alpha );

        /* Pass parameters to node. */
        status = vsi_nn_kernel_node_pass_param( node, node_params, _CL_PARAM_NUM );

@ -609,6 +621,11 @@ OnError:
        vsi_nn_kernel_scalar_release( &node_params[INPUT_FUNC_TYPE] );
    }

    if (node_params[INPUT_SCALAR_ALPHA])
    {
        vsi_nn_kernel_scalar_release( &node_params[INPUT_SCALAR_ALPHA] );
    }

    return node;
} /* _setup() */

@ -181,8 +181,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer)
    float    dstScale = 0;
    uint16_t M0 = 0;
    uint16_t M1 = 0;
    int8_t  postShift0 = 0;
    int8_t  postShift1 = 0;
    int32_t postShift0 = 0;
    int32_t postShift1 = 0;

    uint32_t pack_key = 0;
    int32_t ac2zero = 0;

@ -279,8 +279,8 @@ DEF_KERNEL_INITIALIZER(_matrix_mul_initializer)
        dstScale = 1;
        dstZP = 0.0f;
    }
    vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postShift0);
    vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postShift1);
    gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postShift0);
    gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postShift1);

    mulKIn0In1Zp = (float)((int)(K + 3) / 4 * 4 * src1ZP * src0ZP);
    inOutScale = src0Scale * src1Scale / dstScale;

@ -588,7 +588,8 @@ static vsi_nn_kernel_node_t _setup
    vsi_nn_kernel_node_param_t node_params[_MOMENTS_PARAM_NUM] = { NULL };
    vsi_nn_kernel_node_t node = NULL;
    int32_t axis_num = 0;
    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", (size_t*)&axis_num);
    size_t axis_num_temp = 0;
    int32_t* axis = (int32_t *) vsi_nn_kernel_param_get_buffer( params, "axis", &axis_num_temp);
    int32_t axis_first = axis[0];
    int32_t shapes[2][VSI_NN_MAX_DIM_NUM] = { { 1, 1, 1, 1 } };
    vsi_nn_tensor_t* reshape_tensors[3] = { NULL };

@ -602,6 +603,8 @@ static vsi_nn_kernel_node_t _setup
    vsi_bool image_2d = FALSE;
    vsi_bool is_continue_axis = TRUE;

    axis_num = (int32_t)axis_num_temp;

    for ( i = 1; i < axis_num; i++)
    {
        if ( axis[i] != (axis[i - 1] + 1) && axis[0] == 0)

@ -139,7 +139,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
    int32_t input_fl = 0;
    int32_t output_fl = 0;
    uint16_t M0 = 0;
    int8_t postShift = 0;
    int32_t postShift = 0;
    float inputScale = 1.0f;
    int32_t input_ZP = 0;
    float outputScale = 1.0f;

@ -193,7 +193,7 @@ DEF_KERNEL_INITIALIZER(_poolwithargmax_initializer)
    if ( ( input_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM )
        && ( output_attr->quant == VSI_NN_KERNEL_QUANT_ASYMM ) )
    {
        vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift);
    }

    image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]);

@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
    float   dstZP = 0;
    float   dstScale = 1.0f;

    int8_t  postshift0 = 0;
    int8_t  postshift1 = 0;
    int32_t postshift0 = 0;
    int32_t postshift1 = 0;
    float   outScale_fl = 1;

    uint16_t M0 = 0;

@ -229,7 +229,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
        src0ZP = attr[0]->asymm.zero_point;
        src0Scale = attr[0]->asymm.scale;

        vsi_nn_GetFP32MultiAndPostShift(src0Scale / 1.0f, &M0, &postshift0);
        gpu_quantize_multiplier_16bit(src0Scale / 1.0f, &M0, &postshift0);
    }

    if ( attr[1]->quant == VSI_NN_KERNEL_QUANT_DFP )

@ -243,7 +243,7 @@ DEF_KERNEL_INITIALIZER(_pow_initializer)
        src1ZP = attr[1]->asymm.zero_point;
        src1Scale = attr[1]->asymm.scale;

        vsi_nn_GetFP32MultiAndPostShift(src1Scale / 1.0f, &M1, &postshift1);
        gpu_quantize_multiplier_16bit(src1Scale / 1.0f, &M1, &postshift1);
    }

    if ( attr[2]->quant == VSI_NN_KERNEL_QUANT_DFP )

File diff suppressed because it is too large

@ -0,0 +1,533 @@
/****************************************************************************
*
* Copyright (c) 2020 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "vsi_nn_types.h"
#include "vsi_nn_tensor.h"
#include "vsi_nn_graph.h"
#include "vsi_nn_log.h"
#include "vsi_nn_error.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

/*
 * Define kernel meta.
 */
typedef enum
{
    LARGE = 0,
    SMALL
} _internal_nearest_e;

#define _RESIZE_1D_NEAREST_KERNEL_SOURCE      "resize_1d_nearest"

#define STR(a) #a
// Add kernel hashtable here
#define RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, mode ) \
        (( IN_DTYPE << 20 ) | ( OUT_DTYPE << 8) | (mode))
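/* The hash key packs the input dtype, output dtype and the LARGE/SMALL variant
 * into one 32-bit value used to look up the shader in the kernel map below. */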

#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, LARGE ), \
          CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)), \
          _RESIZE_1D_NEAREST_KERNEL_SOURCE }

#define PACK_KERNEL_MAP_OPT( IN_DTYPE, OUT_DTYPE ) \
        { RESIZE_1D_NEAREST_HASH_KEY( IN_DTYPE, OUT_DTYPE, SMALL ), \
          CVIVANTE_NAMESPACE("evis.resize_1d_nearest_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_op"), \
          _RESIZE_1D_NEAREST_KERNEL_SOURCE }

typedef struct
{
    uint32_t key;
    char * function_name;
    const char * source_name;
} _kernel_map_type;

static const _kernel_map_type _resize_1d_nearest_kernel_map[] =
{
    // Register kernel here
    PACK_KERNEL_MAP(F16, F16),
    PACK_KERNEL_MAP(I16, I16),
    PACK_KERNEL_MAP(I8, I8),
    PACK_KERNEL_MAP(U8, U8),
    PACK_KERNEL_MAP_OPT(F16, F16),
    PACK_KERNEL_MAP_OPT(I16, I16),
    PACK_KERNEL_MAP_OPT(I8, I8),
    PACK_KERNEL_MAP_OPT(U8, U8),
};

/*
 * Kernel params
 */
static vx_param_description_t _resize_1d_nearest_kernel_param_def[] =
{
    {VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
    {VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED},
};
#define _RESIZE_1D_NEAREST_PARAM_NUM  _cnt_of_array( _resize_1d_nearest_kernel_param_def )

#define SCALAR_ALIGN_CORNERS (2)
#define SCALAR_HALF_PIXEL    (3)

/*
 * Kernel initializer
 */
DEF_KERNEL_INITIALIZER(_resize_1d_nearest_initializer)
    (
    vsi_nn_kernel_node_t                node,
    const vsi_nn_kernel_node_param_t  * param,
    size_t                              param_size
    )
{
#define MAX_POST_SHIFT_BITS     (31)
#define MAX_MULTIPLIER_NUM      (65535)
    vsi_status status = VSI_FAILURE;
    gpu_param_t gpu_param = {
        3,
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0},
        {0, 0, 0}
        };
    vsi_nn_kernel_tensor_attr_t * output_attr = NULL;
    vsi_nn_kernel_tensor_attr_t * input_attr  = NULL;
    vsi_int_array_t             * out_shape   = NULL;
    vsi_int_array_t             * in_shape    = NULL;
    vsi_nn_kernel_dtype_e         input_dtype = F16;
    vsi_nn_kernel_dtype_e         output_dtype = F16;
    int32_t  align_corners = 0;
    int32_t  half_pixel_centers = 0;
    uint32_t depth = 0;
    int32_t  srcFixPointPos = 0;
    int32_t  dstFixPointPos = 0;
    float    input_scale = 1.0;
    int32_t  inputZP = 0;
    float    output_scale = 1.0;
    int32_t  outputZP = 0;
    float    scale_factor = 1.0f;
    uint32_t in_width = 0;
    uint32_t out_width = 0;
    uint32_t out_height = 0;
    float    half_pixel_value = 0.0f;
    float    round_value = 0.0f;

    input_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[0] );
    CHECK_PTR_FAIL_GOTO( input_attr, "Create tensor attr buffer fail.", final );
    output_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)param[1] );
    CHECK_PTR_FAIL_GOTO( output_attr, "Create tensor attr buffer fail.", final );

    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[2], &align_corners);
    CHECK_STATUS_FAIL_GOTO(status, final );
    status = vsi_nn_kernel_scalar_read_int32((vsi_nn_kernel_scalar_t)param[3], &half_pixel_centers);
    CHECK_STATUS_FAIL_GOTO(status, final );
    out_shape    = output_attr->shape;
    in_shape     = input_attr->shape;
    input_dtype  = input_attr->dtype;
    output_dtype = output_attr->dtype;

    in_width   = in_shape->data[0];
    depth      = in_shape->data[2];
    out_width  = out_shape->data[0];
    out_height = out_shape->data[1];

    if (BF16 == input_dtype && output_dtype == BF16)
    {
        input_dtype  = F16;
        output_dtype = F16;
    }
    if (align_corners && out_width > 1)
    {
        scale_factor = ((float)(in_width - 1) * 1.0f) / (float)(out_width - 1);
    }
    else
    {
        scale_factor = ((float)in_width * 1.0f) / (float)out_width;
    }

    if (align_corners)
    {
        round_value = 0.5f;
    }
    else
    {
        round_value = 0.0f;
    }

    if (half_pixel_centers)
    {
        half_pixel_value = 0.5f;
    }
    else
    {
        half_pixel_value = 0.0f;
    }

    if (VSI_NN_KERNEL_QUANT_ASYMM == input_attr->quant )
    {
        input_scale = input_attr->asymm.scale;
        inputZP     = input_attr->asymm.zero_point;
    }
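    /* DFP tensors encode a fixed-point position fl: the effective scale is
     * 2^-fl when fl >= 0 and 2^|fl| when fl < 0. */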
    else if (VSI_NN_KERNEL_QUANT_DFP == input_attr->quant)
    {
        srcFixPointPos = input_attr->dfp.fl;
        if (srcFixPointPos >= 0)
        {
            input_scale = 1.0f / (float) ((int64_t)1 << srcFixPointPos);
        }
        else if (srcFixPointPos < 0)
        {
            input_scale = (float)((int64_t)1 << -srcFixPointPos);
        }
        inputZP = 0;
    }
    else
    {
        input_scale = 1.0f;
        inputZP = 0;
    }

    if (VSI_NN_KERNEL_QUANT_ASYMM == output_attr->quant )
    {
        output_scale = 1.0f / output_attr->asymm.scale;
        outputZP     = output_attr->asymm.zero_point;
    }
    else if (VSI_NN_KERNEL_QUANT_DFP == output_attr->quant)
    {
        dstFixPointPos = output_attr->dfp.fl;
        if (dstFixPointPos >= 0)
        {
            output_scale = (float) ((int64_t)1 << dstFixPointPos);
        }
        else if (dstFixPointPos < 0)
        {
            output_scale = 1.0f / (float) ((int64_t)1 << -dstFixPointPos);
        }
        outputZP = 0;
    }
    else
    {
        output_scale = 1.0;
        outputZP = 0;
    }

    if (F16 == input_dtype && F16 == output_dtype)
    {
        gpu_dp_inst_t uniGetExtractData_2x8 = {{
            0x00009999, // TCfg
            0x00000000, // ASelt
            0x06040200, 0x00000000, // ABin
            0x0000aaaa, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000300, // AccumType, ConstantType, and PostShift
            0x00100010, 0x00100010, 0x00100010, 0x00100010,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        if (scale_factor < 4.0f)
        {
            status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
            CHECK_STATUS_FAIL_GOTO(status, final );
        }

        gpu_param.global_scale[0] = 4;
        gpu_param.global_scale[1] = 1;
        gpu_param.global_scale[2] = 1;
        status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
        CHECK_STATUS_FAIL_GOTO(status, final );
    }
    else if ( input_dtype == output_dtype && (I8 == input_dtype || I16 == input_dtype))
    {
        gpu_dp_inst_t uniGetExtractData_2x8 = {{
            0x00009999, // TCfg
            0x00000000, // ASelt
            0x06040200, 0x00000000, // ABin
            0x0000aaaa, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000300, // AccumType, ConstantType, and PostShift
            0x00080008, 0x00080008, 0x00080008, 0x00080008,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};
        gpu_dp_inst_t uniConvertI8toI8_2x8 = {{
            0x11111111, // TCfg
            0x00000000, // ASelt
            0x03020100, 0x07060504, // ABin
            0x22222222, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000600, // AccumType, ConstantType, and PostShift
            0x00000001, 0x00000001, 0x00000001, 0x00000001,
            0x00000001, 0x00000001, 0x00000001, 0x00000001 // Constant
        }, GPU_DP_TYPE_16};

        if (I16 == input_dtype)
        {
            uniGetExtractData_2x8.data[8]  = 0x00100010;
            uniGetExtractData_2x8.data[9]  = 0x00100010;
            uniGetExtractData_2x8.data[10] = 0x00100010;
            uniGetExtractData_2x8.data[11] = 0x00100010;
            uniGetExtractData_2x8.data[12] = 0x00100010;
            uniGetExtractData_2x8.data[13] = 0x00100010;
            uniGetExtractData_2x8.data[14] = 0x00100010;
            uniGetExtractData_2x8.data[15] = 0x00100010;
        }

        if (srcFixPointPos > dstFixPointPos)
        {
            int32_t postshift = vsi_nn_min(srcFixPointPos - dstFixPointPos, MAX_POST_SHIFT_BITS);

            uniConvertI8toI8_2x8.data[7] |= (postshift & 0x1F);
        }
        else
        {
            uint32_t multiplier = vsi_nn_min((int64_t)1 << (dstFixPointPos - srcFixPointPos), MAX_MULTIPLIER_NUM);
            uint32_t i = 0;

            for (i = 0; i < 8; i++)
            {
                uniConvertI8toI8_2x8.data[i + 8] = multiplier;
            }
        }

        if (scale_factor < 4.0f)
        {
            status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
            CHECK_STATUS_FAIL_GOTO(status, final );
        }

        gpu_param.global_scale[0] = 4;
        gpu_param.global_scale[1] = 1;
        gpu_param.global_scale[2] = 1;

        status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
        status |= vsi_nn_kernel_gpu_add_param( node, "uniConvertI8toI8_2x8", &uniConvertI8toI8_2x8);
        CHECK_STATUS_FAIL_GOTO(status, final );
    }
    else if (U8 == input_dtype && U8 == output_dtype)
    {
        uint16_t M0 = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP[2] = {0};
        gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{
            0xdddddddd, // TCfg
            0x44444444, // ASelt
            0x13121110, 0x17161514, // ABin
            0x11111111, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00002400, // AccumType, ConstantType, and PostShift
            0x00000000, 0x00000000, 0x00000000, 0x00000000,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};
        gpu_dp_inst_t uniGetExtractData_2x8 = {{
            0x00009999, // TCfg
            0x00000000, // ASelt
            0x06040200, 0x00000000, // ABin
            0x0000aaaa, // BSelt
            0x00000000, 0x00000000, // BBin
            0x00000300, // AccumType, ConstantType, and PostShift
            0x00080008, 0x00080008, 0x00080008, 0x00080008,
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift);

        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0);

        uniMultiplyAndPostShift_2x8.data[7] |= (postShift & 0x1F);

        if (scale_factor < 4.0f)
        {
            status = vsi_nn_kernel_gpu_add_param( node, "uniGetExtractData_2x8", &uniGetExtractData_2x8);
            CHECK_STATUS_FAIL_GOTO(status, final );
        }

        gpu_param.global_scale[0] = 4;
        gpu_param.global_scale[1] = 1;
        gpu_param.global_scale[2] = 1;

        status = vsi_nn_kernel_gpu_add_param( node, "scale_x", &scale_factor);
        status |= vsi_nn_kernel_gpu_add_param( node, "multAndoutZP", multAndoutZP);
        status |= vsi_nn_kernel_gpu_add_param( node, "uniMultiplyAndPostShift_2x8", &uniMultiplyAndPostShift_2x8);
        CHECK_STATUS_FAIL_GOTO(status, final );
    }
    status = vsi_nn_kernel_gpu_add_param( node, "half_pixel_value", &half_pixel_value);
    status |= vsi_nn_kernel_gpu_add_param( node, "round_value", &round_value);
    CHECK_STATUS_FAIL_GOTO(status, final );

    gpu_param.global_size[0] = gpu_align_p2((out_width + gpu_param.global_scale[0] - 1)\
            / gpu_param.global_scale[0], 4);
    gpu_param.global_size[1] = (out_height + gpu_param.global_scale[1] - 1) / gpu_param.global_scale[1];
    gpu_param.global_size[2] = depth;

    status = vsi_nn_kernel_gpu_config( node, &gpu_param );
#undef MAX_MULTIPLIER_NUM
#undef MAX_POST_SHIFT_BITS
final:
    if (input_attr) vsi_nn_kernel_tensor_attr_release( &input_attr );
    if (output_attr) vsi_nn_kernel_tensor_attr_release( &output_attr );
    return status;
} /* _resize_1d_nearest_initializer() */

/*
 * Query kernel
 */
static vsi_status _query_kernel
    (
    vsi_nn_kernel_t * kernel,
    vsi_nn_tensor_t * const * const inputs,
    vsi_nn_tensor_t * const * const outputs,
    int32_t align_corners
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_dtype_e in_dtype = F16;
    vsi_nn_kernel_dtype_e out_dtype = F16;
    const _kernel_map_type * kernel_map = _resize_1d_nearest_kernel_map;
    size_t kernel_map_size              = _cnt_of_array( _resize_1d_nearest_kernel_map );
    vx_param_description_t * param_def  = _resize_1d_nearest_kernel_param_def;
    size_t param_def_size               = _cnt_of_array( _resize_1d_nearest_kernel_param_def );
    vx_kernel_initialize_f  initializer = _resize_1d_nearest_initializer;

    uint32_t key = 0;
    uint32_t i = 0;
    uint32_t inputWidth  = inputs[0]->attr.size[0];
    uint32_t outputWidth = outputs[0]->attr.size[0];
    float scale_factor;
    _internal_nearest_e resize_mode = LARGE;

    if (align_corners && outputWidth > 1)
    {
        scale_factor = (vx_float32)(inputWidth - 1) / (vx_float32)(outputWidth - 1);
    }
    else
    {
        scale_factor = (vx_float32)inputWidth / (vx_float32)outputWidth;
    }

    if (scale_factor < 4.0f)
    {
        resize_mode = SMALL;
    }
    else
    {
        resize_mode = LARGE;
    }
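    /* Scale factors below 4x select the "_op" shader variant (SMALL) registered
     * with PACK_KERNEL_MAP_OPT; larger downscales use the generic kernel (LARGE). */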

    in_dtype  = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
    out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

    if (BF16 == in_dtype && BF16 == out_dtype)
    {
        in_dtype  = F16;
        out_dtype = F16;
    }

    key = RESIZE_1D_NEAREST_HASH_KEY( in_dtype, out_dtype, resize_mode );

    for ( i = 0; i < (uint32_t)kernel_map_size; i ++ )
    {
        if( kernel_map[i].key == key )
        {
            break;
        }
    }

    if ( i < (uint32_t)kernel_map_size )
    {
        snprintf( kernel->info.name, VX_MAX_KERNEL_NAME, "%s", kernel_map[i].function_name );
        kernel->info.parameters  = param_def;
        kernel->info.numParams   = (uint32_t)param_def_size;
        kernel->info.initialize  = initializer;
        // Register code source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_CODE, 2,
                "vsi_nn_kernel_header",
                kernel_map[i].source_name );
        // Register binary source
        vsi_nn_kernel_add_source( kernel, VSI_NN_GPU_SOURCE_FMT_EXECUTABLE, 1,
                kernel_map[i].source_name );
        status = VSI_SUCCESS;
    }
    return status;
} /* _query_kernel() */

static vsi_nn_kernel_node_t _setup
    (
    vsi_nn_graph_t              * graph,
    vsi_nn_tensor_t            ** inputs,
    size_t                        input_num,
    vsi_nn_tensor_t            ** outputs,
    size_t                        output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t             * kernel
    )
{
    vsi_status status = VSI_FAILURE;
    vsi_nn_kernel_node_param_t node_params[_RESIZE_1D_NEAREST_PARAM_NUM] = {NULL};
    vsi_nn_kernel_node_t node = NULL;
    int32_t align_corners = vsi_nn_kernel_param_get_int32( params, "align_corners" );
    int32_t half_pixel_centers = vsi_nn_kernel_param_get_int32( params, "half_pixel_centers" );

    status = _query_kernel( kernel, inputs, outputs, align_corners );
    if ( VSI_SUCCESS == status)
    {
        node = vsi_nn_kernel_create_node( graph, kernel );
        if ( node )
        {
            /* Set inputs and outputs */
            vsi_nn_kernel_node_pack_io( node_params, _RESIZE_1D_NEAREST_PARAM_NUM,
                    inputs, input_num, outputs, output_num );
            node_params[SCALAR_ALIGN_CORNERS] = vsi_nn_kernel_scalar_create( graph, I32, &align_corners );
            node_params[SCALAR_HALF_PIXEL] = vsi_nn_kernel_scalar_create( graph, I32, &half_pixel_centers );
            /* Pass parameters to node. */
            status = vsi_nn_kernel_node_pass_param( node, node_params, _RESIZE_1D_NEAREST_PARAM_NUM );
            VSI_ASSERT( status == VSI_SUCCESS );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_ALIGN_CORNERS] );
            vsi_nn_kernel_scalar_release( &node_params[SCALAR_HALF_PIXEL] );
        }
    }
    return node;
} /* _setup() */

__END_DECLS

REGISTER_BACKEND_EVIS( resize_1d_nearest, _setup )

@ -850,7 +850,7 @@ static vsi_status _query_kernel
        }
    }

    if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size))
    if ((UP_2X_HALF == scale_flag) && (i >= kernel_map_size) && is_same_type && is_evis2)
    {
        scale_flag = UP_OPT;
        key = RESIZE_BILINEAR_HASH_KEY( in_dtype, out_dtype, scale_flag );

@ -348,7 +348,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
    else if (U8 == input_dtype && U8 == output_dtype)
    {
        uint16_t M0 = 0;
        vx_int8 postShift = 0;
        int32_t postShift = 0;
        uint32_t multAndoutZP[2] = {0};
        gpu_dp_inst_t uniMultiplyAndPostShift_2x8 = {{
            0xdddddddd, // TCfg

@ -371,7 +371,7 @@ DEF_KERNEL_INITIALIZER(_resize_nearest_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(input_scale * output_scale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(input_scale * output_scale, &M0, &postShift);

        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((outputZP << postShift) - inputZP * M0);

@ -131,9 +131,9 @@ DEF_KERNEL_INITIALIZER(_select_initializer)
    float     outputScale = 1.0f;
    int32_t   outputZP = 0;
    uint16_t  in0_M0 = 0;
    int8_t    in0_postShift = 0;
    int32_t   in0_postShift = 0;
    uint16_t  in1_M0 = 0;
    int8_t    in1_postShift = 0;
    int32_t   in1_postShift = 0;
    uint32_t  pack_key = 0;
    input0_attr = vsi_nn_kernel_tensor_attr_create( (vsi_nn_kernel_tensor_t)input0);
    CHECK_PTR_FAIL_GOTO( input0_attr, "vsi_nn_kernel_tensor_attr_create fail.", final );

@ -196,8 +196,8 @@ DEF_KERNEL_INITIALIZER(_select_initializer)
        outputZP    = output_attr->asymm.zero_point;
    }

    vsi_nn_GetFP32MultiAndPostShift(input0Scale / outputScale, &in0_M0, &in0_postShift);
    vsi_nn_GetFP32MultiAndPostShift(input1Scale / outputScale, &in1_M0, &in1_postShift);
    gpu_quantize_multiplier_16bit(input0Scale / outputScale, &in0_M0, &in0_postShift);
    gpu_quantize_multiplier_16bit(input1Scale / outputScale, &in1_M0, &in1_postShift);

    pack_key = _PACK_SELECT_KEY( input0_attr->dtype, input1_attr->dtype, output_attr->dtype );

@ -309,7 +309,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)
    {
        float    uint8Scale = scaleIn / scaleOut;
        uint16_t M0 = 0;
        int8_t   postShift = 0;
        int32_t  postShift = 0;
        uint32_t multAndoutZP[2] = {0};

        gpu_dp_inst_t uniU8MulAndPostShift_Lo_2x8 = {{

@ -323,7 +323,7 @@ DEF_KERNEL_INITIALIZER(_tile_initializer)
            0x00000000, 0x00000000, 0x00000000, 0x00000000 // Constant
        }, GPU_DP_TYPE_16};

        vsi_nn_GetFP32MultiAndPostShift(uint8Scale, &M0, &postShift);
        gpu_quantize_multiplier_16bit(uint8Scale, &M0, &postShift);
        multAndoutZP[0] = (uint32_t)(M0);
        multAndoutZP[1] = (uint32_t)((output_ZP << postShift) - input_ZP * M0);

@ -154,7 +154,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
    int32_t input_fl = 0;
    int32_t output_fl = 0;
    uint16_t M0 = 0;
    int8_t postShift = 0;
    int32_t postShift = 0;
    float inputScale = 1.0f;
    int32_t input_ZP = 0;
    float outputScale = 1.0f;

@ -212,7 +212,7 @@ DEF_KERNEL_INITIALIZER(_upsample_initializer)
    factorOut = 1.0f / outputScale;

    vsi_nn_GetFP32MultiAndPostShift(inputScale / outputScale, &M0, &postShift);
    gpu_quantize_multiplier_16bit(inputScale / outputScale, &M0, &postShift);

    image_2d = (vsi_bool)(input_shape->size < 3 || 1 == input_shape->data[2]);

@ -84,7 +84,7 @@ void gpu_quantize_multiplier_32bit
    double q;
    int64_t q_fixed;
    const int32_t bit = 32;
    if( vsi_abs(double_multiplier - 0.0) < 1e-5 )
    if( vsi_abs(double_multiplier - 0.0) < 1e-8 )
    {
        *quantize_multiplier = 0;
        *shift = bit - 0;

@ -116,6 +116,25 @@ void gpu_quantize_multiplier_32bit
    }
} /* gpu_quantize_multiplier_32_bit() */

void _modify_multiplier_postshift
    (
    uint16_t * quantize_multiplier,
    int32_t  * shift
    )
{
    uint16_t multiplier = *quantize_multiplier;
    int32_t postshift = *shift;

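    /* Halve the multiplier (rounding up) while stepping the shift down, keeping
     * multiplier * 2^-postshift roughly constant until the shift fits the
     * hardware's post-shift range. */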
    while (postshift > GPU_MAX_POST_SHIFT_BITS)
    {
        multiplier = (multiplier + 1) >> 1;
        postshift --;
    }

    *quantize_multiplier = multiplier;
    *shift = postshift;
}

void gpu_quantize_multiplier_16bit
    (
    double double_multiplier,

@ -135,5 +154,8 @@ void gpu_quantize_multiplier_16bit
    {
        *shift -= bit;
    }

    _modify_multiplier_postshift(quantize_multiplier, shift);

} /* gpu_quantize_multiplier_16bit() */

@ -1145,10 +1145,10 @@ vsi_nn_kernel_tensor_attr_t * vsi_nn_kernel_tensor_attr_create
            status = vxQueryTensor( (vx_tensor)tensor, VX_TENSOR_SCALE,
                &(attr->asymm.scale), sizeof(float));
            CHECK_STATUS( status );
            // Reset scale to 1
            if( (attr->asymm.scale - 0.f) < 1e-5 )
            // Reset scale to 1e-8
            if( (attr->asymm.scale - 0.f) < 1e-8 )
            {
                attr->asymm.scale = 1.0f;
                attr->asymm.scale = (float)1e-8;
                attr->asymm.zero_point = 0;
            }
        }

@ -1225,13 +1225,17 @@ vsi_status vsi_nn_kernel_pirority_set
static vsi_bool _check_shader_support(vsi_nn_graph_t* graph)
{
    char *envctrl;
    int32_t enableShader = 1;
    static int32_t enableShader = -1;

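    /* The VIV_VX_ENABLE_SHADER environment variable is read once and cached in
     * the static flag, so later graphs skip the getenv() call. */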
    if (enableShader == -1)
    {
        enableShader = 1;
        envctrl = getenv("VIV_VX_ENABLE_SHADER");
        if (envctrl)
        {
            enableShader = atoi(envctrl);
        }
    }

#if VX_HARDWARE_CAPS_PARAMS_EXT_SUPPORT
    if ( graph->ctx->config.subGroupSize == 0 )

@ -1240,7 +1244,7 @@ static vsi_bool _check_shader_support(vsi_nn_graph_t* graph)
    }
#endif

    if(enableShader == 1)
    if (enableShader >= 1)
    {
        return TRUE;
    }

@ -459,7 +459,7 @@ vsi_bool vsi_nn_kernel_optimize_broadcast_shape
        k = 0;
        for (j = 0; j < (size_t)input_num; j++)
        {
            if (size_in[k] > 1)
            if (size_in[j] > 1)
            {
                k = j;
                break;

@ -605,8 +605,12 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias
{
    vsi_nn_tensor_t * new_bias = NULL;
    vsi_nn_tensor_attr_t attr;

    int32_t *new_bias_data_ptr = NULL;
    uint8_t *weight_data = NULL;
    int32_t *bias_data = NULL;
    uint32_t i, j;
    memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
    weight_data = vsi_nn_ConvertTensorToData(graph, weight);

    if (bias == NULL)
    {

@ -620,26 +624,47 @@ vsi_nn_tensor_t* vsi_nn_merge_input_zeropoint_to_bias
        attr.dtype.zero_point = 0;
        attr.dtype.vx_type = VSI_NN_TYPE_INT32;
        }
        else
        {
            VSILOGE("need to add ...");
        }
    }
    else
    {
        memcpy(&attr, &bias->attr, sizeof(vsi_nn_tensor_attr_t));
        if (attr.dim_num == 1)
        {
            attr.size[1] = 1;
            attr.dim_num = 2;
        }
        bias_data = (int32_t *)vsi_nn_ConvertTensorToData(graph, bias);
    }

    new_bias = vsi_nn_CreateTensorWithDefault(graph, &attr, 0.0);
    new_bias_data_ptr = (int32_t *)malloc(attr.size[0] * sizeof(int32_t));
    memset((void *)new_bias_data_ptr, 0, sizeof(int32_t) * attr.size[0]);

    if (input->attr.dtype.zero_point == 0)
    if (input->attr.dtype.zero_point != 0)
    {
        return new_bias;
    }
    else
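    /* Fold the input zero point into the bias:
     * new_bias[i] = bias[i] - input_zp * sum_j (w[i][j] - w_zp). */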
        for (i = 0; i < weight->attr.size[1]; i++)
        {
        VSILOGE("need to process bias - (input_zp * (w - w_zp)) ...");
            uint8_t *weight_ptr = weight_data + i * weight->attr.size[0];
            for (j = 0; j < weight->attr.size[0]; j++)
            {
                new_bias_data_ptr[i] += -((int32_t)weight_ptr[j] - weight->attr.dtype.zero_point) \
                    * input->attr.dtype.zero_point;
            }
        }
    }

    if (bias_data != NULL)
    {
        for (i = 0; i < weight->attr.size[1]; i++)
        {
            new_bias_data_ptr[i] += bias_data[i];
        }
    }

    new_bias = vsi_nn_CreateTensorFromData(graph, (uint8_t *)new_bias_data_ptr, &attr);

    vsi_nn_safe_free( new_bias_data_ptr );
    vsi_nn_safe_free( bias_data );
    vsi_nn_safe_free( weight_data );

    return new_bias;
}

@ -31,6 +31,8 @@
#include "kernel/vsi_nn_kernel.h"
#include "kernel/vsi_nn_kernel_node.h"
#include "vsi_nn_feature.h"
#include "vsi_nn_tensor_util.h"
#include "vsi_nn_graph_optimization.h"

static vsi_bool _build_vx_conv2d_param
    (

@ -173,6 +175,7 @@ static vx_tensor _expand_tensor_dim
    vsi_nn_kernel_t             * kernel \
    )

REGISTER_CONV_OPENVX_KERNEL( conv1d )
{
    vx_node node = NULL;

@ -196,10 +199,34 @@ REGISTER_CONV_OPENVX_KERNEL( conv1d )
    temp_tensors[0] = _expand_tensor_dim( inputs[0]->t,
            (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 );
    CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final );

    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
                (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
        CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
    }
    else
    {
        uint8_t * data = NULL;
        vsi_nn_tensor_attr_t attr;
        uint32_t i;

        data = vsi_nn_ConvertTensorToData( graph, inputs[1] );
        CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final );

        memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t));

        attr.size[0] = 1;
        for (i = 1; i <= inputs[1]->attr.dim_num; i++)
        {
            attr.size[i] = inputs[1]->attr.size[i - 1];
        }
        attr.dim_num = inputs[1]->attr.dim_num + 1;
        attr.dtype.channel_dim = inputs[1]->attr.dtype.channel_dim + 1;

        temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr);
        vsi_nn_safe_free( data );
    }

    temp_tensors[2] = _expand_tensor_dim( outputs[0]->t,
            (int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 );

@ -248,9 +275,38 @@ REGISTER_CONV_OPENVX_KERNEL( depthwise_conv1d )
            (int32_t*)inputs[0]->attr.size, inputs[0]->attr.dim_num, 0 );
    CHECK_PTR_FAIL_GOTO( temp_tensors[0], "Expand input dim fail.", final );

    if (inputs[1]->attr.dtype.qnt_type != VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC)
    {
        temp_tensors[1] = _expand_tensor_dim( inputs[1]->t,
                (int32_t*)inputs[1]->attr.size, inputs[1]->attr.dim_num, 0 );
        CHECK_PTR_FAIL_GOTO( temp_tensors[1], "Expand kernel dim fail.", final );
    }
    else
    {
        uint8_t * data = NULL;
        vsi_nn_tensor_attr_t attr;
        uint32_t i;

        data = vsi_nn_ConvertTensorToData( graph, inputs[1] );
        CHECK_PTR_FAIL_GOTO( data, "Convert data fail.", final );

        memcpy(&attr, &inputs[1]->attr, sizeof(vsi_nn_tensor_attr_t));

        attr.size[0] = 1;
        attr.size[1] = inputs[1]->attr.size[0];
        attr.size[2] = 1;
        for (i = 1; i < inputs[1]->attr.dim_num; i++)
        {
            attr.size[2] *= inputs[1]->attr.size[i];
        }
        attr.size[3] = 1;
        attr.dim_num = 4;
        attr.dtype.channel_dim = 2;
|
||||
temp_tensors[1] = vsi_nn_CreateRawTensorFromData(graph, data, &attr);
|
||||
|
||||
vsi_nn_safe_free( data );
|
||||
}
|
||||
|
||||
temp_tensors[2] = _expand_tensor_dim( outputs[0]->t,
|
||||
(int32_t*)outputs[0]->attr.size, outputs[0]->attr.dim_num, 0 );
|
||||
|
|
|
|||
|
|
@@ -38,27 +38,27 @@ typedef struct _sort_lut_s
    float val;
} sort_lut;

-static float exp_eval(float val)
+static float exp_eval(float val, float alpha)
{
    return expf(val);
}

-static float log_eval(float data)
+static float log_eval(float data, float alpha)
{
    return logf(data);
}

-static float elu_eval(float data)
+static float elu_eval(float data, float alpha)
{
-    return data >=0 ? data : expf(data) - 1;
+    return data >=0 ? data : expf(data) * alpha - alpha;
}

-static float neg_eval(float data)
+static float neg_eval(float data, float alpha)
{
    return data * -1.0f;
}

-static float hsigmoid_eval(float data)
+static float hsigmoid_eval(float data, float alpha)
{
    data = (float)(0.2 * data + 0.5);
    data = vsi_nn_clamp(data, 0, 1);
@@ -66,14 +66,14 @@ static float hsigmoid_eval(float data)
    return data;
}

-static float soft_plus_eval(float data)
+static float soft_plus_eval(float data, float alpha)
{
-    return log_eval(exp_eval(data) + 1);
+    return log_eval(exp_eval(data, alpha) + 1, alpha);
}

-static float mish_eval(float data)
+static float mish_eval(float data, float alpha)
{
-    data = (float)(data * tanh(soft_plus_eval(data)));
+    data = (float)(data * tanh(soft_plus_eval(data, alpha)));

    return data;
}
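Of the rewritten evaluators, only ELU actually uses the new parameter: elu(x) = x for x >= 0 and alpha * (exp(x) - 1) otherwise, which the new return expresses as expf(data) * alpha - alpha. A quick standalone sanity check (not part of the commit):

    #include <assert.h>
    #include <math.h>

    static float elu_ref(float x, float alpha)
    {
        return x >= 0 ? x : alpha * (expf(x) - 1.0f);
    }

    int main(void)
    {
        /* alpha == 1 reproduces the old fixed behaviour */
        assert(fabsf(elu_ref(-2.0f, 1.0f) - (expf(-2.0f) - 1.0f)) < 1e-6f);
        /* alpha scales only the negative branch */
        assert(elu_ref(3.0f, 0.5f) == 3.0f);
        return 0;
    }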
@@ -96,7 +96,7 @@ static int32_t _lut_comparator(const void *pa, const void *pb)
    return 0;
}

-static void _set_unary_table_lookup(float func(float), float *index, float *value)
+static void _set_unary_table_lookup(float func(float, float), float *index, float *value, float alpha)
{
#define VSI_NN_MAX_LUT_SIZE (1024)
#define FLT16_MAX (57344)
@@ -108,25 +108,25 @@ static void _set_unary_table_lookup(float func(float), float *index, float *valu
    {
        int16_t val = (int16_t)(i << 6);
        lut[i].index = fp16_to_fp32(val);
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    for (i = 0x0; i < 0x10; i++)
    {
        lut[i].index = 0;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    for (i = 0x1F0; i < 0x200; i++)
    {
        lut[i].index = FLT16_MAX;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    for (i = 0x3F0; i < 0x400; i++)
    {
        lut[i].index = FLT16_MIN;
-        lut[i].val = func(lut[i].index);
+        lut[i].val = func(lut[i].index, alpha);
    }

    qsort(lut, VSI_NN_MAX_LUT_SIZE, sizeof(sort_lut), _lut_comparator);
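Context for the table construction: i << 6 puts the 10-bit loop index into the high bits of an fp16 pattern (sign, 5-bit exponent, top 4 mantissa bits), so the 1024 entries sample the full fp16 range; the follow-up loops then pin the buckets around zero and beyond +/-FLT16_MAX. A self-contained conversion matching how fp16_to_fp32 is used here (assumption: the library helper is a plain IEEE half-to-single conversion):

    #include <stdint.h>
    #include <string.h>

    static float fp16_bits_to_fp32(uint16_t h)
    {
        uint32_t sign = (uint32_t)(h & 0x8000) << 16;
        uint32_t exp  = (h >> 10) & 0x1F;
        uint32_t man  = h & 0x3FF;
        uint32_t bits;
        float out;

        if (exp == 0x1F)                /* inf / NaN */
        {
            bits = sign | 0x7F800000 | (man << 13);
        }
        else if (exp == 0)              /* zero / subnormal */
        {
            if (man == 0)
            {
                bits = sign;
            }
            else
            {
                exp = 127 - 15 + 1;
                while ((man & 0x400) == 0) { man <<= 1; exp--; }
                man &= 0x3FF;
                bits = sign | (exp << 23) | (man << 13);
            }
        }
        else                            /* normal number */
        {
            bits = sign | ((exp - 15 + 127) << 23) | (man << 13);
        }
        memcpy(&out, &bits, sizeof(out));
        return out;
    }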
@@ -154,13 +154,14 @@ static vsi_nn_kernel_node_t _setup
    size_t output_num,
    const vsi_nn_kernel_param_t * params,
    vsi_nn_kernel_t * kernel,
-    float func(float)
+    float func(float, float)
    )
{
#ifdef VX_USER_LOOKUP_TABLE_SUPPORT
    vx_lut lut1 = NULL;
    vx_lut lut2 = NULL;
    vx_node node = NULL;
+    float alpha = vsi_nn_kernel_param_get_float32( params, "alpha" );
    float index[1024] = {0};
    float value[1024] = {0};

@@ -172,7 +173,7 @@ static vsi_nn_kernel_node_t _setup
        return NULL;
    }

-    _set_unary_table_lookup(func, index, value);
+    _set_unary_table_lookup(func, index, value, alpha);

    lut1 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024);
    lut2 = vxCreateLUT( graph->ctx->c, VX_TYPE_FLOAT32, 1024);
@@ -1,12 +1,12 @@

-float4 eltwise_unary_sin(float4 x)
+float4 eltwise_unary_sin(float4 x, float alpha)
{
    return native_sin(x);
}

#define logE (1.44269502f)
#define twoLogE (logE * 2.0f)
-float4 eltwise_unary_exp(float4 x)
+float4 eltwise_unary_exp(float4 x, float alpha)
{
    x *= logE;
    x = exp2(x);
@@ -14,33 +14,33 @@ float4 eltwise_unary_exp(float4 x)
}

#define rlogE (0.693147182f)
-float4 eltwise_unary_log(float4 x)
+float4 eltwise_unary_log(float4 x, float alpha)
{
    x = log2(x);
    return x * rlogE;
}

-float4 eltwise_unary_elu(float4 val)
+float4 eltwise_unary_elu(float4 val, float alpha)
{
    float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;

    return val < 0 ? x : val;
}

-float4 eltwise_unary_neg(float4 x)
+float4 eltwise_unary_neg(float4 x, float alpha)
{
    return x * -1;
}

-float4 eltwise_unary_hard_sigmoid(float4 x)
+float4 eltwise_unary_hard_sigmoid(float4 x, float alpha)
{
    x = 0.2 * x + 0.5;
    x = clamp(x, 0, 1);
    return x;
}

-float4 _softrelu(float4 x)
+float4 _softrelu(float4 x, float alpha)
{
    x *= logE;
    x = exp2(x);
@@ -49,7 +49,7 @@ float4 _softrelu(float4 x)
    return x * rlogE;
}

-float4 _tanh(float4 x)
+float4 _tanh(float4 x, float alpha)
{
    x *= -twoLogE;
    x = 1 + exp2(x);
@@ -57,10 +57,10 @@ float4 _tanh(float4 x)
    return (2 * x - 1);
}

-float4 eltwise_unary_mish(float4 x)
+float4 eltwise_unary_mish(float4 x, float alpha)
{
-    float4 y = _softrelu(x);
-    x = x * _tanh(y);
+    float4 y = _softrelu(x, alpha);
+    x = x * _tanh(y, alpha);
    return x;
}

@@ -72,14 +72,15 @@ __kernel void func_name##_F32toF32 \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
    \
    float4 src = read_imagef(input, coord); \
    \
-    float4 dst = eltwise_unary_##func_name(src); \
+    float4 dst = eltwise_unary_##func_name(src, alpha); \
    \
    write_imagef(output, coord, dst); \
}
@@ -99,14 +100,15 @@ __kernel void func_name##_F32toF32_2D \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
    \
    float4 src = read_imagef(input, coord); \
    \
-    float4 dst = eltwise_unary_##func_name(src); \
+    float4 dst = eltwise_unary_##func_name(src, alpha); \
    \
    write_imagef(output, coord, dst); \
}
@@ -126,7 +128,8 @@ __kernel void func_name##_U8toU8 \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -134,7 +137,7 @@ __kernel void func_name##_U8toU8 \
    uint4 src = read_imageui(input, coord); \
    float4 data = convert_float4(src) * inputScale - inputTail; \
    \
-    data = eltwise_unary_##func_name(data); \
+    data = eltwise_unary_##func_name(data, alpha); \
    uint4 dst = convert_uint4(data * outputScale + outputZP); \
    \
    write_imageui(output, coord, dst); \
@@ -155,7 +158,8 @@ __kernel void func_name##_U8toU8_2D \
    float inputScale, \
    float inputTail, \
    float outputScale, \
-    float outputZP \
+    float outputZP, \
+    float alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -163,7 +167,7 @@ __kernel void func_name##_U8toU8_2D \
    uint4 src = read_imageui(input, coord); \
    float4 data = convert_float4(src) * inputScale - inputTail; \
    \
-    data = eltwise_unary_##func_name(data); \
+    data = eltwise_unary_##func_name(data, alpha); \
    uint4 dst = convert_uint4(data * outputScale + outputZP); \
    \
    write_imageui(output, coord, dst); \
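The U8 variants wrap the same evaluator in a dequantize/requantize pair. A scalar model of that path (assumptions: inputTail is the pre-folded zero-point term inputZP * inputScale, and outputZP is the output zero point):

    /* Scalar model of the U8toU8 kernel body above. */
    static unsigned char eval_u8(unsigned char q,
                                 float inputScale, float inputTail,
                                 float outputScale, float outputZP,
                                 float (*func)(float, float), float alpha)
    {
        float x = (float)q * inputScale - inputTail; /* dequantize */
        float y = func(x, alpha);                    /* e.g. elu_eval */
        float r = y * outputScale + outputZP;        /* requantize */
        return (unsigned char)r;                     /* kernel uses convert_uint4 */
    }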
@@ -184,7 +188,8 @@ __kernel void neg_I32toI32
    float inputScale,
    float inputTail,
    float outputScale,
-    float outputZP
+    float outputZP,
+    float alpha
    )
{
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
@@ -202,7 +207,8 @@ __kernel void neg_I32toI32_2D
    float inputScale,
    float inputTail,
    float outputScale,
-    float outputZP
+    float outputZP,
+    float alpha
    )
{
    int2 coord = (int2)(get_global_id(0), get_global_id(1));
@@ -0,0 +1,57 @@
__kernel void resize_1d_bilinear_F32toF32(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;
    float left_x_f = floor(in_x);
    float x_lerp = in_x - left_x_f;
    int left_x_idx = convert_int(left_x_f);
    int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);
    float4 top_l, top_r, top, bottom, dst;

    top_l = read_imagef(input, coord_in);
    coord_in.x++;
    top_r = read_imagef(input, coord_in);

    top_r = top_r - top_l;
    dst = top_l + x_lerp * top_r;

    write_imagef(output, coord_out, dst);

}


__kernel void resize_1d_bilinear_U8toU8(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value,
    float in_scale,
    float in_tail,
    float out_scale,
    float out_tail
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x - half_pixel_value;
    float left_x_f = floor(in_x);
    float x_lerp = in_x - left_x_f;
    int left_x_idx = convert_int(left_x_f);
    int4 coord_in = (int4)(left_x_idx, coord_out.y, coord_out.z, 0);
    float4 top_l, top_r, top;
    uint4 dst;

    top_l = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;
    coord_in.x++;
    top_r = convert_float4(read_imageui(input, coord_in)) * in_scale + in_tail;

    top_r = top_r - top_l;
    top = top_l + x_lerp * top_r;
    dst = convert_uint4(top * out_scale + out_tail);

    write_imageui(output, coord_out, dst);
}
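Both kernels compute the same source coordinate, in_x = (x_out + h) * scale_x - h, where h is presumably 0.5 when half-pixel centers are enabled and 0 otherwise (an assumption about how the host sets half_pixel_value). A scalar reference of the interpolation:

    #include <math.h>

    /* Scalar reference for the 1-D bilinear kernels above; the explicit
     * clamps stand in for the image addressing the GPU version relies on. */
    static float resize_1d_bilinear_ref(const float * src, int src_w,
                                        float scale_x, float half_pixel_value,
                                        int x_out)
    {
        float in_x   = ((float)x_out + half_pixel_value) * scale_x - half_pixel_value;
        float left_f = floorf(in_x);
        float lerp   = in_x - left_f;
        int   left   = (int)left_f;
        int   right  = left + 1;
        if (left < 0) left = 0;
        if (right > src_w - 1) right = src_w - 1;
        return src[left] + lerp * (src[right] - src[left]);
    }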
@@ -0,0 +1,36 @@

#define NEAREST_INDEX_PROCESS() \
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
    float in_x = (convert_float(coord_out.x) + half_pixel_value) * scale_x + round_value; \
    int in_x_idx = convert_int(in_x); \

__kernel void resize_1d_nearest_F32toF32(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value,
    float round_value)
{
    NEAREST_INDEX_PROCESS()
    int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);
    float4 dst;
    dst = read_imagef(input, coord_in);
    write_imagef(output, coord_out, dst);
}


__kernel void resize_1d_nearest_U8toU8(
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    float scale_x,
    float half_pixel_value,
    float round_value,
    float output_scale,
    float output_tail)
{
    NEAREST_INDEX_PROCESS()
    int4 coord_in = (int4)(in_x_idx, coord_out.y, coord_out.z, 0);
    uint4 dst;
    dst = convert_uint4(convert_float4(read_imageui(input, coord_in)) * output_scale + output_tail);
    write_imageui(output, coord_out, dst);
}
@@ -1,5 +1,7 @@
#include "cl_viv_vx_ext.h"

+_viv_uniform float alpha;
+
float4 eltwise_unary_sin(float4 x)
{
    return native_sin(x);
@@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x)
float4 eltwise_unary_elu(float4 val)
{
    float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;

    return val < 0 ? x : val;
}
@@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;
__kernel void func_name##_##src_type_name##to##dst_type_name##_2D( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -194,7 +197,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;
__kernel void func_name##_BF16toBF16_2D( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int2 coord = (int2)(get_global_id(0), get_global_id(1)); \
@@ -1,5 +1,7 @@
#include "cl_viv_vx_ext.h"

+_viv_uniform float alpha;
+
float4 eltwise_unary_sin(float4 x)
{
    return native_sin(x);
@@ -24,7 +26,7 @@ float4 eltwise_unary_log(float4 x)
float4 eltwise_unary_elu(float4 val)
{
    float4 x = val * logE;
-    x = exp2(x) - 1;
+    x = exp2(x) * alpha - alpha;

    return val < 0 ? x : val;
}
@@ -78,7 +80,8 @@ _viv_uniform VXC_512Bits uniDatatoFp32Part1_4x4;
__kernel void func_name##_##src_type_name##to##dst_type_name( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -192,7 +195,8 @@ _viv_uniform VXC_512Bits uniExtractOddData_2x8;
__kernel void func_name##_BF16toBF16( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
-    int type \
+    int type, \
+    float _alpha \
    ) \
{ \
    int4 coord = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
@@ -0,0 +1,148 @@
#include "cl_viv_vx_ext.h"

_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_odd_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_even_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_Part0_2x8;
_viv_uniform VXC_512Bits uniConvBF16toF32_Part1_2x8;
_viv_uniform float half_pixel_value;

__kernel void resize_1d_bilinear_BF16toBF16_DOWN
    (
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);

    vxc_short8 top;
    vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, top, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        vxc_ushort8 src;
        float4 left4;
        float4 right4;
        float4 dst4;

        VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_even_2x8);
        _viv_asm(COPY, right4, src, 16);
        VXC_DP2x8(src, top, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_odd_2x8);
        _viv_asm(COPY, left4, src, 16);
        right4 -= left4;
        dst4 = right4 * x_lerp + left4;
        vxc_ushort8 tmp, dst;
        _viv_asm(COPY, tmp, dst4, 16);
        dst.s0123 = tmp.s1357;
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_BF16toBF16_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_ushort8 src0, src1, dst0;
    vxc_short8 zero = (vxc_short8)(0, 0, 0, 0, 0, 0, 0, 0);
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 16;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        coord_in.y ++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        vxc_ushort8 dst_tmp;
        float4 left4;
        float4 right4;

        VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part0_2x8);
        _viv_asm(COPY, left4, dst_tmp, 16);
        VXC_DP2x8(dst_tmp, dst0, zero, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvBF16toF32_Part1_2x8);
        _viv_asm(COPY, right4, dst_tmp, 16);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;

        vxc_ushort8 tmp, dst;
        _viv_asm(COPY, tmp, dst4, 16);
        dst.s0123 = tmp.s1357;

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_out.y++;
    } while (coord_out.y < out_height);
}
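The uniforms above rely on the bfloat16 layout: a bf16 value is the high 16 bits of an IEEE float, so widening is a shift and narrowing (dst.s0123 = tmp.s1357) just keeps the odd half-words of the float lanes. A standalone sketch of both directions:

    #include <stdint.h>
    #include <string.h>

    static float bf16_to_f32(uint16_t b)
    {
        uint32_t bits = (uint32_t)b << 16;
        float f;
        memcpy(&f, &bits, sizeof(f));
        return f;
    }

    static uint16_t f32_to_bf16_truncate(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof(bits));
        return (uint16_t)(bits >> 16);  /* truncation, as in the kernel */
    }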
@@ -0,0 +1,136 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniResizeNxDown_2x8;
_viv_uniform int out_height;

#define RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
    read_type read_data, save_data; \
    data_type in0, result; \
    \
    int8 input_desc; \
    _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
    _viv_asm(MOV, coord_in.w, baseAddr); \
    \
    int8 output_desc; \
    _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
    _viv_asm(MOV, coord_out.w, baseAddr); \
    \
    while (coord_out.y < out_height) \
    { \
        VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
            VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in0, read_data, 16); \
        VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \
        _viv_asm(COPY, save_data, result, 16); \
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
            VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
        coord_in.y++; \
        coord_out.y++; \
    } \

#define RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_DOWN_8BIT_HALF_SAME(I8, I8, vxc_char16, vxc_char16)



#define RESIZE_1D_2X_DOWN_8BIT_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_8BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_8BIT_SAME(U8, U8, vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_DOWN_8BIT_SAME(I8, I8, vxc_char16, vxc_char16)


#define RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
    read_type read_data, read_data1, save_data; \
    data_type in0, in1, result; \
    \
    int8 input_desc; \
    _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
    _viv_asm(MOV, coord_in.w, baseAddr); \
    \
    int8 output_desc; \
    _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
    _viv_asm(MOV, coord_out.w, baseAddr); \
    \
    while (coord_out.y < out_height) \
    { \
        VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in0, read_data, 16); \
        VXC_OP4(img_load_3d, read_data1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0), \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in1, read_data1, 16); \
        VXC_DP2x8(result, in0, in1, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxDown_2x8); \
        _viv_asm(COPY, save_data, result, 16); \
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
            VXC_MODIFIER(0, 7, 0,VXC_RM_TowardZero, 0)); \
        coord_in.y++; \
        coord_out.y++; \
    } \

#define RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_HALF_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_DOWN_16BIT_HALF_SAME(F16, F16, vxc_short8, vxc_half8)


#define RESIZE_1D_2X_DOWN_16BIT_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_DOWN_2X_SAME \
    ( \
    __read_only image2d_array_t input, \
    __write_only image2d_array_t output, \
    int scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x << 1; \
    RESIZE_1D_NX_DOWN_16BIT_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_DOWN_16BIT_SAME(I16, I16, vxc_short8, vxc_short8)
RESIZE_1D_2X_DOWN_16BIT_SAME(F16, F16, vxc_short8, vxc_half8)

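These DOWN_2X kernels exploit the fact that a 2x reduction with half-pixel centers lands exactly between adjacent pixels, so the lerp degenerates to a fixed average (assumption: uniResizeNxDown_2x8 holds the 1/2, 1/2 weights). A scalar model, rounding half up where the hardware rounds to nearest even:

    /* One output pixel of the 2x HALF_SAME downsample. */
    static unsigned char resize_down2x_half_same_ref(const unsigned char * row,
                                                     int x_out)
    {
        return (unsigned char)(((int)row[2 * x_out] + row[2 * x_out + 1] + 1) >> 1);
    }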
@@ -0,0 +1,216 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform VXC_512Bits uniFp16toFp32_4x4;
_viv_uniform VXC_512Bits uniRightSubLeft_4x4;
_viv_uniform VXC_512Bits uniExtactHalf8_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform float uint8Scale;
_viv_uniform float output_ZP;
_viv_uniform VXC_512Bits uniFp16toFp32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertFp2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertFp2FP32_right_4x4;

__kernel void resize_1d_bilinear_F16toF16_DOWN
    (
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 left4;
    float4 right4;

    vxc_ushort8 src, result;
    vxc_half8 src_half, dst;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        _viv_asm(COPY, src_half, src, 16);

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;

        half4 tmp;
        _viv_asm(CONV, tmp, dst4);
        VXC_DP2x8(dst, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);
        _viv_asm(COPY, result, dst, 16);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);

}

__kernel void resize_1d_bilinear_F16toU8_DOWN
    (
    __read_only image2d_array_t input,
    __write_only image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 left4;
    float4 right4;

    vxc_ushort8 src;
    vxc_uchar8 result;
    vxc_half8 src_half, dst;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        _viv_asm(COPY, src_half, src, 16);

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertFp2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;

        dst4 = dst4 * uint8Scale + output_ZP;
        int4 dst = convert_int4_rte(dst4);
        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_F16toF16_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_ushort8 src0, src1, dst0;
    vxc_half8 top;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 16;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        _viv_asm(COPY, top, dst0, 16);

        coord_in.y ++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        float4 left4;
        float4 right4;

        VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_4x4);
        VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniFp16toFp32_part1_4x4);
        float4 dst4 = right4 * x_lerp + left4;

        half4 tmp;
        _viv_asm(CONV, tmp, dst4);
        VXC_DP2x8(top, tmp, tmp, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniExtactHalf8_2x8);
        _viv_asm(COPY, dst0, top, 16);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst0,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_out.y++;
    } while (coord_out.y < out_height);
}
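The _UP variants avoid per-pixel reloads: when upscaling, all four left/right source indices fall inside the 16-element window loaded up front, so VXC_BitExtract with the precomputed maskShift acts as a lane gather. A scalar model of that gather (hypothetical helper, indices assumed in-window):

    static void gather_window(const short * window,   /* the 16 loaded pixels */
                              const int * left_idx, const int * right_idx,
                              short * left4, short * right4)
    {
        int i;
        for (i = 0; i < 4; i++)
        {
            left4[i]  = window[left_idx[i]];
            right4[i] = window[right_idx[i]];
        }
    }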
@@ -0,0 +1,147 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;
_viv_uniform float dfpScale;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;

__kernel void resize_1d_bilinear_I16toI16_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);

    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;

    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_ushort8 src0, src1, dst0;

    vxc_short8 top;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 16;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src1, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        _viv_asm(COPY, top, dst0, 16);

        float4 left4;
        float4 right4;

        coord_in.y ++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);
        VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * dfpScale;
        int4 dst = convert_int4_rte(dst4);
        VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_I16toI16_DOWN
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    vxc_short8 src;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    float4 left4;
    float4 right4;
    vxc_short8 result;
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);


    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * dfpScale;

        int4 dst = convert_int4_rte(dst4);

        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);
}
@@ -0,0 +1,148 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_part1_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_4x4;
_viv_uniform float dfpScale;
_viv_uniform float half_pixel_value;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_left_4x4;
_viv_uniform VXC_512Bits uniConvertDFP2FP32_right_4x4;

__kernel void resize_1d_bilinear_I8toI8_UP
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);

    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;

    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    float4 right_x_f = ceil(in_x);
    int4 right_x_idx = convert_int4(right_x_f);

    vxc_uchar16 src0, dst0;

    vxc_char16 top;

    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 8;
    VXC_DP2x8(maskShift, bitextract_p0, constData, \
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(dst0, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
        _viv_asm(COPY, top, dst0, 16);

        coord_in.y++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

        float4 left4;
        float4 right4;

        VXC_DP4x4(left4, top, top, \
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_4x4);
        VXC_DP4x4(right4, top, top, \
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_part1_4x4);

        float4 dst4 = right4 * x_lerp + left4;

        dst4 = dst4 * dfpScale;
        int4 dst = convert_int4_rte(dst4);
        VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));

        coord_out.y ++;
    } while (coord_out.y < out_height);
}

__kernel void resize_1d_bilinear_I8toI8_DOWN
    (
    image2d_array_t input,
    image2d_array_t output,
    int align_corners,
    int half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    vxc_char16 src;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    float4 left4;
    float4 right4;
    vxc_char16 result;
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);


    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_left_4x4);
        VXC_DP4x4(right4, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniConvertDFP2FP32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * dfpScale;

        int4 dst = convert_int4_rte(dst4);

        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y ++;
    } while (coord_out.y < out_height);

}
@ -0,0 +1,212 @@
|
|||
#include "cl_viv_vx_ext.h"
|
||||
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_4x4;
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_left_4x4;
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_right_4x4;
|
||||
_viv_uniform VXC_512Bits uniExtact8Bit_2x8;
|
||||
_viv_uniform float scale_x;
|
||||
_viv_uniform int out_height;
|
||||
_viv_uniform int input_ZP;
|
||||
_viv_uniform float uint8Scale;
|
||||
_viv_uniform float output_ZP;
|
||||
_viv_uniform VXC_512Bits uniU8SubZPtoFp32_part1_4x4;
|
||||
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
|
||||
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
|
||||
_viv_uniform float half_pixel_value;
|
||||
|
||||
__kernel void resize_1d_bilinear_U8toF16_DOWN
|
||||
(
|
||||
__read_only image2d_array_t input,
|
||||
__write_only image2d_array_t output,
|
||||
int align_corners,
|
||||
int half_pixel_centers
|
||||
)
|
||||
{
|
||||
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
|
||||
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
|
||||
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
|
||||
float4 left_x_f = floor(in_x);
|
||||
float4 x_lerp = in_x - left_x_f;
|
||||
int4 left_x_idx = convert_int4(left_x_f);
|
||||
vxc_uchar16 src;
|
||||
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
|
||||
float4 left4;
|
||||
float4 right4;
|
||||
|
||||
int8 input_desc;
|
||||
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
|
||||
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
|
||||
_viv_asm(MOV, coord_in.w, baseAddr);
|
||||
|
||||
unsigned char inputZP;
|
||||
_viv_asm(COPY, inputZP, input_ZP, 4);
|
||||
int8 output_desc;
|
||||
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
|
||||
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
|
||||
_viv_asm(MOV, coord_out.w, baseAddr);
|
||||
|
||||
|
||||
do
|
||||
{
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.x = left_x_idx.y;
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.x = left_x_idx.z;
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.x = left_x_idx.w;
|
||||
VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));
|
||||
|
||||
VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);
|
||||
VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);
|
||||
right4 -= left4;
|
||||
float4 dst4 = right4 * x_lerp + left4;
|
||||
dst4 *= uint8Scale;
|
||||
half4 dst;
|
||||
_viv_asm(CONV, dst, dst4);
|
||||
vxc_short8 dst_short;
|
||||
_viv_asm(COPY, dst_short, dst, 16);
|
||||
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst_short,
|
||||
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
|
||||
coord_in.y++;
|
||||
coord_out.y ++;
|
||||
} while (coord_out.y < out_height);
|
||||
}
|
||||
|
||||
__kernel void resize_1d_bilinear_U8toU8_UP
|
||||
(
|
||||
image2d_array_t input,
|
||||
image2d_array_t output,
|
||||
int align_corners,
|
||||
int half_pixel_centers
|
||||
)
|
||||
{
|
||||
int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
|
||||
|
||||
int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
|
||||
float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
|
||||
|
||||
float4 left_x_f = floor(in_x);
|
||||
float4 x_lerp = in_x - left_x_f;
|
||||
int4 left_x_idx = convert_int4(left_x_f);
|
||||
float4 right_x_f = ceil(in_x);
|
||||
int4 right_x_idx = convert_int4(right_x_f);
|
||||
|
||||
|
||||
vxc_uchar16 src0, src1;
|
||||
|
||||
vxc_uchar16 top;
|
||||
|
||||
int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
|
||||
|
||||
int8 input_desc;
|
||||
_viv_asm(COPY, input_desc, input, sizeof(input_desc));
|
||||
int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
|
||||
_viv_asm(MOV, coord_in.w, baseAddr);
|
||||
VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
|
||||
VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
|
||||
|
||||
vxc_ushort8 bitextract_p0;
|
||||
vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
|
||||
VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx, \
|
||||
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
|
||||
vxc_ushort8 constData = 8;
|
||||
VXC_DP2x8(maskShift, bitextract_p0, constData, \
|
||||
VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);
|
||||
|
||||
int8 output_desc;
|
||||
_viv_asm(COPY, output_desc, output, sizeof(output_desc));
|
||||
baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
|
||||
_viv_asm(MOV, coord_out.w, baseAddr);
|
||||
|
||||
do
|
||||
{
|
||||
VXC_BitExtract(top, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
|
||||
coord_in.y++;
|
||||
VXC_OP4(img_load_3d, src0, input, coord_in.xyww,
|
||||
VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));
|
||||
|
||||
float4 left4;
|
||||
float4 right4;
|
||||
|
||||
unsigned char inputZP;
|
||||
_viv_asm(COPY, inputZP, input_ZP, 4);
|
||||
VXC_DP4x4(left4, top, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_4x4);
|
||||
VXC_DP4x4(right4, top, top, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_part1_4x4);
|
||||
|
||||
float4 dst4 = right4 * x_lerp + left4;
|
||||
dst4 = dst4 * uint8Scale + output_ZP;
|
||||
int4 dst = convert_int4_rte(dst4);
|
||||
VXC_DP2x8(top, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);
|
||||
VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, top,
|
||||
VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
|
||||
|
||||
coord_out.y ++;
|
||||
} while (coord_out.y < out_height);
|
||||
}
|
||||
|
||||
__kernel void resize_1d_bilinear_U8toU8_DOWN
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0);
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;
    float4 left_x_f = floor(in_x);
    float4 x_lerp = in_x - left_x_f;
    int4 left_x_idx = convert_int4(left_x_f);
    vxc_uchar16 src;
    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);
    float4 left4;
    float4 right4;
    vxc_uchar16 result;
    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    unsigned char inputZP;
    _viv_asm(COPY, inputZP, input_ZP, 4);
    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(0, 1, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.y;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(2, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.z;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(4, 5, 0, VXC_RM_TowardZero, 0));
        coord_in.x = left_x_idx.w;
        VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
            VXC_MODIFIER(6, 7, 0, VXC_RM_TowardZero, 0));

        VXC_DP4x4(left4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_left_4x4);
        VXC_DP4x4(right4, src, inputZP, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniU8SubZPtoFp32_right_4x4);
        right4 -= left4;
        float4 dst4 = right4 * x_lerp + left4;
        dst4 = dst4 * uint8Scale + output_ZP;
        int4 dst = convert_int4_rte(dst4);

        VXC_DP2x8(result, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniExtact8Bit_2x8);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, result,
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
        coord_in.y++;
        coord_out.y++;
    } while (coord_out.y < out_height);

}

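The DOWN variant issues four independent two-pixel loads because, when downscaling, adjacent output pixels can map to input pixels too far apart for a single contiguous fetch; the blend and requantization math is otherwise the same as in the UP path. A sketch of how the folded uniforms could be precomputed on the host, assuming TF-style asymmetric U8 quantization (the helper names here are illustrative, not the library's API):

/* Illustrative host-side folding of quantization parameters for the
 * U8->U8 kernels, so the shader only needs dst4 * uint8Scale + output_ZP. */
typedef struct { float scale; int zero_point; } quant_u8_t;

static void fold_u8_resize_params(quant_u8_t in, quant_u8_t out,
                                  float *uint8_scale, float *output_zp)
{
    /* real = in.scale * (q_in - in.zp); q_out = real / out.scale + out.zp.
     * The kernel already subtracts in.zp during its DP4x4 dequant step,
     * so only the scale ratio and the output zero point remain. */
    *uint8_scale = in.scale / out.scale;
    *output_zp   = (float)out.zero_point;
}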
@ -0,0 +1,78 @@
#include "cl_viv_vx_ext.h"

_viv_uniform float scale_x;
_viv_uniform int out_height;
_viv_uniform VXC_512Bits uniConvertI32toI16_2x8;
_viv_uniform VXC_512Bits uniGetMaskShift_2x8;
_viv_uniform VXC_512Bits uniBilinear_4x4;
_viv_uniform float half_pixel_value;

__kernel void resize_1d_bilinear_U8toU8_UP_opt
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers,
    __read_only  image2d_array_t scale
    )
{
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(2), 0);

    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3);
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x - half_pixel_value;

    float4 left_x_f = floor(in_x);
    int4 left_x_idx = convert_int4(left_x_f);
    int4 right_x_idx = left_x_idx + 1;

    vxc_uchar16 src0;

    vxc_uchar16 src_mask;

    int4 coord_in = (int4)(left_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);
    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 bitextract_p0;
    vxc_uchar16 maskShift = {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
    VXC_DP2x8(bitextract_p0, left_x_idx, right_x_idx,
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniConvertI32toI16_2x8);
    vxc_ushort8 constData = 8;
    VXC_DP2x8(maskShift, bitextract_p0, constData,
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0), uniGetMaskShift_2x8);

    vxc_ushort8 lerp_0;
    vxc_half8 lerp;

    int2 coord = (int2)(coord_out.x * 2, 0);
    VXC_ReadImage(lerp_0, scale, coord, VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    _viv_asm(COPY, lerp, lerp_0, 16);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    do
    {
        VXC_BitExtract(src_mask, src0, src0, maskShift, VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

        coord_in.y++;
        VXC_OP4(img_load_3d, src0, input, coord_in.xyww,
            VXC_5BITOFFSET_XY(0, 0), VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

        vxc_uchar16 dst;
        VXC_DP4x4(dst, src_mask, lerp,
            VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniBilinear_4x4);

        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
            VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));

        coord_out.y++;
    } while (coord_out.y < out_height);
}

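The _opt variant precomputes the per-output-pixel lerp weights on the host and streams them in as fp16 pairs through the extra `scale` image, so the inner loop collapses to a single uniBilinear_4x4 dot product. A plausible host-side generator for that table — a sketch only; the pair layout is inferred from the kernel's `coord_out.x * 2` indexing, and which lane weights which neighbor is encoded in the uniform table, not visible here:

#include <math.h>

/* Fill lerp[2*x] and lerp[2*x+1] with (left, right) weights per output x.
 * The order within the pair is an assumption; values would be converted
 * to fp16 before upload into the `scale` image. */
static void build_lerp_table(float *lerp, int out_width, float scale_x,
                             float half_pixel_value)
{
    for (int x = 0; x < out_width; ++x) {
        float in_x = ((float)x + half_pixel_value) * scale_x - half_pixel_value;
        float frac = in_x - floorf(in_x);
        lerp[2 * x + 0] = 1.0f - frac;   /* weight of the left neighbor */
        lerp[2 * x + 1] = frac;          /* weight of the right neighbor */
    }
}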
@ -0,0 +1,155 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniResizeNxUp_2x8;
_viv_uniform int out_height;

#define RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
    read_type read_data, save_data; \
    data_type in0, result; \
    \
    int8 input_desc; \
    _viv_asm(COPY, input_desc, input, sizeof(input_desc)); \
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0; \
    _viv_asm(MOV, coord_in.w, baseAddr); \
    \
    int8 output_desc; \
    _viv_asm(COPY, output_desc, output, sizeof(output_desc)); \
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0; \
    _viv_asm(MOV, coord_out.w, baseAddr); \
    \
    while (coord_out.y < out_height) \
    { \
        VXC_OP4(img_load_3d, read_data, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0), \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        _viv_asm(COPY, in0, read_data, 16); \
        VXC_DP2x8(result, in0, in0, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniResizeNxUp_2x8); \
        _viv_asm(COPY, save_data, result, 16); \
        VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, save_data, \
            VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0)); \
        coord_in.y++; \
        coord_out.y++; \
    } \

#define RESIZE_1D_2X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_HALF_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = (coord_out.x * 2 - 1) >> 2; \
    coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_HALF_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_HALF_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_2X_HALF_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_2X_HALF_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_2X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_2X_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x >> 1; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_2X_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_2X_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_2X_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_2X_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_4X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_HALF_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = (coord_out.x * 2 - 3) >> 3; \
    coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_4X_HALF_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_4X_HALF_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_4X_HALF_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_4X_HALF_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_4X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_4X_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x >> 2; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_4X_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_4X_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_4X_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_4X_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_8X_HALF_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_HALF_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = (coord_out.x * 2 - 7) >> 4; \
    coord_in.x = coord_out.x == 0 ? -1 : coord_in.x; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_8X_HALF_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_8X_HALF_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_8X_HALF_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_8X_HALF_SAME(F16, F16, vxc_short8,  vxc_half8)


#define RESIZE_1D_8X_SAME(name0, name1, read_type, data_type) \
__kernel void resize_1d_bilinear_##name0##to##name1##_UP_8X_SAME \
    ( \
    __read_only  image2d_array_t input, \
    __write_only image2d_array_t output, \
                 int             scale_type \
    ) \
{ \
    int4 coord_out = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    int4 coord_in  = (int4)(get_global_id(0), 0, get_global_id(1), 0); \
    coord_in.x = coord_out.x >> 3; \
    RESIZE_1D_NX_SAME_PROCESS(read_type, data_type) \
}

RESIZE_1D_8X_SAME(U8,  U8,  vxc_uchar16, vxc_uchar16)
RESIZE_1D_8X_SAME(I8,  I8,  vxc_char16,  vxc_char16)
RESIZE_1D_8X_SAME(I16, I16, vxc_short8,  vxc_short8)
RESIZE_1D_8X_SAME(F16, F16, vxc_short8,  vxc_half8)

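The `(coord_out.x * 2 - k) >> n` expressions in the HALF_SAME macros above are exact integer forms of the half-pixel source index floor((x + 0.5)/N - 0.5) for N = 2, 4, 8; e.g. for N = 2, x/2 - 1/4 = (2x - 1)/4. A quick standalone check of that identity:

#include <assert.h>
#include <math.h>

/* Verify (2x - (N - 1)) >> log2(2N) == floor((x + 0.5)/N - 0.5) for the
 * upscale factors used above. Right-shifting a negative value is
 * implementation-defined in ISO C but arithmetic on the relevant targets;
 * the macros' explicit x == 0 ? -1 guard covers the boundary regardless. */
int main(void)
{
    const int factors[3] = {2, 4, 8};
    const int shifts[3]  = {2, 3, 4};   /* >> 2, >> 3, >> 4 in the macros */
    for (int f = 0; f < 3; ++f) {
        int N = factors[f];
        for (int x = 0; x < 1024; ++x) {
            int by_shift = x == 0 ? -1 : (x * 2 - (N - 1)) >> shifts[f];
            int by_float = (int)floorf(((float)x + 0.5f) / (float)N - 0.5f);
            assert(by_shift == by_float);
        }
    }
    return 0;
}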
@ -0,0 +1,337 @@
#include "cl_viv_vx_ext.h"

_viv_uniform VXC_512Bits uniMultiplyAndPostShift_2x8;
_viv_uniform float scale_x;
_viv_uniform float half_pixel_value;
_viv_uniform float round_value;
_viv_uniform int2 multAndoutZP; //[0:15] multiplier, [31:63] output zp

#define NEAREST_INDEX_PROCESS() \
    int4 coord_out = (int4)(get_global_id(0), get_global_id(1), get_global_id(2), 0); \
    int4 coord_x = coord_out.xxxx + (int4)(0, 1, 2, 3); \
    float4 in_x = (convert_float4(coord_x) + half_pixel_value) * scale_x + round_value; \
    int4 in_x_idx = convert_int4(in_x); \

__kernel void resize_1d_nearest_F16toF16
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_short8 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

_viv_uniform VXC_512Bits uniGetExtractData_2x8;
__kernel void resize_1d_nearest_F16toF16_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_ushort8 src0, src1, dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, \
        VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

_viv_uniform VXC_512Bits uniConvertI8toI8_2x8;
__kernel void resize_1d_nearest_I8toI8
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_char16 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));
    VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 7, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_I8toI8_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_uchar16 src0, dst0;
    vxc_char16 dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, \
        VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst0, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
    _viv_asm(COPY, dst, dst0, 8);
    VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);
    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_U8toU8
    (
    image2d_array_t input,
    image2d_array_t output,
    int             align_corners,
    int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_uchar16 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));

    vxc_ushort8 multiplier;
    _viv_asm(COPY, multiplier, multAndoutZP, 16);
    VXC_DP2x8(src, src, multiplier, \
        VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_U8toU8_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_uchar16 src0, dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 15, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst, src0, src0, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
    vxc_ushort8 multiplier;
    _viv_asm(COPY, multiplier, multAndoutZP, 16);
    VXC_DP2x8(dst, dst, multiplier, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniMultiplyAndPostShift_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_I16toI16
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_short8 src;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 0, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.y;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(1, 1, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.z;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(2, 2, 0, VXC_RM_TowardZero, 0));
    coord_in.x = in_x_idx.w;
    VXC_OP4(img_load_3d, src, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(3, 3, 0, VXC_RM_TowardZero, 0));

    VXC_DP2x8(src, src, src, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, src,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}

__kernel void resize_1d_nearest_I16toI16_op
    (
    __read_only  image2d_array_t input,
    __write_only image2d_array_t output,
                 int             align_corners,
                 int             half_pixel_centers
    )
{
    NEAREST_INDEX_PROCESS()

    vxc_ushort8 src0, src1, dst0;
    vxc_short8 dst;
    int4 coord_in = (int4)(in_x_idx.x, coord_out.y, coord_out.z, 0);

    int8 input_desc;
    _viv_asm(COPY, input_desc, input, sizeof(input_desc));
    int baseAddr = (int)coord_in.z * input_desc.s4 + input_desc.s0;
    _viv_asm(MOV, coord_in.w, baseAddr);

    int8 output_desc;
    _viv_asm(COPY, output_desc, output, sizeof(output_desc));
    baseAddr = (int)coord_out.z * output_desc.s4 + output_desc.s0;
    _viv_asm(MOV, coord_out.w, baseAddr);

    VXC_OP4(img_load_3d, src0, input, coord_in.xyww, VXC_5BITOFFSET_XY(0, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));
    VXC_OP4(img_load_3d, src1, input, coord_in.xyww, VXC_5BITOFFSET_XY(8, 0),
        VXC_MODIFIER(0, 7, 0, VXC_RM_TowardZero, 0));

    vxc_uchar16 mask = (vxc_uchar16)(8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16);
    vxc_ushort8 input_idx;
    _viv_asm(COPY, input_idx, in_x_idx, 16);
    VXC_DP2x8(mask, input_idx, input_idx, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0), uniGetExtractData_2x8);
    VXC_BitExtract(dst0, src0, src1, mask, VXC_MODIFIER(0, 3, 0, VXC_RM_TowardZero, 0));
    _viv_asm(COPY, dst, dst0, 8);
    VXC_DP2x8(dst, dst, dst, VXC_MODIFIER(0, 3, 0, VXC_RM_ToNearestEven, 1), uniConvertI8toI8_2x8);

    VXC_OP4_NoDest(img_store_3d, output, coord_out.xyww, dst,
        VXC_MODIFIER(0, 3, 0,VXC_RM_TowardZero, 0));
}
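All of the nearest kernels above share NEAREST_INDEX_PROCESS, which folds the rounding convention into two uniforms: half_pixel_value shifts the output coordinate and round_value turns the final truncation into round-half-up when needed. A scalar sketch of the index computation; the concrete uniform values are set host-side and the settings listed in the comment are assumptions about the usual TF resize conventions:

#include <math.h>

/* Scalar model of NEAREST_INDEX_PROCESS: in_x = (x + half) * scale + round,
 * then truncate. Typical settings (assumed, not confirmed by this diff):
 *   align_corners:      half = 0,   round = 0.5, scale = (in-1)/(out-1)
 *   half_pixel_centers: half = 0.5, round = 0,   scale = in/out
 *   default:            half = 0,   round = 0,   scale = in/out         */
static int nearest_index(int x_out, float scale_x,
                         float half_pixel_value, float round_value)
{
    float in_x = ((float)x_out + half_pixel_value) * scale_x + round_value;
    return (int)in_x;   /* convert_int4() truncates toward zero */
}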
File diff suppressed because it is too large
@ -176,8 +176,8 @@ static vsi_status op_optimize
         reshape 3d input (xcn) --> 4d input (whcn)
         reshape 3d output(xcn) --> 4d output(whcn)
     */
-    shape[0] = inputs[0]->attr.size[0];
-    shape[1] = 1;
+    shape[0] = 1;
+    shape[1] = inputs[0]->attr.size[0];
     shape[2] = inputs[0]->attr.size[1];
     shape[3] = inputs[0]->attr.size[2];
     dim = 4;

@ -136,6 +136,7 @@ static vsi_bool op_check
         IO_TYPE(D_I32, D_F32)
         IO_TYPE(D_I32, D_I32)
         IO_TYPE(D_I32, D_U32)
+        IO_TYPE(D_I32, D_F16)
         IO_TYPE(D_I32, D_BOOL8)
         IO_TYPE(D_U32, D_F32)
         IO_TYPE(D_U32, D_I32)
@ -176,6 +177,7 @@ static vsi_bool op_check
         IO_TYPE(D_U8|Q_ASYM, D_F32)
         IO_TYPE(D_U8|Q_ASYM, D_I32)
         IO_TYPE(D_BF16, D_BF16)
+        IO_TYPE(D_U8, D_F16)
     END_IO_TYPE_DECL(CAST)
     if(!VALIDATE_OP_IO_TYPES(CAST, self, inputs, self->input.num, outputs, self->output.num)) {
         char* desc = generate_op_io_types_desc(inputs,
@ -37,6 +37,29 @@
 #include "utils/vsi_nn_dtype_util.h"
 #include "utils/vsi_nn_constraint_check.h"
 
+static vsi_bool _enable_concat_optimize()
+{
+    char *envctrl;
+    static int32_t enableOptimize = -1;
+
+    if (enableOptimize == -1)
+    {
+        enableOptimize = 1;
+        envctrl = getenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE");
+        if (envctrl)
+        {
+            enableOptimize = atoi(envctrl);
+        }
+    }
+
+    if (enableOptimize == 1)
+    {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
 static int32_t _get_input_num
     (
     vsi_nn_node_t * self,
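The concat view optimization is now gated by the VSI_NN_ENABLE_CONCAT_OPTIMIZE environment variable, read once and memoized in a function-local static: unset or "1" keeps it on, any other integer value turns it off. A hedged usage sketch for a test harness (setenv is POSIX, not part of this library):

#include <stdlib.h>

/* Disable the concat tensor-view optimization for one process, e.g. to
 * rule it out while debugging output mismatches. Must run before the
 * first concat node is computed, since the flag is cached on first use. */
int main(void)
{
    setenv("VSI_NN_ENABLE_CONCAT_OPTIMIZE", "0", /*overwrite=*/1);
    /* ... build and run the vsi_nn graph as usual ... */
    return 0;
}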
@ -243,7 +266,8 @@ static vsi_status op_compute
 
     status = VSI_SUCCESS;
     self->n = NULL;
-    if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs))
+    if(_is_highest_dimension(self, outputs) && _is_same_quant(self, inputs, outputs)
+        && _enable_concat_optimize())
     {
         iter = self->nn_param.concat.lcl_data;
         while( NULL != iter )
@ -397,7 +421,8 @@ static vsi_status op_optimize
     status = VSI_SUCCESS;
     /* we don't create tensor view if the axis is not the highest dimension */
     if (_is_highest_dimension(self, outputs) == FALSE ||
-        _is_same_quant(self, inputs, outputs) == FALSE)
+        _is_same_quant(self, inputs, outputs) == FALSE ||
+        _enable_concat_optimize() == FALSE)
     {
         return status;
     }

@ -194,6 +194,7 @@ static vsi_bool op_check
         IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
         IO_TYPE(D_I16|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_I16|Q_DFP, D_U8|Q_ASYM)
+        IO_TYPE(D_I16|Q_DFP, D_F16)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
         IO_TYPE(D_I8|Q_DFP, D_I8|Q_ASYM)
         IO_TYPE(D_I8|Q_ASYM, D_I8|Q_ASYM)

@ -31,6 +31,7 @@
 #include "vsi_nn_prv.h"
 #include "vsi_nn_ops.h"
 #include "vsi_nn_tensor.h"
+#include "vsi_nn_tensor_util.h"
 #include "utils/vsi_nn_util.h"
 #include "kernel/vsi_nn_kernel.h"
 /*
@ -73,6 +74,7 @@ static vsi_status op_compute
     {
         status = VSI_SUCCESS;
     }
+
     vsi_nn_kernel_param_release( &param );
     return status;
 } /* op_compute() */
@ -119,7 +121,7 @@ static vsi_bool op_setup
         VSI_NN_ROUND_FLOOR
         );
 
-    outputs[0]->attr.size[1] = inputs[1]->attr.size[2];
+    outputs[0]->attr.size[1] = inputs[0]->attr.size[1] * p->multiplier;
     outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
     outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
 }

@ -45,22 +45,30 @@ static vsi_status _eltwise_unary_op_compute
     )
 {
     vsi_status status = VSI_FAILURE;
+    float alpha = 0;
+    vsi_nn_kernel_param_t * param = NULL;
 
     if( NULL == self )
     {
         return status;
     }
+    param = vsi_nn_kernel_param_create();
 
+    alpha = self->nn_param.elu.alpha;
+    vsi_nn_kernel_param_add_float32( param, "alpha", alpha );
+
     // TODO: This optimzie is a hack for gpu path,
     // it should be moved to gpu kernel setup.
     self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
-        kernel_name, inputs, 1, outputs, 1, NULL );
+        kernel_name, inputs, 1, outputs, 1, param );
 
     if( self->n )
     {
         status = VSI_SUCCESS;
     }
 
+    vsi_nn_kernel_param_release( &param );
+
     return status;
 } /* _eltwise_op_compute() */
 
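With the parameter list no longer NULL, ELU's alpha reaches the selected kernel by name, and the version gate added below keeps alpha at its old hard-coded default of 1 for graphs serialized before 1.1.29. On the kernel side the value would be recovered with the matching getter — a sketch, assuming the ovxlib getter that mirrors the adder used above:

#include "kernel/vsi_nn_kernel.h"

/* Sketch: how an elementwise-unary kernel setup could read back ELU's
 * alpha from the param list built in _eltwise_unary_op_compute. */
static float read_elu_alpha( vsi_nn_kernel_param_t * param )
{
    return vsi_nn_kernel_param_get_float32( param, "alpha" );
}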
@ -152,6 +160,19 @@ static vsi_bool op_check
     return TRUE;
 } /* op_check() */
 
+static vsi_status op_init
+    (
+    vsi_nn_node_t * self
+    )
+{
+    if (vsi_nn_compareVersion(self->graph, 1, 1, 29) == -1)
+    {
+        self->nn_param.elu.alpha = 1;
+    }
+
+    return VSI_SUCCESS;
+} /* op_init() */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@ -166,7 +187,7 @@ extern "C" {
         { \
             return _eltwise_unary_op_compute( ""#kernel_name, self, inputs, outputs ); \
         } \
-DEF_OP_REG(name, NULL, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1)
+DEF_OP_REG(name, op_init, op_compute_##kernel_name, vsi_nn_op_common_deinit, op_check, op_setup, NULL, 2, 1)
 
 DEF_ELEMENT_WISE_UNARY_OP( SIN, sin );
 DEF_ELEMENT_WISE_UNARY_OP( EXP, exp );

@ -73,6 +73,7 @@ static vsi_bool op_check
         IO_TYPE(D_I32, D_I32, D_I32, D_F16, D_I32)
         IO_TYPE(D_I32, D_I32, D_F32, D_F16, D_F32)
         IO_TYPE(D_I32, D_I32, D_U8|Q_ASYM, D_F16, D_U8|Q_ASYM)
+        IO_TYPE(D_I32, D_I32, D_F32, D_F32, D_U8|Q_ASYM)
     END_IO_TYPE_DECL(HASHTABLE_LOOKUP)
     if (!VALIDATE_OP_IO_TYPES(HASHTABLE_LOOKUP, self, inputs, self->input.num, outputs, self->output.num))
     {

@ -0,0 +1,298 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/


#include <string.h>
#include <stdlib.h>

#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"

/*
Declare number of input and output.
*/
#define _INPUT_NUM          (2)
#define _OUTPUT_NUM         (1)

static vsi_status op_compute
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_internal_compute_node( self );

    return status;
} /* op_compute() */

static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_nn_interp_param *p = NULL;

    p = &self->nn_param.interp;

    if ((p->pad_beg > 0) || (p->pad_end > 0))
    {
        VSILOGE("Only supports non-pos padding (cropping) for now ");
        return FALSE;
    }

    return TRUE;
} /* op_check() */


static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_nn_interp_param *p = NULL;
    int32_t height_in_eff_, width_in_eff_;
    int32_t height_out, width_out;
    vsi_nn_internal_node_t* curr = NULL;
    vsi_nn_internal_tensor_t *crop_tensor = NULL;
    vsi_nn_tensor_t *crop_in_tensor = NULL;
    float factor = 1.0f;
    int32_t pad_beg = 0;
    int32_t pad_end = 0;

    if ( NULL == self )
    {
        return FALSE;
    }

    p = &self->nn_param.interp;
    pad_beg = -p->pad_beg;
    pad_end = -p->pad_end;
    width_in_eff_ = inputs[0]->attr.size[0] + p->pad_beg + p->pad_end;
    height_in_eff_ = inputs[0]->attr.size[1] + p->pad_beg + p->pad_end;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        memcpy( outputs[0]->attr.size, inputs[0]->attr.size,
            VSI_NN_MAX_DIM_NUM * sizeof( uint32_t ) );
        if ((p->shrink_factor > 0) && (p->zoom_factor <= 0))
        {
            width_out = (width_in_eff_ - 1) / p->shrink_factor + 1;
            height_out = (height_in_eff_ - 1) / p->shrink_factor + 1;
        }
        else if ((p->zoom_factor > 0) && (p->shrink_factor <= 0))
        {
            width_out = (width_in_eff_ - 1) * (p->zoom_factor - 1) + width_in_eff_;
            height_out = (height_in_eff_ - 1) * (p->zoom_factor - 1) + height_in_eff_;
        }
        else if ((p->height > 0) && (p->width > 0))
        {
            width_out = p->width;
            height_out = p->height;
        }
        else if ((p->zoom_factor > 0) && (p->shrink_factor > 0))
        {
            width_out = (width_in_eff_ - 1) / p->shrink_factor + 1;
            height_out = (height_in_eff_ - 1) / p->shrink_factor + 1;
            width_out = (width_out - 1) * (p->zoom_factor - 1) + width_out;
            height_out = (height_out - 1) * (p->zoom_factor - 1) + height_out;
        }
        else if (NULL != inputs[1])
        {
            width_out = inputs[1]->attr.size[0];
            height_out = inputs[1]->attr.size[1];
        }
        else
        {
            VSILOGE("Not support params ");
            return FALSE;
        }

        if ((width_out < 0) || (height_out < 0) || (width_in_eff_ < 0) || (height_in_eff_ < 0))
        {
            VSILOGE("value shoud be positive: width_out %d height_out %d width_in_eff_ %d height_in_eff_ %d ",
                width_out, height_out, width_in_eff_, height_in_eff_);
            return FALSE;
        }

        outputs[0]->attr.size[0] = width_out;
        outputs[0]->attr.size[1] = height_out;
    }

    factor = (float)(outputs[0]->attr.size[0]) / (float)(width_in_eff_);

    if ((pad_beg > 0) || (pad_end > 0))
    {
        vsi_nn_tensor_attr_t attr;
        int32_t use_virtual_tensor = 1;
        int32_t *begin_dims;
        int32_t *end_dims;
        int32_t *stride_dims;
        uint32_t i;
        memset(&attr, 0, sizeof(vsi_nn_tensor_attr_t));
        vsi_nn_internal_init_tensor_attr(&attr, &inputs[0]->attr.dtype, use_virtual_tensor);
        crop_tensor = vsi_nn_internal_new_tensor( self, &attr, 0.0f );
        crop_in_tensor = crop_tensor->t;
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_STRIDED_SLICE, 1, 1 );
        curr->node->nn_param.strided_slice.begin_dims_num = inputs[0]->attr.dim_num;
        curr->node->nn_param.strided_slice.end_dims_num = inputs[0]->attr.dim_num;
        curr->node->nn_param.strided_slice.stride_dims_num = inputs[0]->attr.dim_num;
        curr->node->nn_param.strided_slice.begin_mask = 0;
        curr->node->nn_param.strided_slice.end_mask = 0;
        curr->node->nn_param.strided_slice.shrink_axis_mask = 0;
        begin_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
            VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
        end_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
            VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
        stride_dims = (int32_t *)vsi_nn_internal_new_node_param(curr,
            VSI_NN_MAX_DIM_NUM * sizeof(uint32_t));
        for (i = 0; i < inputs[0]->attr.dim_num; i++)
        {
            stride_dims[i] = 1;
        }

        begin_dims[0] = pad_beg;
        begin_dims[1] = pad_beg;
        end_dims[0] = inputs[0]->attr.size[0] - pad_end;
        end_dims[1] = inputs[0]->attr.size[1] - pad_end;

        if (inputs[0]->attr.dim_num > 2)
        {
            for (i = 2 ; i < inputs[0]->attr.dim_num; i++)
            {
                begin_dims[i] = 0;
                end_dims[i] = inputs[0]->attr.size[i];
            }
        }
        curr->node->nn_param.strided_slice.begin_dims = begin_dims;
        curr->node->nn_param.strided_slice.end_dims = end_dims;
        curr->node->nn_param.strided_slice.stride_dims = stride_dims;
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = crop_in_tensor;
        vsi_nn_internal_setup_node(self, curr);
    }
    else
    {
        crop_in_tensor = inputs[0];
    }

    if ((width_in_eff_ == (int32_t)outputs[0]->attr.size[0]) && (height_in_eff_ == (int32_t)outputs[0]->attr.size[1]))
    {
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 1, 1 );
        curr->inputs[0] = crop_in_tensor;
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }
    else
    {
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_INTERNAL, 1, 1 );
        curr->node->nn_param.resize_internal.align_corners = vx_true_e;
        curr->node->nn_param.resize_internal.factor = factor;
        curr->node->nn_param.resize_internal.half_pixel_centers = vx_false_e;
        curr->inputs[0] = crop_in_tensor;
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }

    return TRUE;
} /* op_setup() */

static vsi_status op_optimize
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs,
    vsi_nn_opt_direction_e direction
    )
{
    vsi_status status;

    status = VSI_SUCCESS;
    vsi_nn_internal_optimize_node( self, direction );

    return status;
} /* op_optimize() */

static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_internal_init_node_wksp(self);
    self->nn_param.interp.height = 0;
    self->nn_param.interp.width = 0;
    self->nn_param.interp.pad_beg = 0;
    self->nn_param.interp.pad_end = 0;
    self->nn_param.interp.shrink_factor = 0;
    self->nn_param.interp.zoom_factor = 0;

    return status;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    vsi_nn_internal_deinit_node_wksp(self);
    status = vsi_nn_op_common_deinit(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ INTERP,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ op_optimize,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

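The four sizing branches in op_setup above follow Caffe's Interp layer conventions. A small standalone check of the zoom/shrink arithmetic, with hypothetical sizes:

#include <assert.h>

/* Caffe-style Interp output sizing, as in op_setup above.
 * zoom:   out = (in - 1) * (zoom - 1) + in      e.g. in = 4, zoom = 2 -> 7
 * shrink: out = (in - 1) / shrink + 1           e.g. in = 5, shrink = 2 -> 3 */
int main(void)
{
    int width_in_eff_ = 4;
    int zoom_factor = 2;
    int width_out = (width_in_eff_ - 1) * (zoom_factor - 1) + width_in_eff_;
    assert(width_out == 7);

    width_in_eff_ = 5;
    int shrink_factor = 2;
    width_out = (width_in_eff_ - 1) / shrink_factor + 1;
    assert(width_out == 3);
    return 0;
}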
@ -155,11 +155,13 @@ static vsi_bool op_check
    BEGIN_IO_TYPE_DECL(PERMUTE, 1, 1)
        IO_TYPE(D_F16, D_F16)
        IO_TYPE(D_F16, D_F32)
        IO_TYPE(D_I16, D_I16)
        IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
        IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
        IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
        IO_TYPE(D_U8|Q_ASYM, D_F16)
        IO_TYPE(D_U8|Q_ASYM, D_F32)
        IO_TYPE(D_I8|Q_SYM_PC, D_I8|Q_SYM_PC)
        IO_TYPE(D_BOOL8, D_BOOL8)
        IO_TYPE(D_BOOL8, D_I8|Q_DFP)
        IO_TYPE(D_F32, D_F32)

@ -0,0 +1,207 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/


#include <string.h>
#include <stdlib.h>

#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"

/*
Declare number of input and output.
*/
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)

static vsi_bool _is_same_shape
    (
    vsi_nn_tensor_t * inputs,
    uint32_t *sizes,
    uint32_t dims
    )
{
    uint32_t i = 0;

    if (inputs->attr.dim_num != dims)
        return FALSE;

    for (i = 0; i < dims; i++)
    {
        if (sizes[i] != inputs->attr.size[i])
            return FALSE;
    }

    return TRUE;
}

static vsi_status op_compute
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_status status = VSI_FAILURE;

    status = vsi_nn_internal_compute_node( self );

    return status;
} /* op_compute() */


static vsi_status op_optimize
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs,
    vsi_nn_opt_direction_e direction
    )
{
    if ( _is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num) )
    {
        return vsi_nn_internal_optimize_node(self, direction );
    }
    else
    {
        return VSI_SUCCESS;
    }
} /* op_optimize() */

static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    return TRUE;
} /* op_check() */

static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    float factor = self->nn_param.resize_1d.factor;
    vsi_nn_internal_node_t* curr = NULL;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        if (factor != 0)
        {
            outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
        }
        else
        {
            outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
        }
        outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
        outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
    }

    if (_is_same_shape(inputs[0], outputs[0]->attr.size, outputs[0]->attr.dim_num))
    {
        vsi_nn_internal_init_node_wksp( self );
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_DATACONVERT, 0, 0 );
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }
    else if (VSI_NN_INTERPOLATION_BILINEAR == self->nn_param.resize_1d.type)
    {
        vsi_nn_internal_init_node_wksp( self );
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_BILINEAR_INTERNAL, 0, 0 );
        curr->node->nn_param.resize_1d_bilinear_internal.align_corners = self->nn_param.resize_1d.align_corners;
        curr->node->nn_param.resize_1d_bilinear_internal.factor = self->nn_param.resize_1d.factor;
        curr->node->nn_param.resize_1d_bilinear_internal.half_pixel_centers = \
            self->nn_param.resize_1d.half_pixel_centers;
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }
    else if (VSI_NN_INTERPOLATION_NEAREST_NEIGHBOR == self->nn_param.resize_1d.type)
    {
        vsi_nn_internal_init_node_wksp( self );
        curr = vsi_nn_internal_new_node( self, VSI_NN_OP_RESIZE_1D_NEAREST_INTERNAL, 0, 0 );
        curr->node->nn_param.resize_1d_nearest_internal.align_corners = self->nn_param.resize_1d.align_corners;
        curr->node->nn_param.resize_1d_nearest_internal.factor = self->nn_param.resize_1d.factor;
        curr->node->nn_param.resize_1d_nearest_internal.half_pixel_centers = \
            self->nn_param.resize_1d.half_pixel_centers;
        curr->inputs[0] = inputs[0];
        curr->outputs[0] = outputs[0];
        vsi_nn_internal_setup_node(self, curr);
    }

    return TRUE;
} /* op_setup() */

static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{
    return VSI_SUCCESS;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_internal_deinit_node_wksp(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ RESIZE_1D,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ op_optimize,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

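op_setup above only fills the output extent while it is still VSI_NN_DIM_AUTO: a nonzero factor scales the input width and truncates, otherwise the explicit size wins. The same rule, isolated as a sketch with hypothetical values:

#include <stdint.h>

/* Output width selection used by the RESIZE_1D setup above. */
static uint32_t resize_1d_out_width(uint32_t in_width, float factor,
                                    uint32_t explicit_size0)
{
    if (factor != 0)
    {
        return (uint32_t)(in_width * factor);   /* truncates: 5 * 1.5f -> 7 */
    }
    return explicit_size0;
}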
@ -0,0 +1,171 @@
/****************************************************************************
*
*    Copyright (c) 2020 Vivante Corporation
*
*    Permission is hereby granted, free of charge, to any person obtaining a
*    copy of this software and associated documentation files (the "Software"),
*    to deal in the Software without restriction, including without limitation
*    the rights to use, copy, modify, merge, publish, distribute, sublicense,
*    and/or sell copies of the Software, and to permit persons to whom the
*    Software is furnished to do so, subject to the following conditions:
*
*    The above copyright notice and this permission notice shall be included in
*    all copies or substantial portions of the Software.
*
*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
*    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
*    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
*    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
*    DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/


#include <string.h>
#include <stdlib.h>

#include "vsi_nn_types.h"
#include "vsi_nn_log.h"
#include "vsi_nn_node.h"
#include "vsi_nn_prv.h"
#include "vsi_nn_ops.h"
#include "vsi_nn_tensor.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "utils/vsi_nn_constraint_check.h"

/*
Declare number of input and output.
*/
#define _INPUT_NUM          (1)
#define _OUTPUT_NUM         (1)

static vsi_status op_compute
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    vsi_status status = VSI_FAILURE;
    int32_t align_corners = self->nn_param.resize_1d_bilinear_internal.align_corners;
    int32_t half_pixel_centers = self->nn_param.resize_1d_bilinear_internal.half_pixel_centers;
    vsi_nn_kernel_param_t * param = NULL;

    param = vsi_nn_kernel_param_create();

    vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners );
    vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers );

    self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
        "resize_1d_bilinear",
        &inputs[0], 1,
        &outputs[0], 1, param );

    if ( self->n )
    {
        status = VSI_SUCCESS;
    }

    vsi_nn_kernel_param_release( &param );

    return status;
} /* op_compute() */

static vsi_bool op_check
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    BEGIN_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL, 1, 1)
        IO_TYPE(D_F16, D_U8|Q_ASYM)
        IO_TYPE(D_F16, D_F16)
        IO_TYPE(D_BF16, D_BF16)
        IO_TYPE(D_F32, D_F32)
        IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
        IO_TYPE(D_U8|Q_ASYM, D_F16)
        IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
        IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
    END_IO_TYPE_DECL(RESIZE_1D_BILINEAR_INTERNAL)
    if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_BILINEAR_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) {
        char* desc = generate_op_io_types_desc(inputs,
            self->input.num, outputs, self->output.num);
        VSILOGE("Inputs/Outputs data type not support: %s", desc);
        destroy_op_io_types_desc(desc);
        return FALSE;
    }

    return TRUE;
} /* op_check() */

static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    float factor = self->nn_param.resize_1d_bilinear_internal.factor;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        if (factor != 0)
        {
            outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
        }
        else
        {
            outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
        }
        outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
        outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
    }

    return TRUE;
} /* op_setup() */

static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{

    return VSI_SUCCESS;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_op_common_deinit(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ RESIZE_1D_BILINEAR_INTERNAL,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ NULL,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

@ -0,0 +1,170 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2020 Vivante Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "vsi_nn_types.h"
|
||||
#include "vsi_nn_log.h"
|
||||
#include "vsi_nn_node.h"
|
||||
#include "vsi_nn_prv.h"
|
||||
#include "vsi_nn_ops.h"
|
||||
#include "vsi_nn_tensor.h"
|
||||
#include "utils/vsi_nn_util.h"
|
||||
#include "kernel/vsi_nn_kernel.h"
|
||||
#include "utils/vsi_nn_constraint_check.h"
|
||||
|
||||
/*
|
||||
Declare number of input and output.
|
||||
*/
|
||||
#define _INPUT_NUM (1)
|
||||
#define _OUTPUT_NUM (1)
|
||||
|
||||
static vsi_status op_compute
|
||||
(
|
||||
vsi_nn_node_t * self,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
vsi_nn_tensor_t ** outputs
|
||||
)
|
||||
{
|
||||
vsi_status status = VSI_FAILURE;
|
||||
int32_t align_corners = self->nn_param.resize_1d_nearest_internal.align_corners;
|
||||
int32_t half_pixel_centers = self->nn_param.resize_1d_nearest_internal.half_pixel_centers;
|
||||
vsi_nn_kernel_param_t * param = NULL;
|
||||
|
||||
param = vsi_nn_kernel_param_create();
|
||||
|
||||
vsi_nn_kernel_param_add_int32( param, "align_corners", align_corners );
|
||||
vsi_nn_kernel_param_add_int32( param, "half_pixel_centers", half_pixel_centers );
|
||||
|
||||
self->n = (vx_node)vsi_nn_kernel_selector( self->graph,
|
||||
"resize_1d_nearest",
|
||||
&inputs[0], 1,
|
||||
&outputs[0], 1, param );
|
||||
|
||||
if ( self->n )
|
||||
{
|
||||
status = VSI_SUCCESS;
|
||||
}
|
||||
|
||||
vsi_nn_kernel_param_release( ¶m );
|
||||
|
||||
return status;
|
||||
} /* op_compute() */
|
||||
|
||||
static vsi_bool op_check
|
||||
(
|
||||
vsi_nn_node_t * self,
|
||||
vsi_nn_tensor_t ** inputs,
|
||||
vsi_nn_tensor_t ** outputs
|
||||
)
|
||||
{
|
||||
BEGIN_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL, 1, 1)
|
||||
IO_TYPE(D_F16, D_F16)
|
||||
IO_TYPE(D_BF16, D_BF16)
|
||||
IO_TYPE(D_F32, D_F32)
|
||||
IO_TYPE(D_U8|Q_ASYM, D_U8|Q_ASYM)
|
||||
IO_TYPE(D_I8|Q_DFP, D_I8|Q_DFP)
|
||||
IO_TYPE(D_I16|Q_DFP, D_I16|Q_DFP)
|
||||
END_IO_TYPE_DECL(RESIZE_1D_NEAREST_INTERNAL)
|
||||
if (!VALIDATE_OP_IO_TYPES(RESIZE_1D_NEAREST_INTERNAL, self, inputs, self->input.num, outputs, self->output.num)) {
|
||||
char* desc = generate_op_io_types_desc(inputs,
|
||||
self->input.num, outputs, self->output.num);
|
||||
VSILOGE("Inputs/Outputs data type not support: %s", desc);
|
||||
destroy_op_io_types_desc(desc);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
} /* op_check() */
|
||||
|
||||
static vsi_bool op_setup
    (
    vsi_nn_node_t * self,
    vsi_nn_tensor_t ** inputs,
    vsi_nn_tensor_t ** outputs
    )
{
    float factor = self->nn_param.resize_1d_nearest_internal.factor;

    if ( VSI_NN_DIM_AUTO == outputs[0]->attr.dim_num )
    {
        outputs[0]->attr.dim_num = inputs[0]->attr.dim_num;
        if (factor != 0)
        {
            outputs[0]->attr.size[0] = (uint32_t)(inputs[0]->attr.size[0] * factor);
        }
        else
        {
            outputs[0]->attr.size[0] = self->nn_param.resize_1d.size[0];
        }
        outputs[0]->attr.size[1] = inputs[0]->attr.size[1];
        outputs[0]->attr.size[2] = inputs[0]->attr.size[2];
        outputs[0]->attr.size[3] = inputs[0]->attr.size[3];
    }

    return TRUE;
} /* op_setup() */

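The shape rule in op_setup has two paths: a non-zero factor scales the innermost (width) dimension, otherwise the explicitly requested size wins. A small worked sketch of just that rule (the helper name and sample values are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors op_setup's width computation for the auto-shape case. */
    static uint32_t resize_1d_out_width(uint32_t in_w, float factor, uint32_t explicit_w)
    {
        return (factor != 0.0f) ? (uint32_t)(in_w * factor) : explicit_w;
    }

    int main(void)
    {
        printf("%u\n", resize_1d_out_width(224, 0.5f, 0)); /* factor path:   112 */
        printf("%u\n", resize_1d_out_width(224, 0.0f, 7)); /* explicit path:   7 */
        return 0;
    }
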
static vsi_status op_init
    (
    vsi_nn_node_t* self
    )
{
    return VSI_SUCCESS;
} /* op_init() */

static vsi_status op_deinit
    (
    vsi_nn_node_t* self
    )
{
    vsi_status status = VSI_SUCCESS;

    status = vsi_nn_op_common_deinit(self);

    return status;
} /* op_deinit() */

__BEGIN_DECLS

/* Registrar */
DEF_OP_REG
    (
    /* op_name    */ RESIZE_1D_NEAREST_INTERNAL,
    /* init       */ op_init,
    /* compute    */ op_compute,
    /* deinit     */ op_deinit,
    /* check      */ op_check,
    /* setup      */ op_setup,
    /* optimize   */ NULL,
    /* input_num  */ _INPUT_NUM,
    /* output_num */ _OUTPUT_NUM
    );

__END_DECLS

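DEF_OP_REG presumably expands to an entry in ovxlib's op-proc table wiring these lifecycle callbacks together. A rough self-contained sketch of the callback-table pattern it follows; the struct and field names here are illustrative, not the real vsi_nn_op_proc_t (whose hooks also take tensor arrays):

    #include <stddef.h>

    /* Illustrative stand-ins for the node/op callback signatures. */
    typedef struct node node_t;
    typedef int (*op_cb)(node_t *self);

    typedef struct {
        const char *name;  /* op name used for lookup                         */
        op_cb init;        /* one slot per lifecycle hook; NULL means "no-op" */
        op_cb compute;
        op_cb deinit;
        op_cb check;
        op_cb setup;
        op_cb optimize;
        int   input_num;
        int   output_num;
    } op_proc_t;

    /* What a registration like the one above boils down to: */
    const op_proc_t k_resize_1d_nearest_internal = {
        "RESIZE_1D_NEAREST_INTERNAL",
        NULL, NULL, NULL, NULL, NULL, NULL, /* callbacks elided in this sketch */
        1, 1
    };
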
@@ -163,12 +163,17 @@ static vsi_bool op_check
        ret = FALSE;
    }

    if(ret)
    {
        BEGIN_IO_TYPE_DECL(SVDF, 5, 2)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_F16)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_F16)
            IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_F16)
            IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_F32)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F16, D_F16, D_NONE)
            IO_TYPE(D_F16, D_F16, D_F16, D_F16, D_F32, D_F16, D_NONE)
            IO_TYPE(D_F32, D_F16, D_F16, D_F16, D_F32, D_F32, D_NONE)
            IO_TYPE(D_F32, D_F32, D_F32, D_F32, D_F32, D_F32, D_NONE)
        END_IO_TYPE_DECL(SVDF)
        if(!VALIDATE_OP_IO_TYPES(SVDF, self, inputs, self->input.num, outputs, self->output.num)) {
            char* desc = generate_op_io_types_desc(inputs,

@@ -117,8 +117,7 @@ static void _try_pack_tensor_data
             {
                 *p_sz = (uint64_t)bytes;
             }
-            free( data );
-            data = NULL;
+            vsi_nn_safe_free( data );
         }
     }
 } /* _pack_tensor_data() */

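This commit repeatedly swaps the free-then-NULL idiom for vsi_nn_safe_free. Assuming the macro matches the pattern it replaces, a minimal sketch (my_safe_free is a stand-in name, not the library's definition):

    #include <stdlib.h>

    /* Frees a heap pointer and NULLs it so repeated frees become no-ops. */
    #define my_safe_free(_ptr)     \
        do {                       \
            if ((_ptr) != NULL) {  \
                free(_ptr);        \
                (_ptr) = NULL;     \
            }                      \
        } while (0)

Passing the lvalue (rather than its value) is what lets a macro reset the caller's pointer, which a plain function taking void * could not do.
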
@@ -417,6 +416,8 @@ static _op_param_gen_t s_op_gen[] =
     /* PRE_PROCESS_NV12 */ NULL,
     /* SCATTER_ND */ NULL,
     /* DECONVOLUTION1D */ NULL,
+    /* INTERP */ NULL,
+    /* RESIZE_1D */ NULL,
     };
 _compiler_assert( _cnt_of_array(s_op_gen) == VSI_NN_OP_NUM, vsi_nn_code_generator_c );

@@ -149,7 +149,7 @@ vsi_nn_tensor_t* vsi_nn_Concat
             src = j;
             memcpy( &buffer[dst * type_bytes], &tmp[src * type_bytes], type_bytes );
         }
-        free(tmp);
+        vsi_nn_safe_free( tmp );
         offset += strides[axis] * tensors[i]->attr.size[axis];
     }
     tensor_out = vsi_nn_CreateTensorFromData( graph, buffer, &output_attr );

@@ -221,11 +221,7 @@ vsi_nn_tensor_t* vsi_nn_ConvertTensorDtype
         }
     }

-    if( src_buf )
-    {
-        free( src_buf );
-        src_buf = NULL;
-    }
+    vsi_nn_safe_free( src_buf );
     if( dst_buf )
     {
         free( dst_buf );

@@ -333,10 +329,7 @@ vsi_nn_tensor_t* vsi_nn_TensorAdd
 error:
     for ( i = 0; i < tensor_num; i++ )
     {
-        if ( buffer[i] )
-        {
-            free(buffer[i]);
-        }
+        vsi_nn_safe_free( buffer[i] );
     }
     if( tmp )
     {

@@ -710,33 +710,6 @@ vsi_bool vsi_nn_CheckFilePath
     return FALSE;
 } /* vsi_nn_CheckFilePath() */

-void vsi_nn_GetFP32MultiAndPostShift
-    (
-    vx_float32 mult,
-    vx_uint16 *M0,
-    vx_int8 *N
-    )
-{
-    vx_uint32 uintMult = *((vx_uint32*)(&mult));
-    vx_uint32 tmpMultiply = 0;
-    vx_int32 exp = 0;
-    vx_uint32 postShiftBit6to5 = 0;
-    vx_uint32 postShift = 0;
-    vx_int8 tmpPostShift = 0;
-
-    tmpMultiply = (uintMult & 0x7FFFFF) >> 8;
-    *M0 = (vx_uint16)((1U << 15) + tmpMultiply);
-
-    exp = (uintMult & 0x7F800000) >> 23; /* postShift is Scale's exp */
-    tmpPostShift = 15 - ((vx_int8)exp - 127);
-    postShift = tmpPostShift & 0x1F;
-    tmpPostShift = tmpPostShift >> 5;
-    postShiftBit6to5 = tmpPostShift & 3;
-
-    *N = (vx_int8)(((postShiftBit6to5 << 5) | (postShift & 0x1F)));
-    *N = (((vx_int32)*N << 25) >> 25);
-}/* vsi_nn_GetFP32MultiAndPostShift() */
-
 typedef struct
 {
     uint8_t* raw_addr;

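The helper removed above decomposes a float multiplier into a 16-bit fixed-point multiplier M0 and a signed post-shift N so that mult ~= M0 * 2^-N: M0 keeps the top 15 mantissa bits plus the implicit leading one, and N comes from the exponent. A simplified, self-contained sketch of the same decomposition, assuming only the IEEE-754 single layout; it skips the original's packing of N into 7 bits and uses memcpy instead of the pointer cast to avoid aliasing issues:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    void fp32_to_mult_shift(float s, uint16_t *m0, int8_t *n)
    {
        uint32_t bits;
        memcpy(&bits, &s, sizeof(bits));          /* IEEE-754 bit pattern      */
        uint32_t mant = (bits & 0x7FFFFF) >> 8;   /* top 15 mantissa bits      */
        int32_t  exp  = (int32_t)((bits >> 23) & 0xFF) - 127;
        *m0 = (uint16_t)((1u << 15) + mant);      /* implicit leading 1 at b15 */
        *n  = (int8_t)(15 - exp);                 /* so s ~= M0 * 2^-N         */
    }

    int main(void)
    {
        uint16_t m0; int8_t n;
        fp32_to_mult_shift(0.5f, &m0, &n);        /* 0.5 -> M0 = 32768, N = 16 */
        printf("M0=%u N=%d approx=%g\n", (unsigned)m0, (int)n,
               (double)m0 / (double)(1u << n));
        return 0;
    }
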
@@ -520,6 +520,7 @@ static vx_tensor _create_const_raw_tensor
     vx_tensor tensor = NULL;
     vx_tensor_create_params_t params;
     float * scales = NULL;
+    int32_t * zeroPoints = NULL;

     memset( &params, 0, sizeof( vx_tensor_create_params_t ) );
     params.num_of_dims = attr.dim_num;

@@ -539,12 +540,14 @@ static vx_tensor _create_const_raw_tensor
 #ifdef VSI_PERCHANNEL_QUANTIZATION_SUPPORT
             // This is a hack: the driver doesn't support const scales
             scales = (float *)malloc(sizeof(float) * attr.dtype.scale_dim);
+            zeroPoints = (int32_t *)malloc(sizeof(int32_t) * attr.dtype.zero_points_dim);
             memcpy(scales, attr.dtype.scales, attr.dtype.scale_dim * sizeof(float));
+            memcpy(zeroPoints, attr.dtype.zero_points, attr.dtype.zero_points_dim * sizeof(int32_t));
             params.quant_data.affinePerChannel.channelDim = attr.dtype.channel_dim;
             params.quant_data.affinePerChannel.scaleCount = attr.dtype.scale_dim;
             params.quant_data.affinePerChannel.scales = scales;
-            params.quant_data.affinePerChannel.zeroPoint = NULL;
-            params.quant_data.affinePerChannel.zeroPointCount = 0;
+            params.quant_data.affinePerChannel.zeroPoint = zeroPoints;
+            params.quant_data.affinePerChannel.zeroPointCount = attr.dtype.zero_points_dim;
             break;
 #else
             VSILOGE( "can't support qnt_type VSI_NN_QNT_TYPE_AFFINE_PERCHANNEL_SYMMETRIC." );

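For context on what these per-channel parameters encode: under affine per-channel quantization, a quantized value q in channel c maps back to real values as (q - zeroPoint[c]) * scale[c]. A minimal sketch of that mapping (the function name is hypothetical):

    #include <stdint.h>

    /* Dequantizes one value from channel c given per-channel scales/zero points. */
    float dequant_per_channel(int32_t q, uint32_t c,
                              const float *scales, const int32_t *zero_points)
    {
        return (float)(q - zero_points[c]) * scales[c];
    }
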
@@ -580,6 +583,10 @@ static vx_tensor _create_const_raw_tensor
         {
             free( scales );
         }
+        if (zeroPoints)
+        {
+            free( zeroPoints );
+        }
         return NULL;
     }
 }

@@ -620,6 +627,10 @@ static vx_tensor _create_const_raw_tensor
         {
             free( scales );
         }
+        if (zeroPoints)
+        {
+            free( zeroPoints );
+        }

     return tensor;
 } /* _create_const_raw_tensor() */

@@ -689,6 +700,8 @@ static void _convert_const_I8toU8

     if ( tensor->t ) vxReleaseTensor(&tensor->t);
     tensor->t = vsi_nn_CreateRawTensorFromData(graph, data, attr);
+
+    vsi_nn_safe_free( data );
 }/* _convert_const_I8toU8() */

 static vsi_status _convert_graph_const_tensor

@@ -29,25 +29,49 @@
 #include "vsi_nn_log.h"
 #include "vsi_nn_types.h"

+#ifdef __ANDROID__
+#if ANDROID_SDK_VERSION >= 30
+static const char* ENV_LOG_LEVEL = "vendor.VSI_NN_LOG_LEVEL";
+#else
+static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL";
+#endif
+#else
+static const char* ENV_LOG_LEVEL = "VSI_NN_LOG_LEVEL";
+#endif
+
+int get_env_as_int(const char* env, int default_value) {
+
+    int value = default_value;
+#ifdef __ANDROID__
+    {
+        char value_str[100];
+        int status = __system_property_get(env, value_str);
+        if (status) {
+            value = atoi(value_str);
+        }
+    }
+#else
+    {
+        char* env_s = getenv(env);
+        if (env_s) {
+            value = atoi(env_s);
+        }
+    }
+#endif
+
+    return value;
+}
+
 static vsi_bool _check_log_level
     (
     vsi_nn_log_level_e level
     )
 {
-    char *env_level_s;
     static vsi_nn_log_level_e env_level = VSI_NN_LOG_UNINIT;

     if(env_level == VSI_NN_LOG_UNINIT)
     {
-        env_level_s = getenv("VSI_NN_LOG_LEVEL");
-        if(env_level_s)
-        {
-            env_level = (vsi_nn_log_level_e)atoi(env_level_s);
-        }
-        else
-        {
-            env_level = VSI_NN_LOG_WARN;
-        }
+        env_level = (vsi_nn_log_level_e)get_env_as_int(ENV_LOG_LEVEL, VSI_NN_LOG_WARN);
     }

     if(env_level >= level)

@@ -844,7 +844,7 @@ float * vsi_nn_ConvertTensorToFloat32Data

     if( !tensor->attr.is_created_from_handle )
     {
-        if(tensor_data)free(tensor_data);
+        vsi_nn_safe_free( tensor_data );
     }
     return data;
 } /* vsi_nn_ConvertTensorToFloat32Data() */

@@ -1095,7 +1095,7 @@ void vsi_nn_SaveTensorToTextByFp32
     }
     fwrite( buf, count, 1, fp );
     fclose( fp );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToTextByFp32() */

 void vsi_nn_SaveTensorToText

@@ -1124,7 +1124,7 @@ void vsi_nn_SaveTensorToText
     sz = vsi_nn_GetElementNum( tensor );
     vsi_nn_SaveDataToText( filename, data, sz,
         tensor->attr.dtype.vx_type, seperator );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToText() */

 void vsi_nn_SaveDataToText

@@ -1219,7 +1219,7 @@ void vsi_nn_SaveTensorToBinary
     }
     fwrite( data, sz, 1, fp );
     fclose( fp );
-    free( data );
+    vsi_nn_safe_free( data );
 } /* vsi_nn_SaveTensorToBinary() */

 vsi_nn_tensor_t * vsi_nn_CreateTensorFromData

@@ -1539,7 +1539,7 @@ void vsi_nn_TransposeTensor
         VSILOGE( "Copy transpose data fail with code %#x.", status );
     }

-    free( buf );
+    vsi_nn_safe_free( buf );
     free( dst );
 } /* vsi_nn_TransposeTensor() */

@@ -1588,7 +1588,7 @@ void vsi_nn_PermuteTensor
         if( perm[i] >= dim_num )
         {
             VSILOGW( "Incorrect perm %d", perm[i] );
-            if( buf ) { free(buf); buf = NULL; }
+            vsi_nn_safe_free( buf );
             if( dst ) { free(dst); dst = NULL; }
             return;
         }

@@ -1603,7 +1603,7 @@ void vsi_nn_PermuteTensor
         VSILOGE( "Copy permute data fail with code %#x.", status );
     }

-    if( buf ) { free(buf); buf = NULL; }
+    vsi_nn_safe_free( buf );
     if( dst ) { free(dst); dst = NULL; }
 } /* vsi_nn_PermuteTensor() */

@@ -2241,7 +2241,7 @@ void vsi_nn_reshuffle_weight_data
     }
     vsi_nn_CopyDataToTensor( graph, weights, weight_data );
     vsi_nn_Free( buffer );
-    vsi_nn_Free( weight_data );
+    vsi_nn_safe_free( weight_data );
 }

 vsi_nn_tensor_t* vsi_nn_ConcatTensor_impl
