Fixed no-output if transpose is last op and can be optimized (#395)

* Fixed no-output if transpose is last op and can be optimized

If a transpose can be erased by layout inference, replace it with a
reshape whose input and output have the same shape, and expect the
low-level optimization to erase the reshape.

Signed-off-by: xiang.zhang <xiang.zhang@verisilicon.com>
This commit is contained in:
Sven 2022-05-13 16:59:25 +08:00 committed by GitHub
parent b3677305c4
commit 4f2991c853
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 15 deletions

View File

@ -117,7 +117,7 @@ jobs:
# AI-Benchmark 5.0.1 model zoo
mobilenet_v2_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download binary
uses: actions/download-artifact@v3
@ -132,7 +132,7 @@ jobs:
mobilenet_v2_b8_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download binary
uses: actions/download-artifact@v3
@ -146,7 +146,7 @@ jobs:
resnet_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -160,7 +160,7 @@ jobs:
inception_v3_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -174,7 +174,7 @@ jobs:
mobilenet_v3_b4_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -188,7 +188,7 @@ jobs:
mobilenet_v3_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -202,7 +202,7 @@ jobs:
mv3_depth_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -216,7 +216,7 @@ jobs:
yolo_v4_tiny_quant:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -231,7 +231,7 @@ jobs:
# Disable huge compilation cost
# deeplab_v3_plus_quant:
# runs-on: ubuntu-latest
# needs: vx-delegate-build
# needs: [vx-delegate-build, tim-vx-unit-test]
# steps:
# - name: download test binary
# uses: actions/download-artifact@v3
@ -261,7 +261,7 @@ jobs:
tfhub-efficientdet-lite0:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -275,7 +275,7 @@ jobs:
tfhub-efficientdet-lite1:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -289,7 +289,7 @@ jobs:
tfhub-efficientdet-lite2:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -303,7 +303,7 @@ jobs:
tfhub-efficientdet-lite3:
runs-on: ubuntu-latest
needs: vx-delegate-build
needs: [vx-delegate-build, tim-vx-unit-test]
steps:
- name: download test binary
uses: actions/download-artifact@v3
@ -315,6 +315,20 @@ jobs:
chmod u+x ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model
${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model --num_runs=1 --external_delegate_path=${{github.workspace}}/vx-delegate-bin/libvx_delegate.so --graph=${{github.workspace}}/1.tflite
# acuity-yolov3-608-quant:
# runs-on: ubuntu-latest
# needs: [vx-delegate-build, tim-vx-unit-test]
# steps:
# - name: download test binary
# uses: actions/download-artifact@v3
# - name: download model
# run: |
# curl -LJO https://github.com/sunshinemyson/TIM-VX/releases/download/v1.1.30.2/yolov3_608relu_quant.acuity.tflite
# - name: benchmark-model
# run: |
# chmod u+x ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model
# ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model --num_runs=1 --external_delegate_path=${{github.workspace}}/vx-delegate-bin/libvx_delegate.so --graph=${{github.workspace}}/yolov3_608relu_quant.acuity.tflite
# Graph compilation time is huge over 20mins
# tfhub-efficientdet-lite4:
# runs-on: ubuntu-latest

View File

@ -56,8 +56,14 @@ class TransposeLayoutInfer : public OpLayoutInfer {
IPermuteVectorPtr final_pv = input_pv->Reverse()->Add(perm_pv);
if (final_pv->IsAligned()) {
//skip transpose op by treating its input as its output.
context_->UpdateTensorMap(op_->impl()->OutputsTensor()[0], infer_input);
//skip transpose op by insert a dummy reshape
// context_->UpdateTensorMap(op_->impl()->OutputsTensor()[0], infer_input);
auto reshape_op =
context_->infer_graph_->CreateOperation<tim::vx::ops::Reshape>(
op_->impl()->OutputsTensor()[0]->GetShape());
reshape_op->BindInput(infer_input);
auto reshape_out = CreateOutputsTensor(final_pv);
reshape_op->BindOutput(reshape_out[0]);
} else {
auto transpose_op =
context_->infer_graph_->CreateOperation<tim::vx::ops::Transpose>(