From 4f2991c853090dcfde88339ed76dca4f421a46e3 Mon Sep 17 00:00:00 2001 From: Sven Date: Fri, 13 May 2022 16:59:25 +0800 Subject: [PATCH] Fixed no-output if transpose is last op and can be optimized (#395) * Fixed no-output if transpose is last op and can be optimized If the transpose can be erased by layout inference, replace it with a reshape - input and output have the same shape - expect low-level optimization to erase the reshape Signed-off-by: xiang.zhang --- .github/workflows/cmake_x86_vsim.yml | 40 +++++++++++++------ .../ops/transpose_layout_inference.h | 10 ++++- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/.github/workflows/cmake_x86_vsim.yml b/.github/workflows/cmake_x86_vsim.yml index e2231db..cb25dca 100644 --- a/.github/workflows/cmake_x86_vsim.yml +++ b/.github/workflows/cmake_x86_vsim.yml @@ -117,7 +117,7 @@ jobs: # AI-Benchmark 5.0.1 model zoo mobilenet_v2_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download binary uses: actions/download-artifact@v3 @@ -132,7 +132,7 @@ jobs: mobilenet_v2_b8_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download binary uses: actions/download-artifact@v3 @@ -146,7 +146,7 @@ jobs: resnet_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -160,7 +160,7 @@ jobs: inception_v3_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -174,7 +174,7 @@ jobs: mobilenet_v3_b4_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -188,7 +188,7 @@ jobs: mobilenet_v3_quant: runs-on: ubuntu-latest - needs: 
vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -202,7 +202,7 @@ jobs: mv3_depth_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -216,7 +216,7 @@ jobs: yolo_v4_tiny_quant: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -231,7 +231,7 @@ jobs: # Disable huge compilation cost # deeplab_v3_plus_quant: # runs-on: ubuntu-latest - # needs: vx-delegate-build + # needs: [vx-delegate-build, tim-vx-unit-test] # steps: # - name: download test binary # uses: actions/download-artifact@v3 @@ -261,7 +261,7 @@ jobs: tfhub-efficientdet-lite0: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -275,7 +275,7 @@ jobs: tfhub-efficientdet-lite1: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -289,7 +289,7 @@ jobs: tfhub-efficientdet-lite2: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -303,7 +303,7 @@ jobs: tfhub-efficientdet-lite3: runs-on: ubuntu-latest - needs: vx-delegate-build + needs: [vx-delegate-build, tim-vx-unit-test] steps: - name: download test binary uses: actions/download-artifact@v3 @@ -315,6 +315,20 @@ jobs: chmod u+x ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model --num_runs=1 
--external_delegate_path=${{github.workspace}}/vx-delegate-bin/libvx_delegate.so --graph=${{github.workspace}}/1.tflite + # acuity-yolov3-608-quant: + # runs-on: ubuntu-latest + # needs: [vx-delegate-build, tim-vx-unit-test] + # steps: + # - name: download test binary + # uses: actions/download-artifact@v3 + # - name: download model + # run: | + # curl -LJO https://github.com/sunshinemyson/TIM-VX/releases/download/v1.1.30.2/yolov3_608relu_quant.acuity.tflite + # - name: benchmark-model + # run: | + # chmod u+x ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model + # ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model --num_runs=1 --external_delegate_path=${{github.workspace}}/vx-delegate-bin/libvx_delegate.so --graph=${{github.workspace}}/yolov3_608relu_quant.acuity.tflite + # Graph compilation time is huge over 20mins # tfhub-efficientdet-lite4: # runs-on: ubuntu-latest diff --git a/src/tim/transform/ops/transpose_layout_inference.h b/src/tim/transform/ops/transpose_layout_inference.h index f128aff..bda00a4 100644 --- a/src/tim/transform/ops/transpose_layout_inference.h +++ b/src/tim/transform/ops/transpose_layout_inference.h @@ -56,8 +56,14 @@ class TransposeLayoutInfer : public OpLayoutInfer { IPermuteVectorPtr final_pv = input_pv->Reverse()->Add(perm_pv); if (final_pv->IsAligned()) { - //skip transpose op by treating its input as its output. - context_->UpdateTensorMap(op_->impl()->OutputsTensor()[0], infer_input); + //skip transpose op by insert a dummy reshape + // context_->UpdateTensorMap(op_->impl()->OutputsTensor()[0], infer_input); + auto reshape_op = + context_->infer_graph_->CreateOperation( + op_->impl()->OutputsTensor()[0]->GetShape()); + reshape_op->BindInput(infer_input); + auto reshape_out = CreateOutputsTensor(final_pv); + reshape_op->BindOutput(reshape_out[0]); } else { auto transpose_op = context_->infer_graph_->CreateOperation(