Fixed no-output if transpose is last op and can be optimized (#395)
* Fixed no-output if transpose is last op and can be optimized. If the transpose can be erased by layout inference, replace it with a reshape: - input and output have the same shape - expect low-level optimization to erase the reshape Signed-off-by: xiang.zhang <xiang.zhang@verisilicon.com>
This commit is contained in:
parent
b3677305c4
commit
4f2991c853
|
|
@ -117,7 +117,7 @@ jobs:
|
|||
# AI-Benchmark 5.0.1 model zoo
|
||||
mobilenet_v2_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -132,7 +132,7 @@ jobs:
|
|||
|
||||
mobilenet_v2_b8_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -146,7 +146,7 @@ jobs:
|
|||
|
||||
resnet_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -160,7 +160,7 @@ jobs:
|
|||
|
||||
inception_v3_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -174,7 +174,7 @@ jobs:
|
|||
|
||||
mobilenet_v3_b4_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -188,7 +188,7 @@ jobs:
|
|||
|
||||
mobilenet_v3_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -202,7 +202,7 @@ jobs:
|
|||
|
||||
mv3_depth_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -216,7 +216,7 @@ jobs:
|
|||
|
||||
yolo_v4_tiny_quant:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -231,7 +231,7 @@ jobs:
|
|||
# Disable huge compilation cost
|
||||
# deeplab_v3_plus_quant:
|
||||
# runs-on: ubuntu-latest
|
||||
# needs: vx-delegate-build
|
||||
# needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
# steps:
|
||||
# - name: download test binary
|
||||
# uses: actions/download-artifact@v3
|
||||
|
|
@ -261,7 +261,7 @@ jobs:
|
|||
|
||||
tfhub-efficientdet-lite0:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -275,7 +275,7 @@ jobs:
|
|||
|
||||
tfhub-efficientdet-lite1:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -289,7 +289,7 @@ jobs:
|
|||
|
||||
tfhub-efficientdet-lite2:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -303,7 +303,7 @@ jobs:
|
|||
|
||||
tfhub-efficientdet-lite3:
|
||||
runs-on: ubuntu-latest
|
||||
needs: vx-delegate-build
|
||||
needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
steps:
|
||||
- name: download test binary
|
||||
uses: actions/download-artifact@v3
|
||||
|
|
@ -315,6 +315,20 @@ jobs:
|
|||
chmod u+x ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model
|
||||
${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model --num_runs=1 --external_delegate_path=${{github.workspace}}/vx-delegate-bin/libvx_delegate.so --graph=${{github.workspace}}/1.tflite
|
||||
|
||||
# acuity-yolov3-608-quant:
|
||||
# runs-on: ubuntu-latest
|
||||
# needs: [vx-delegate-build, tim-vx-unit-test]
|
||||
# steps:
|
||||
# - name: download test binary
|
||||
# uses: actions/download-artifact@v3
|
||||
# - name: download model
|
||||
# run: |
|
||||
# curl -LJO https://github.com/sunshinemyson/TIM-VX/releases/download/v1.1.30.2/yolov3_608relu_quant.acuity.tflite
|
||||
# - name: benchmark-model
|
||||
# run: |
|
||||
# chmod u+x ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model
|
||||
# ${{github.workspace}}/vx-delegate-bin/_deps/tensorflow-build/tools/benchmark/benchmark_model --num_runs=1 --external_delegate_path=${{github.workspace}}/vx-delegate-bin/libvx_delegate.so --graph=${{github.workspace}}/yolov3_608relu_quant.acuity.tflite
|
||||
|
||||
# Graph compilation time is huge over 20mins
|
||||
# tfhub-efficientdet-lite4:
|
||||
# runs-on: ubuntu-latest
|
||||
|
|
|
|||
|
|
@ -56,8 +56,14 @@ class TransposeLayoutInfer : public OpLayoutInfer {
|
|||
IPermuteVectorPtr final_pv = input_pv->Reverse()->Add(perm_pv);
|
||||
|
||||
if (final_pv->IsAligned()) {
|
||||
//skip transpose op by treating its input as its output.
|
||||
context_->UpdateTensorMap(op_->impl()->OutputsTensor()[0], infer_input);
|
||||
//skip transpose op by inserting a dummy reshape
|
||||
// context_->UpdateTensorMap(op_->impl()->OutputsTensor()[0], infer_input);
|
||||
auto reshape_op =
|
||||
context_->infer_graph_->CreateOperation<tim::vx::ops::Reshape>(
|
||||
op_->impl()->OutputsTensor()[0]->GetShape());
|
||||
reshape_op->BindInput(infer_input);
|
||||
auto reshape_out = CreateOutputsTensor(final_pv);
|
||||
reshape_op->BindOutput(reshape_out[0]);
|
||||
} else {
|
||||
auto transpose_op =
|
||||
context_->infer_graph_->CreateOperation<tim::vx::ops::Transpose>(
|
||||
|
|
|
|||
Loading…
Reference in New Issue