From 7fb2f80dcecc1f1b65634ece8a86b0aef0eedc6f Mon Sep 17 00:00:00 2001 From: GHEORGHE-TEOD BERCEA Date: Wed, 27 Nov 2019 23:52:05 -0500 Subject: [PATCH] [MLIR] Add support for dealloc insertion (#386) * Add support for dealloc op. * Check dealloc for returned result not present. --- src/compiler/pass/lower_frontend_to_krnl.cpp | 43 +++++++++++++++--- .../mlir/onnx/onnx_lowering_with_dealloc.mlir | 45 +++++++++++++++++++ 2 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 test/mlir/onnx/onnx_lowering_with_dealloc.mlir diff --git a/src/compiler/pass/lower_frontend_to_krnl.cpp b/src/compiler/pass/lower_frontend_to_krnl.cpp index 430818b..d30c7bf 100644 --- a/src/compiler/pass/lower_frontend_to_krnl.cpp +++ b/src/compiler/pass/lower_frontend_to_krnl.cpp @@ -44,8 +44,9 @@ static MemRefType convertTensorToMemRef(TensorType type) { } /// Insert an allocation and deallocation for the given MemRefType. -static Value* insertAllocAndDealloc(MemRefType type, Location loc, - PatternRewriter& rewriter, Value* oldMemRef = nullptr) { +static Value* insertAllocAndDealloc( + MemRefType type, Location loc, PatternRewriter& rewriter, + bool insertDealloc, Value *oldMemRef = nullptr) { // Put together alloc operands for any dynamic dimensions of the memref. 
AllocOp alloc; if (oldMemRef) { @@ -54,7 +55,6 @@ static Value* insertAllocAndDealloc(MemRefType type, Location loc, for (int i = 0; i < memRefShape.size(); ++i) if (memRefShape[i] < 0) allocOperands.push_back(rewriter.create<DimOp>(loc, oldMemRef, i)); - alloc = rewriter.create<AllocOp>(loc, type, allocOperands); } else { alloc = rewriter.create<AllocOp>(loc, type); @@ -66,9 +66,36 @@ if (hasAllConstantDimensions(type)) alloc.getOperation()->moveBefore(&parentBlock->front()); + if (insertDealloc) { + auto dealloc = rewriter.create<DeallocOp>(loc, alloc); + dealloc.getOperation()->moveBefore(&parentBlock->back()); + } + return alloc; } +// Determine if current function returns the result value of the +// current op being lowered. If it does then dealloc should not be +// inserted. +static bool checkInsertDealloc(Operation *currentOp) { + auto parentBlock = currentOp->getBlock(); + + bool insertDealloc = true; + parentBlock->walk([&insertDealloc, currentOp](ReturnOp op) { + assert(currentOp->getNumResults() < 2 && + "No more than one result supported (for now)."); + // If there is at least one result to investigate. + if (currentOp->getNumResults() > 0) { + auto result = currentOp->getResult(0); + for(auto operand : op.getOperands()) + if (operand == result) + insertDealloc = false; + } + }); + + return insertDealloc; +} + namespace { //===----------------------------------------------------------------------===// @@ -95,11 +122,15 @@ struct ONNXAddOpLowering : public ConversionPattern { // dimensions with the result at this pre-optimization phase. // TODO: verify that dimensions match. // TODO: can the dimension of the result differ after optimizations? 
- Value* alloc; + Value *alloc; + bool insertDealloc = checkInsertDealloc(op); + if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter); + alloc = insertAllocAndDealloc( + memRefType, loc, rewriter, insertDealloc); else - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, operands[0]); + alloc = insertAllocAndDealloc( + memRefType, loc, rewriter, insertDealloc, operands[0]); // Number of loops auto memRefShape = memRefType.getShape(); diff --git a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir new file mode 100644 index 0000000..a86ce64 --- /dev/null +++ b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir @@ -0,0 +1,45 @@ +// RUN: dlc-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s +
+module {
+  func @test_sigmoid(%a1 : tensor<?x10xf32>, %a2 : tensor<?x10xf32>) -> tensor<*xf32> {
+    %0 = "onnx.Add"(%a1, %a2) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+    %1 = "onnx.Add"(%0, %a2) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+    "std.return"(%1) : (tensor<*xf32>) -> ()
+  }
+} + +// CHECK: func @test_sigmoid([[ARG0:%.+]]: memref<?x10xf32>, [[ARG1:%.+]]: memref<?x10xf32>) -> memref<?x10xf32> { + +/// First Add +// CHECK: [[DIM_0:%.+]] = dim [[ARG0]], 0 : memref<?x10xf32> +// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> +// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 +// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { +// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 +// CHECK: } : () -> (!krnl.loop, !krnl.loop) +// CHECK: [[DIM_2:%.+]] = dim [[ARG0]], 0 : memref<?x10xf32> +// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { +// CHECK: [[LOAD1:%.+]] = load [[ARG0]][%arg2, %arg3] : memref<?x10xf32> +// CHECK: [[LOAD2:%.+]] = load [[ARG1]][%arg2, %arg3] : memref<?x10xf32> +// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 +// CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<?x10xf32> + +/// Second Add +// CHECK: [[DIM_0:%.+]] = dim 
[[RES]], 0 : memref<?x10xf32> +// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> +// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 +// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { +// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 +// CHECK: } : () -> (!krnl.loop, !krnl.loop) +// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32> +// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { +// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32> +// CHECK: [[LOAD2:%.+]] = load [[ARG1]][%arg2, %arg3] : memref<?x10xf32> +// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 +// CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32> + +/// Dealloc of first result. +// CHECK: dealloc [[RES]] : memref<?x10xf32> +// CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32> + +// CHECK: return [[RET_RES]] : memref<?x10xf32>