From b27e57cc4f4fdc3ba955639a44026265592e5e09 Mon Sep 17 00:00:00 2001 From: Gheorghe-Teodor Bercea Date: Mon, 20 Jul 2020 19:24:17 -0400 Subject: [PATCH] Emit allocs at the top of functions (#222) * Reorganize main function. * Follow review comments. * Emit constants are globals in Krnl and LLVM dialects. * Add support for moving dynamic alloca instructions to top of functions. * Fix memory pooling tests. * Various fixes. * Fix lit tests. * More test fixes. * Reformat. * Reformat some more. * Fix issue with TestConv and split-input-file. * Use smart pointers. * Remove redundant pointer. * Reformat. * Add initMap description. * Clean up tests. --- .../ONNXToKrnl/ConvertONNXToKrnl.cpp | 49 +- .../ONNXToKrnl/Math/Elementwise.cpp | 12 +- src/Conversion/ONNXToKrnl/Math/Gemm.cpp | 3 +- src/Conversion/ONNXToKrnl/Math/MatMul.cpp | 13 +- src/Conversion/ONNXToKrnl/Math/Reduction.cpp | 4 +- src/Conversion/ONNXToKrnl/Math/Softmax.cpp | 11 +- src/Conversion/ONNXToKrnl/NN/Conv.cpp | 5 +- .../ONNXToKrnl/NN/Normalization.cpp | 5 +- src/Conversion/ONNXToKrnl/NN/Pooling.cpp | 3 +- .../ONNXToKrnl/ONNXToKrnlCommon.cpp | 198 +++++- .../ONNXToKrnl/ONNXToKrnlCommon.hpp | 57 +- src/Conversion/ONNXToKrnl/RNN/LSTM.cpp | 19 +- src/Conversion/ONNXToKrnl/Tensor/Concat.cpp | 6 +- src/Conversion/ONNXToKrnl/Tensor/Identity.cpp | 1 + src/Conversion/ONNXToKrnl/Tensor/Pad.cpp | 4 +- .../ONNXToKrnl/Tensor/PadConstantValuePad.cpp | 4 +- src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp | 3 +- src/Conversion/ONNXToKrnl/Tensor/Split.cpp | 3 +- src/Conversion/ONNXToKrnl/Tensor/Squeeze.cpp | 3 +- .../ONNXToKrnl/Tensor/Transpose.cpp | 6 +- .../ONNXToKrnl/Tensor/Unsqueeze.cpp | 3 +- src/MainUtils.cpp | 1 + src/Transform/BundleMemoryPools.cpp | 2 - src/Transform/EnableMemoryPool.cpp | 4 +- test/mlir/krnl/constant.mlir | 4 +- test/mlir/krnl/memory_pool.mlir | 2 +- test/mlir/krnl/reshape.mlir | 4 +- test/mlir/onnx/onnx_bundle_memory_pool.mlir | 4 +- test/mlir/onnx/onnx_enable_memory_pool.mlir | 30 +- test/mlir/onnx/onnx_lowering.mlir | 601 +----------------- test/mlir/onnx/onnx_lowering_lstm.mlir | 263 ++++++++ test/mlir/onnx/onnx_lowering_pooling.mlir | 121 ++++ test/mlir/onnx/onnx_lowering_reductions.mlir | 93 +++ test/mlir/onnx/onnx_lowering_split.mlir | 85 +++ test/mlir/onnx/onnx_lowering_squeeze.mlir | 29 + .../mlir/onnx/onnx_lowering_with_dealloc.mlir | 100 +-- test/mlir/onnx/onnx_structure.mlir | 2 +- 37 files changed, 1032 insertions(+), 725 deletions(-) create mode 100644 test/mlir/onnx/onnx_lowering_lstm.mlir create mode 100644 test/mlir/onnx/onnx_lowering_pooling.mlir create mode 100644 test/mlir/onnx/onnx_lowering_reductions.mlir create mode 100644 test/mlir/onnx/onnx_lowering_split.mlir create mode 100644 test/mlir/onnx/onnx_lowering_squeeze.mlir diff --git a/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp b/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp index 2571458..38f860c 100644 --- a/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp +++ b/src/Conversion/ONNXToKrnl/ConvertONNXToKrnl.cpp @@ -1,5 +1,4 @@ -//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering -//--------===// +//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering -------===// // // Copyright 2019 The IBM Research Authors. // @@ -34,6 +33,38 @@ public: } }; +//===----------------------------------------------------------------------===// +// FuncOp lowering to Function with init and main blocks. 
+//===----------------------------------------------------------------------===// + +struct FuncOpSignatureConversion : public OpConversionPattern { + FuncOpSignatureConversion(MLIRContext *ctx, TypeConverter &converter) + : OpConversionPattern(converter, ctx) {} + + /// Hook for derived classes to implement combined matching and rewriting. + LogicalResult matchAndRewrite(FuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + FunctionType type = funcOp.getType(); + + // Convert the original function types. + TypeConverter::SignatureConversion result(type.getNumInputs()); + SmallVector newResults; + if (failed(typeConverter->convertSignatureArgs(type.getInputs(), result)) || + failed(typeConverter->convertTypes(type.getResults(), newResults)) || + failed(rewriter.convertRegionTypes( + &funcOp.getBody(), *typeConverter, &result))) + return failure(); + + // Update the function signature in-place. + rewriter.updateRootInPlace(funcOp, [&] { + funcOp.setType(FunctionType::get( + result.getConvertedTypes(), newResults, funcOp.getContext())); + }); + addInitBlock(rewriter, funcOp.getLoc(), funcOp); + return success(); + } +}; + //===----------------------------------------------------------------------===// // Frontend to Krnl Dialect lowering pass //===----------------------------------------------------------------------===// @@ -49,6 +80,10 @@ struct FrontendToKrnlLoweringPass void FrontendToKrnlLoweringPass::runOnOperation() { ModuleOp module = getOperation(); + // Create an entry for this module + initMap.insert(std::pair>( + module, std::make_unique())); + // The first thing to define is the conversion target. This will define the // final target for this lowering. ConversionTarget target(getContext()); @@ -77,12 +112,6 @@ void FrontendToKrnlLoweringPass::runOnOperation() { return tensor_to_memref_converter.isSignatureLegal(op.getType()); }); - // Type conversion for function signatures. - // Call MLIR FuncOp signature conversion when result type is - // a ranked tensor. - populateFuncOpTypeConversionPattern( - patterns, &getContext(), tensor_to_memref_converter); - // Frontend operation lowering. // Math populateLoweringONNXElementwiseOpPattern(patterns, &getContext()); @@ -109,12 +138,16 @@ void FrontendToKrnlLoweringPass::runOnOperation() { populateLoweringONNXLSTMOpPattern(patterns, &getContext()); // Entry point patterns.insert(&getContext()); + patterns.insert( + &getContext(), tensor_to_memref_converter); // With the target and rewrite patterns defined, we can now attempt the // conversion. The conversion will signal failure if any of our `illegal` // operations were not converted successfully. 
if (failed(applyPartialConversion(module, target, patterns))) signalPassFailure(); + + initMap.erase(module); } std::unique_ptr mlir::createLowerToKrnlPass() { diff --git a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp index 3bc2222..33391c0 100644 --- a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp @@ -518,10 +518,11 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern { bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); - else alloc = - insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, {X}); + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); + else + alloc = insertAllocAndDealloc( + memRefType, loc, rewriter, insertDealloc, op, {X}); SmallVector loopIVs; if (!hasAllScalarValues(operands)) { @@ -574,10 +575,11 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern { // comes from. // TODO: can the dimension of the result differ after optimizations? if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else alloc = insertAllocAndDealloc( - memRefType, loc, rewriter, insertDealloc, operands); + memRefType, loc, rewriter, insertDealloc, op, operands); SmallVector loopIVs; std::map> broadcastedDimInfo; diff --git a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp index cce0529..1d3fbcf 100644 --- a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp @@ -46,7 +46,8 @@ struct ONNXGemmOpLowering : public ConversionPattern { Value alloc; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else { auto memRefShape = memRefType.getShape(); SmallVector allocOperands; diff --git a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp index 69f0006..702fd4e 100644 --- a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp +++ b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp @@ -43,8 +43,16 @@ struct ONNXMatMulOpLowering : public ConversionPattern { Value alloc; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else { + PatternRewriter::InsertionGuard insertGuard(rewriter); + FuncOp function = getContainingFunction(op); + bool functionLevelAlloc = (op->getParentOp() == function); + bool canMove = checkAllocMovable(function, functionLevelAlloc, {A, B}); + if (canMove) + rewriter.setInsertionPoint(getInitInsertionPoint(function)); + SmallVector allocOperands; if (AShape.size() >= 2 && BShape.size() >= 2) { // Both arguments are N-D, N >= 2 @@ -108,6 +116,9 @@ struct ONNXMatMulOpLowering : public ConversionPattern { } alloc = rewriter.create(loc, memRefType, allocOperands); + + if (canMove) + markOperandInInitBlock(function, alloc); } if (AShape.size() >= 2 || BShape.size() >= 2) { diff --git a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp index 2a66c39..a029c34 100644 --- 
a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp @@ -159,8 +159,8 @@ struct ONNXReductionOpLowering : public ConversionPattern { Value alloc; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefOutType)) { - alloc = - insertAllocAndDealloc(memRefOutType, loc, rewriter, insertDealloc); + alloc = insertAllocAndDealloc( + memRefOutType, loc, rewriter, insertDealloc, op); } else { SmallVector allocOperands; for (decltype(outRank) i = 0; i < outRank; ++i) { diff --git a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp index 44826e2..4e8986c 100644 --- a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp @@ -36,18 +36,21 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { Value alloc; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else alloc = insertAllocAndDealloc( - memRefType, loc, rewriter, insertDealloc, input); + memRefType, loc, rewriter, insertDealloc, op, input); // Shape of the result auto memRefShape = memRefType.getShape(); // Insert allocations and deallocations for sum and max. MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0); - Value sumOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true); - Value maxOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true); + Value sumOp = + insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op); + Value maxOp = + insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op); Value zero = emitConstantOp(rewriter, loc, elementType, 0); Value negInfinity = rewriter.create(loc, FloatAttr::get(elementType, -std::numeric_limits::infinity())); diff --git a/src/Conversion/ONNXToKrnl/NN/Conv.cpp b/src/Conversion/ONNXToKrnl/NN/Conv.cpp index 607f43b..11fa940 100644 --- a/src/Conversion/ONNXToKrnl/NN/Conv.cpp +++ b/src/Conversion/ONNXToKrnl/NN/Conv.cpp @@ -36,10 +36,11 @@ struct ONNXConvOpLowering : public ConversionPattern { bool hasBias = !biasOperand.getType().isa(); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else alloc = insertAllocAndDealloc( - memRefType, loc, rewriter, insertDealloc, {inputOperand}); + memRefType, loc, rewriter, insertDealloc, op, {inputOperand}); // R = Conv(D, K) // diff --git a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp index e160bea..a5959e7 100644 --- a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp +++ b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp @@ -42,10 +42,11 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else alloc = insertAllocAndDealloc( - memRefType, loc, rewriter, insertDealloc, {operand}); + memRefType, loc, rewriter, insertDealloc, op, {operand}); // Operand's dimensions can be in the form of NxCxD1xD2x...xDn or N. // In case of N, C is assumed to be 1. 
diff --git a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp index d40de6f..ecc2286 100644 --- a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp +++ b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp @@ -235,7 +235,8 @@ struct ONNXPoolOpLowering : public ConversionPattern { bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else { alloc = insertAllocAndDeallocForPooling(rewriter, loc, insertDealloc, memRefType, inputOperand, kernelShape, pads, strides, dilations, diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp index c57d0a4..607e6dd 100644 --- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp +++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp @@ -11,6 +11,8 @@ #include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp" +std::map> initMap; + /// Check is all dimensions are known at compile time. bool hasAllConstantDimensions(MemRefType type) { auto memRefShape = type.getShape(); @@ -43,11 +45,151 @@ MemRefType convertToMemRefType(Type type) { return memRefType; } +/// Retrieve function which contains the current operation. +FuncOp getContainingFunction(Operation *op) { + Operation *parentFuncOp = op->getParentOp(); + + // While parent is not a FuncOp and its cast to a FuncOp is null. + while (!llvm::dyn_cast_or_null(parentFuncOp)) + parentFuncOp = parentFuncOp->getParentOp(); + + return cast(parentFuncOp); +} + +void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp function) { + // If this is the first time we encounter an operation in this + // function, we create an entry inside the initMap and split the + // function body into an init block and a main block. + // + // function func_name() { + // ... init block ... + // br ^bb1 + // ^bb1: // pred: ^bb0 + // ... main block ... + // return + // } + // + // Note: the block ^bb0 being the first block has its label omitted. + // + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + if (initStates->count(function) == 0) { + initStates->insert( + std::pair>( + function, std::make_unique())); + std::unique_ptr &initState = + initStates->at(function); + + // All input arguments are considered as part of the initialization block + // so add them to the operandsInInitBlock set. + Block *functionBlock = &function.front(); + for (auto arg : functionBlock->getArguments()) + initState->operandsInInitBlock.insert(arg); + + PatternRewriter::InsertionGuard insertGuard(rewriter); + rewriter.setInsertionPointToStart(functionBlock); + + initState->initBlock = rewriter.getInsertionBlock(); + auto currentPoint = rewriter.getInsertionPoint(); + initState->mainBlock = + rewriter.splitBlock(initState->initBlock, currentPoint); + + rewriter.setInsertionPointToEnd(initState->initBlock); + + // Insert a branch operation from initBlock to mainBlock. This + // ensures the final code contains legal blocks. + initState->branchInit = + rewriter.create(loc, initState->mainBlock); + + // Set insertion point to start of mainBlock. 
+ rewriter.setInsertionPointToStart(initState->mainBlock); + } +} + +bool containingFunctionHasInitBlock(Operation *op) { + FuncOp function = getContainingFunction(op); + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + return initStates->count(function) > 0; +} + +Block *getInitBlock(FuncOp function) { + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + assert(initStates->count(function) > 0 && + "Initialization state not defined for this function."); + return initStates->at(function)->initBlock; +} + +Block *getMainBlock(FuncOp function) { + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + assert(initStates->count(function) > 0 && + "Initialization state not defined for this function."); + return initStates->at(function)->mainBlock; +} + +BranchOp getInitInsertionPoint(FuncOp function) { + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + assert(initStates->count(function) > 0 && + "Initialization state not defined for this function."); + return initStates->at(function)->branchInit; +} + +/// Check if all operands used for allocating the size of the result are +/// in the initialization block (i.e. initBlock). +bool checkAllocMovable( + FuncOp function, bool functionLevelAlloc, ArrayRef operands) { + // If no initialization block exists then alloc cannot be moved. + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + if (initStates->count(function) == 0) + return false; + + // If the alloc is not function level alloc then it cannot be moved. + if (!functionLevelAlloc) + return false; + + bool allInitOrArg = true; + for (int i = 0; i < operands.size(); i++) { + if (initStates->at(function)->operandsInInitBlock.count(operands[i]) == 0) + allInitOrArg = false; + } + + return allInitOrArg; +} + +/// Add operand to list of operands in the init block. +void markOperandInInitBlock(FuncOp function, Value operand) { + // Check if function is valid. At this point it has to be. + assert(function && "Attempt to add operand when function is null."); + ModuleOp module = cast(function.getParentOp()); + std::unique_ptr &initStates = initMap.at(module); + // A valid function must have an initialization state. + assert(initStates->count(function) > 0 && + "Initialization state not defined for this function."); + initStates->at(function)->operandsInInitBlock.insert(operand); +} + /// Insert an allocation and deallocation for the given MemRefType. -Value insertAllocAndDealloc(MemRefType type, Location loc, - PatternRewriter &rewriter, bool insertDealloc, ArrayRef operands, - int64_t alignment) { +Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc, + PatternRewriter &rewriter, bool insertDealloc, FuncOp function, + bool functionLevelAlloc, ArrayRef operands, int64_t alignment) { // Put together alloc operands for any dynamic dimensions of the memref. + // Save insertion point in case we need to change it to the initBlock. + PatternRewriter::InsertionGuard insertGuard(rewriter); + + // Check if all operands of the alloc are in the init region or are input + // arguments. If some of them are not or there is no init block, this + // variable will be false. 
+  bool canMove = checkAllocMovable(function, functionLevelAlloc, operands);
+
+  // If a legal move to the init block is possible, set insertion point
+  // at the end of the initialization block just before the branch instruction.
+  if (canMove)
+    rewriter.setInsertionPoint(getInitInsertionPoint(function));
+
   AllocOp alloc;
   if (!operands.empty()) {
     auto memRefShape = type.getShape();
@@ -97,6 +239,11 @@ Value insertAllocAndDealloc(MemRefType type, Location loc,
     } else {
       alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
     }
+
+    // If the alloc was emitted inside the initialization block then add it
+    // to the set of values emitted in the initialization block.
+    if (canMove)
+      markOperandInInitBlock(function, alloc.getResult());
   } else {
     // Set alignment attribute. Default value is `-1`, which does not set
     // alignment.
@@ -113,17 +260,52 @@
   // Make sure to allocate at the beginning of the block if
   // all dimensions are known.
   auto *parentBlock = alloc.getOperation()->getBlock();
-  if (hasAllConstantDimensions(type))
-    alloc.getOperation()->moveBefore(&parentBlock->front());
+  if (hasAllConstantDimensions(type)) {
+    // Check if this move is a move to the init block or to the top of the
+    // function without an init block. For the case in which all dimensions
+    // are constant, the `canMove` variable will be false if there is no
+    // init block.
+    if (canMove) {
+      // The alloc was emitted in the init block already so just record
+      // that this value is now available in the init block.
+      alloc.getOperation()->moveBefore(&getInitBlock(function)->front());
+      markOperandInInitBlock(function, alloc.getResult());
+    } else {
+      // No init block exists in this case so just move it as before.
+      alloc.getOperation()->moveBefore(&parentBlock->front());
+    }
+  }
 
   if (insertDealloc) {
     auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
-    dealloc.getOperation()->moveBefore(&parentBlock->back());
+    // Move dealloc to the end of the main block if such a block exists.
+    if (canMove) {
+      Block *mainBlock = getMainBlock(function);
+      dealloc.getOperation()->moveBefore(&mainBlock->back());
+    } else {
+      // If no main block exists, move to parent block.
+      dealloc.getOperation()->moveBefore(&parentBlock->back());
+    }
   }
 
   return alloc;
 }
 
+/// Insert an allocation and deallocation for the given MemRefType.
+Value insertAllocAndDealloc(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
+    ArrayRef<Value> operands, int64_t alignment) {
+  FuncOp function = getContainingFunction(op);
+
+  bool functionLevelAlloc = (op->getParentOp() == function);
+  if (!functionLevelAlloc) {
+    printf("This is not a function level alloc!\n");
+  }
+
+  return insertAllocAndDeallocWithFunction(type, loc, rewriter, insertDealloc,
+      function, functionLevelAlloc, operands, alignment);
+}
+
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
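For reference, a minimal sketch of the call pattern the per-op lowerings in this patch are updated to; `ONNXFooOpLowering`, the `"onnx.Foo"` op name, and the choice of `operands[0]` as the shape-carrying operand are illustrative placeholders, while the helpers (`insertAllocAndDealloc`, `checkInsertDealloc`, `convertToMemRefType`) are the ones declared in ONNXToKrnlCommon.hpp in this patch. Passing the lowered `op` lets the helper find the containing FuncOp and decide whether the alloc (and its dealloc) can be placed in the function's init and main blocks.

// Sketch only, not part of the patch.
struct ONNXFooOpLowering : public ConversionPattern {
  ONNXFooOpLowering(MLIRContext *ctx) : ConversionPattern("onnx.Foo", 1, ctx) {}

  LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
      ConversionPatternRewriter &rewriter) const final {
    Location loc = op->getLoc();
    MemRefType memRefType = convertToMemRefType(*op->result_type_begin());
    bool insertDealloc = checkInsertDealloc(op);

    Value alloc;
    if (hasAllConstantDimensions(memRefType))
      // Static shape: no dynamic alloc operands are needed.
      alloc =
          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
    else
      // Dynamic shape: pass the values supplying the dynamic dimensions so
      // the helper can check they are visible from the init block.
      alloc = insertAllocAndDealloc(
          memRefType, loc, rewriter, insertDealloc, op, {operands[0]});

    // ... emit the Krnl loops that compute into `alloc` ...
    rewriter.replaceOp(op, alloc);
    return success();
  }
};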
@@ -463,10 +645,10 @@ int64_t ArrayAttrIntVal(ArrayAttr a, int i) {
 }
 
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp) {
-  auto parentBlock = allocOp->getOperation()->getBlock();
+  FuncOp function = getContainingFunction(allocOp->getOperation());
 
   bool opIsUsedInGetRef = false;
-  parentBlock->walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
+  function.walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
     auto result = allocOp->getResult();
     for (const auto &operand : op.getOperands())
       if (operand == result)
diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
index 6b6660c..caca3d8 100644
--- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
+++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
@@ -19,7 +19,9 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetVector.h"
 #include "src/Dialect/Krnl/KrnlHelper.hpp"
 #include "src/Dialect/Krnl/KrnlOps.hpp"
@@ -29,6 +31,37 @@
 using namespace mlir;
 
+//===----------------------------------------------------------------------===//
+// Insertion point for initialization instructions and the blocks used for
+// inserting the initialization and main code. These blocks will disappear
+// when the first canonicalization is performed because the init block
+// unconditionally branches into the second block. These blocks exist only for
+// the purpose of this optimization.
+// The support happens on a per-function basis.
+//===----------------------------------------------------------------------===//
+
+typedef struct ONNXOperandsInitState {
+  Block *initBlock;
+  Block *mainBlock;
+  BranchOp branchInit;
+  llvm::SetVector<Value> operandsInInitBlock;
+} ONNXOperandsInitState;
+
+typedef std::map<FuncOp, std::unique_ptr<ONNXOperandsInitState>>
+    FunctionToInitStates;
+
+// This map is used by the FrontendToKrnlLoweringPass pass to keep track of the
+// allocations emitted in the initialization block for each function of a given
+// module. A translation unit can consist of several modules, each with several
+// functions, hence the structure shown below.
+// This data structure enables the emission of dynamic `alloc` instructions
+// in the initialization block of a function if all the operands that the
+// computation of its parameters depends on are also present in that function's
+// initialization block.
+// This data structure is live only during the execution of the frontend
+// lowering to Krnl dialect pass (FrontendToKrnlLoweringPass).
+extern std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
+
 //===----------------------------------------------------------------------===//
 // Common functions used when lowering the ONNX frontend dialect to KRNL.
 //===----------------------------------------------------------------------===//
@@ -44,9 +77,14 @@ MemRefType convertToMemRefType(Type type);
 
 /// Insert an allocation and deallocation for the given MemRefType.
 Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc,
+    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
     ArrayRef<Value> operands = {}, int64_t alignment = -1);
 
+Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
+    bool functionLevelAlloc, ArrayRef<Value> operands = {},
+    int64_t alignment = -1);
+
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
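A condensed sketch of the lifetime of this state, using only the declarations above; the wrapper function itself is illustrative and not part of the patch. The lowering pass registers one `FunctionToInitStates` per module, `addInitBlock` lazily creates one `ONNXOperandsInitState` per function when its first op is lowered, and the module entry is erased once the conversion finishes.

// Sketch only: how the pass and the helpers consult initMap.
void exampleInitMapLifetime(
    ModuleOp module, FuncOp function, PatternRewriter &rewriter) {
  // Pass entry (runOnOperation): register the module.
  initMap.insert(std::pair<ModuleOp, std::unique_ptr<FunctionToInitStates>>(
      module, std::make_unique<FunctionToInitStates>()));

  // First lowered op in `function`: split its entry block into an init block
  // and a main block joined by an unconditional branch.
  addInitBlock(rewriter, function.getLoc(), function);

  // Any later lowering can query the per-function state.
  Block *initBlock = getInitBlock(function);
  Block *mainBlock = getMainBlock(function);
  (void)initBlock;
  (void)mainBlock;

  // Pass exit: the state is only live for the duration of the pass.
  initMap.erase(module);
}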
@@ -246,3 +284,20 @@ void populateLoweringONNXSplitOpPattern( bool checkOpResultIsUsedByGetRef(AllocOp *allocOp); int64_t getMemRefSizeInBytes(Value val); + +FuncOp getContainingFunction(Operation *op); + +void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp op); + +bool containingFunctionHasInitBlock(Operation *op); + +Block *getInitBlock(FuncOp function); + +Block *getMainBlock(FuncOp function); + +BranchOp getInitInsertionPoint(FuncOp function); + +bool checkAllocMovable( + FuncOp function, bool functionLevelAlloc, ArrayRef operands); + +void markOperandInInitBlock(FuncOp function, Value operand); diff --git a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp index 808de78..1fb7755 100644 --- a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp +++ b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp @@ -161,13 +161,14 @@ LstmState allocAndInitializeStates( ConversionPatternRewriter &rewriter, Location loc, ONNXLSTMOp *op, typename ONNXLSTMOp::Adaptor operandAdaptor) { LstmState state; + FuncOp function = cast(op->getParentOp()); // Insert allocation and deallocation for the results of this operation. if (!isNoneType(op->Y())) { auto yMemRefType = convertToMemRefType(op->Y().getType()); if (hasAllConstantDimensions(yMemRefType)) - state.allH = insertAllocAndDealloc(yMemRefType, loc, rewriter, - checkInsertDealloc(op->getOperation(), 0)); + state.allH = insertAllocAndDeallocWithFunction(yMemRefType, loc, rewriter, + checkInsertDealloc(op->getOperation(), 0), function, true); else { llvm_unreachable("Unsupported dynamic dimensions."); } @@ -179,8 +180,8 @@ LstmState allocAndInitializeStates( if (!isNoneType(op->Y_h())) { auto yhMemRefType = convertToMemRefType(op->Y_h().getType()); if (hasAllConstantDimensions(yhMemRefType)) - state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter, - checkInsertDealloc(op->getOperation(), 1)); + state.ht = insertAllocAndDeallocWithFunction(yhMemRefType, loc, rewriter, + checkInsertDealloc(op->getOperation(), 1), function, true); else llvm_unreachable("Unsupported dynamic dimensions."); } else { @@ -188,15 +189,16 @@ LstmState allocAndInitializeStates( {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1), dimAt(operandAdaptor.R(), 2)}, operandAdaptor.X().getType().cast().getElementType()); - state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter, true); + state.ht = insertAllocAndDeallocWithFunction( + yhMemRefType, loc, rewriter, true, function, true); } // Y_c :: [num_directions, batch_size, hidden_size] if (!isNoneType(op->Y_c())) { auto ycMemRefType = convertToMemRefType(op->Y_c().getType()); if (hasAllConstantDimensions(ycMemRefType)) - state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter, - checkInsertDealloc(op->getOperation(), 2)); + state.ct = insertAllocAndDeallocWithFunction(ycMemRefType, loc, rewriter, + checkInsertDealloc(op->getOperation(), 2), function, true); else llvm_unreachable("Unsupported dynamic dimensions."); } else { @@ -204,7 +206,8 @@ LstmState allocAndInitializeStates( {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1), dimAt(operandAdaptor.R(), 2)}, operandAdaptor.X().getType().cast().getElementType()); - state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter, true); + state.ct = insertAllocAndDeallocWithFunction( + ycMemRefType, loc, rewriter, true, function, true); } // Initialize ht and ct. 
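For lowerings that build their own `AllocOp` for dynamically shaped results (as `ONNXMatMulOpLowering` does earlier in this patch), the hoisting is driven manually with the helpers declared above. A minimal sketch, assuming a single dynamic dimension taken from `input`; the function name and the dimension logic are illustrative, the helper calls are the ones added by this patch.

// Sketch only: manual hoisting of a dynamic alloc into the init block.
Value allocDynamicResult(Operation *op, MemRefType memRefType, Value input,
    ConversionPatternRewriter &rewriter, Location loc) {
  // The guard restores the original insertion point afterwards, so only the
  // alloc itself ends up in the init block.
  PatternRewriter::InsertionGuard insertGuard(rewriter);

  FuncOp function = getContainingFunction(op);
  bool functionLevelAlloc = (op->getParentOp() == function);
  bool canMove = checkAllocMovable(function, functionLevelAlloc, {input});
  if (canMove)
    rewriter.setInsertionPoint(getInitInsertionPoint(function));

  // Dynamic dimension taken from the input operand (illustrative).
  SmallVector<Value, 4> allocOperands;
  allocOperands.emplace_back(rewriter.create<DimOp>(loc, input, 0));
  Value alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);

  // Record that the new value now lives in the init block so later allocs
  // whose sizes depend on it can be hoisted as well.
  if (canMove)
    markOperandInInitBlock(function, alloc);
  return alloc;
}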
diff --git a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp index c335d82..1bef186 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp @@ -20,6 +20,7 @@ struct ONNXConcatOpLowering : public ConversionPattern { ConversionPatternRewriter &rewriter) const final { // Gather info. auto loc = op->getLoc(); + Value alloc; bool insertDealloc = checkInsertDealloc(op); ONNXConcatOp concatOp = llvm::dyn_cast(op); @@ -33,10 +34,11 @@ struct ONNXConcatOpLowering : public ConversionPattern { assert((axis >= 0 && axis < rank) && "Concat axis out of bounds"); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else alloc = insertAllocAndDealloc( - memRefType, loc, rewriter, insertDealloc, {resultOperand}); + memRefType, loc, rewriter, insertDealloc, op, {resultOperand}); // Creates loops, one for each input. int writeOffset = 0; diff --git a/src/Conversion/ONNXToKrnl/Tensor/Identity.cpp b/src/Conversion/ONNXToKrnl/Tensor/Identity.cpp index 3f0b305..e726e2a 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Identity.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Identity.cpp @@ -18,6 +18,7 @@ struct ONNXIdentityOpLowering : public ConversionPattern { LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { + auto loc = op->getLoc(); ONNXIdentityOpAdaptor operandAdaptor(operands); rewriter.replaceOp(op, operandAdaptor.input()); return success(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp index 2f34b87..705a77d 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp @@ -40,11 +40,13 @@ struct ONNXPadOpLowering : public ConversionPattern { return emitError(loc, "Pad: unknown pads"); auto memRefType = convertToMemRefType(tensorType); + Value alloc; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else return emitError(loc, "unexpected output has non-Constant shape"); diff --git a/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp b/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp index ebc9195..6498f7e 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp @@ -32,11 +32,13 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern { // Insert an allocation and deallocation for the result of this operation. 
auto memRefType = convertToMemRefType(tensorType); + Value alloc; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else return emitError(loc, "unexpected output has non-Constant shape"); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp b/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp index d7032a2..52c2db6 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp @@ -46,7 +46,8 @@ struct ONNXReshapeOpLowering : public ConversionPattern { bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) { - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); } else { // If a dimension is zero, the actual dimension value is taken from the // input tensor. diff --git a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp index 68e1ba3..3002668 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp @@ -40,7 +40,8 @@ struct ONNXSplitOpLowering : public ConversionPattern { auto memRefType = convertToMemRefType(splitOp.outputs()[i].getType()); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else { SmallVector allocOperands; auto shape = memRefType.getShape(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Squeeze.cpp b/src/Conversion/ONNXToKrnl/Tensor/Squeeze.cpp index 87ca2fe..78b773a 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Squeeze.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Squeeze.cpp @@ -39,7 +39,8 @@ struct ONNXSqueezeOpLowering : public ConversionPattern { Value alloc, tensorSize; bool insertDealloc = checkInsertDealloc(op); if (hasAllConstantDimensions(memRefType)) { - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); auto tensorSizeInBytes = elementSizeInBytes; for (int i = 0; i < memRefShape.size(); ++i) { tensorSizeInBytes *= memRefShape[i]; diff --git a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp index d912f9e..a4d7e97 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp @@ -22,15 +22,17 @@ struct ONNXTransposeOpLowering : public ConversionPattern { auto loc = op->getLoc(); // Insert an allocation and deallocation for the result of this operation. 
auto memRefType = convertToMemRefType(*op->result_type_begin()); + Value alloc; bool insertDealloc = checkInsertDealloc(op); Value data = operandAdaptor.data(); if (hasAllConstantDimensions(memRefType)) - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); else alloc = insertAllocAndDealloc( - memRefType, loc, rewriter, insertDealloc, {data}); + memRefType, loc, rewriter, insertDealloc, op, {data}); // Number of loops auto memRefShape = memRefType.getShape(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Unsqueeze.cpp b/src/Conversion/ONNXToKrnl/Tensor/Unsqueeze.cpp index 254ddcf..c60654d 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Unsqueeze.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Unsqueeze.cpp @@ -44,7 +44,8 @@ struct ONNXUnsqueezeOpLowering : public ConversionPattern { bool insertDealloc = checkInsertDealloc(op); auto memRefShape = memRefType.getShape(); if (hasAllConstantDimensions(memRefType)) { - alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc); + alloc = + insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op); for (int i = 0; i < memRefShape.size(); ++i) { Value dimVal = emitConstantOp( rewriter, loc, rewriter.getIntegerType(64), memRefShape[i]); diff --git a/src/MainUtils.cpp b/src/MainUtils.cpp index 0a43d1d..7eb7ebb 100644 --- a/src/MainUtils.cpp +++ b/src/MainUtils.cpp @@ -378,6 +378,7 @@ void addONNXToMLIRPasses(mlir::PassManager &pm) { void addONNXToKrnlPasses(mlir::PassManager &pm) { pm.addPass(mlir::createLowerToKrnlPass()); + pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(mlir::createPackKrnlGlobalConstantsPass()); // An additional pass of canonicalization is helpful because lowering // from ONNX dialect to Standard dialect exposes additional canonicalization diff --git a/src/Transform/BundleMemoryPools.cpp b/src/Transform/BundleMemoryPools.cpp index f8e1ddc..edfeb0a 100644 --- a/src/Transform/BundleMemoryPools.cpp +++ b/src/Transform/BundleMemoryPools.cpp @@ -87,8 +87,6 @@ public: // Get a KrnlGetRefOp which does not use the current alloc. if (KrnlGetRefOp unbundledGetRef = getUnbundledGetRef(&allocOp)) { - unbundledGetRef.dump(); - // Current memory pool size is the offset for the newly bundled // internal MemRef. Emit the offset as a constant. 
auto offset = rewriter.create( diff --git a/src/Transform/EnableMemoryPool.cpp b/src/Transform/EnableMemoryPool.cpp index c41293f..3ffa8c5 100644 --- a/src/Transform/EnableMemoryPool.cpp +++ b/src/Transform/EnableMemoryPool.cpp @@ -24,10 +24,10 @@ using namespace mlir; namespace { bool checkOpResultIsReturned(AllocOp *allocOp) { - auto parentBlock = allocOp->getOperation()->getBlock(); + FuncOp function = getContainingFunction(allocOp->getOperation()); bool opIsReturned = false; - parentBlock->walk([&opIsReturned, allocOp](ReturnOp op) { + function.walk([&opIsReturned, allocOp](ReturnOp op) { auto result = allocOp->getResult(); for (const auto &operand : op.getOperands()) if (operand == result) diff --git a/test/mlir/krnl/constant.mlir b/test/mlir/krnl/constant.mlir index 8f71c7c..acf2b51 100644 --- a/test/mlir/krnl/constant.mlir +++ b/test/mlir/krnl/constant.mlir @@ -1,6 +1,4 @@ -// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s - -// ----- +// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s func @test_constant(%arg0 : tensor<1xf32>) -> tensor<*xf32> { %0 = "onnx.Constant"() {value = dense<[[0.0, 0.0], [1.0, 1.1], [2.0, 2.1]]> : tensor<3x2xf32>} : () -> tensor<*xf32> diff --git a/test/mlir/krnl/memory_pool.mlir b/test/mlir/krnl/memory_pool.mlir index 49fad5c..d013e29 100644 --- a/test/mlir/krnl/memory_pool.mlir +++ b/test/mlir/krnl/memory_pool.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --lower-krnl --lower-all-llvm %s | FileCheck %s func @test_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32> diff --git a/test/mlir/krnl/reshape.mlir b/test/mlir/krnl/reshape.mlir index dc105d2..9389818 100644 --- a/test/mlir/krnl/reshape.mlir +++ b/test/mlir/krnl/reshape.mlir @@ -1,6 +1,4 @@ -// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s - -// ----- +// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s func @test_reshape(%arg0 : tensor, %arg1 : tensor<4xi64>) -> tensor<*xf32> { %0 = "onnx.Reshape"(%arg0, %arg1) : (tensor, tensor<4xi64>) -> tensor<*xf32> diff --git a/test/mlir/onnx/onnx_bundle_memory_pool.mlir b/test/mlir/onnx/onnx_bundle_memory_pool.mlir index f6288a8..6450453 100644 --- a/test/mlir/onnx/onnx_bundle_memory_pool.mlir +++ b/test/mlir/onnx/onnx_bundle_memory_pool.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --bundle-memory-pools --canonicalize %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --bundle-memory-pools --canonicalize %s | FileCheck %s func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>) -> tensor<10x20xf32> { %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32> @@ -10,8 +10,8 @@ func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32> return %5 : tensor<10x20xf32> // CHECK-LABEL: test_bundle_memory_pool - // CHECK: [[CONST0:%.+]] = constant 0 : i64 // CHECK: [[CONST00:%.+]] = constant 
0.000000e+00 : f32 + // CHECK: [[CONST0:%.+]] = constant 0 : i64 // CHECK: [[CONST400:%.+]] = constant 400 : i64 // CHECK: [[CONST1200:%.+]] = constant 1200 : i64 // CHECK: [[CONST2000:%.+]] = constant 2000 : i64 diff --git a/test/mlir/onnx/onnx_enable_memory_pool.mlir b/test/mlir/onnx/onnx_enable_memory_pool.mlir index 62b305c..f18bc14 100644 --- a/test/mlir/onnx/onnx_enable_memory_pool.mlir +++ b/test/mlir/onnx/onnx_enable_memory_pool.mlir @@ -1,4 +1,4 @@ -// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool %s | FileCheck %s /// One intermediate value to allocate in the memory pool. func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { @@ -13,10 +13,10 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32> // CHECK: krnl.define_loops // CHECK: krnl.iterate - // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32> // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32> + // CHECK: affine.store [[ADDF1]], [[GETREF]][symbol(%arg1), symbol(%arg2)] : memref<10x10xf32> // CHECK: krnl.define_loops // CHECK: krnl.iterate // CHECK: dealloc [[MEMPOOL]] : memref<400xi8> @@ -31,8 +31,8 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 return %2 : tensor<10x20xf32> // CHECK-LABEL: test_enable_memory_pool_2 - // CHECK: [[CONST0:%.+]] = constant 0 : i64 // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32 + // CHECK: [[CONST0:%.+]] = constant 0 : i64 // CHECK: [[RES:%.+]] = alloc() : memref<10x20xf32> // CHECK: [[MEMPOOL0:%.+]] = alloc() : memref<800xi8> // CHECK: [[GETREF0:%.+]] = "krnl.getref"([[MEMPOOL0]], [[CONST0]]) : (memref<800xi8>, i64) -> memref<10x20xf32> @@ -40,24 +40,24 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 // CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32> // CHECK: krnl.define_loops // CHECK: krnl.iterate - // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32> // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADDF1]], [[GETREF1]][symbol(%arg2), symbol(%arg3)] : memref<10x10xf32> // CHECK: krnl.define_loops // CHECK: krnl.iterate - // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32> - // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32> - // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][symbol(%arg2), symbol(%arg4)] : 
memref<10x10xf32> + // CHECK: [[LOAD4:%.+]] = affine.load %arg1[symbol(%arg4), symbol(%arg3)] : memref<10x20xf32> + // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32> // CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32 // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32 - // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: affine.store [[ADDF2]], [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32> // CHECK: krnl.define_loops // CHECK: krnl.iterate - // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> - // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32> + // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32> + // CHECK: [[LOAD7:%.+]] = affine.load %arg1[symbol(%arg2), symbol(%arg3)] : memref<10x20xf32> // CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32 - // CHECK: affine.store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: affine.store [[ADDF3]], [[RES]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32> // CHECK: dealloc [[MEMPOOL1]] : memref<400xi8> // CHECK: dealloc [[MEMPOOL0]] : memref<800xi8> // CHECK: return [[RES]] : memref<10x20xf32> diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir index 5aa3ef7..f6c6dfa 100644 --- a/test/mlir/onnx/onnx_lowering.mlir +++ b/test/mlir/onnx/onnx_lowering.mlir @@ -692,100 +692,6 @@ func @test_add_with_broadcasting(%arg0 : tensor, %arg1 : tensor // CHECK: } // CHECK: return [[RES]] : memref } - -// ----- - -func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { - %0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: test_reducemax - // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> - // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { - // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32 - // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> - - // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { - // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> - // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> - // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32 - // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32 - // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32> - // CHECK: } - // CHECK: return [[RES]] : memref<3x2xf32> -} - -// ----- - -func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { - %0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: test_reducemin - // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> - // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { - // CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32 - // CHECK: affine.store [[IDENTITY]], 
[[RES]][%arg1, %arg2] : memref<3x2xf32> - - // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { - // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> - // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> - // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32 - // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32 - // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32> - // CHECK: } - // CHECK: return [[RES]] : memref<3x2xf32> -} - -// ----- - -func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { - %0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: test_reduceprod - // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> - // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { - // CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32 - // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> - - // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { - // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> - // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> - // CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32 - // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32> - // CHECK: } - // CHECK: return [[RES]] : memref<3x2xf32> -} - -// ----- - -func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { - %0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: test_reducesum - // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> - // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { - // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 - // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> - - // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { - // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> - // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> - // CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32 - // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32> - // CHECK: } - // CHECK: return [[RES]] : memref<3x2xf32> -} // ----- @@ -1107,10 +1013,10 @@ func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor) -> tensor< "std.return"(%0) : (tensor<*xf32>) -> () // CHECK-LABEL: test_matmul5 - // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32 // CHECK: [[C0:%.+]] = constant 0 : index // CHECK: [[DIM_0:%.+]] = dim 
%arg1, [[C0]] : memref // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref + // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[C0_0:%.+]] = constant 0 : index // CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref @@ -1139,10 +1045,10 @@ func @test_matmul6(%arg0 : tensor, %arg1 : tensor<5xf32>) -> tensor< "std.return"(%0) : (tensor<*xf32>) -> () // CHECK-LABEL: test_matmul6 - // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32 // CHECK: [[C0:%.+]] = constant 0 : index // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref + // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32 // CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[C0_0:%.+]] = constant 0 : index // CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref @@ -1515,506 +1421,3 @@ func @test_concat_1(%arg0 : tensor<5x5x1x32xf32>, %arg1 : tensor<5x5x3x32xf32>, // CHECK: return [[RES]] : memref<5x5x9x32xf32> } - -// ----- - -func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { - %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)> - // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)> - // CHECK-DAG: #{{.*}} = affine_map<() -> (0)> - // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)> - - // CHECK-LABEL: @test_pool_general_computation - - // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> - // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 - - // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { - - // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - - // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { - // CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> - // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: } - - // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: } -} - -// ----- - -func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> { - %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-DAG: #[[AFFINE_MAP:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)> - // 
CHECK-LABEL: test_pool_unknown_dimensions - // CHECK: [[C0:%.+]] = constant 2 : index - // CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32> - // CHECK: [[KERNEL:%.+]] = constant 2 : index - // CHECK: [[PAD:%.+]] = constant 0 : index - // CHECK: [[STRIDE:%.+]] = constant 1 : index - // CHECK: [[DILATION:%.+]] = constant 1 : index - // CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}} - // CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32> -} - -// ----- - -func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { - %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: @test_averagepool_identity_value - // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> - // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 - // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> -} - -// ----- - -func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { - %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: @test_maxpool_identity_value - // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> - // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32 - // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> -} - -// ----- - -func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { - %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: @test_averagepool_pooling_operation - // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> - - // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { - - // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { - - // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> - // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 - // CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: } - - // CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32 - // CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: } -} - -// ----- - -func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { - %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> - "std.return"(%0) : 
(tensor<*xf32>) -> () - - // CHECK-LABEL: @test_maxpool_pooling_operation - // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> - - // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { - - // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { - - // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> - // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 - // CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 - // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: } - - // CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: } -} - -// ----- - -func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> { - %cst = constant unit - %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none) - return %Y_h : tensor<*xf32> - - // CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)> - // CHECK-LABEL: @test_lstm_general_computation - - // CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32> - // CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32> - // CHECK: {{.*}} = constant unit - - // CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32 - // CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) { - // CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32> - // CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32> - // CHECK: } - - // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { - // CHECK: {{.*}} = constant 0 : index - // CHECK: {{.*}} = constant 3 : index - // CHECK: {{.*}} = constant 0 : index - // CHECK: {{.*}} = constant 1 : index - // CHECK: {{.*}} = constant 2 : index - // CHECK: {{.*}} = constant 3 : index - // CHECK: {{.*}} = constant 4 : index - // CHECK: {{.*}} = constant 5 : index - // CHECK: {{.*}} = constant 6 : index - // CHECK: {{.*}} = constant 7 : index - // CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) { - // CHECK: [[hCt:%.+]] = alloc() : memref - // 
CHECK: [[Ot:%.+]] = alloc() : memref - // CHECK: [[ct:%.+]] = alloc() : memref - // CHECK: [[Ft:%.+]] = alloc() : memref - // CHECK: [[It:%.+]] = alloc() : memref - // CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> - // CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> - - // CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32 - // CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref - // CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref - // CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref - // CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref - // CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref - // CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref - // CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref - // CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref - // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref - - // CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) { - // CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3] - // CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3] - // CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3] - // CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3] - // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32> - - // CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> - // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref - - // CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> - // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref - - // CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> - // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref - - // CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> - // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref - - // CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> - // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : 
memref - - // CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> - // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref - - // CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> - // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref - - // CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> - // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32 - // CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref - // CHECK: } - - // CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref - // CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref - // CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32 - - // CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref - // CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref - // CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref - // CHECK: {{.*}} = constant 0.000000e+00 : f32 - // CHECK: {{.*}} = constant 1.000000e+00 : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[It]][] : memref - // CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref - - // CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref - // CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref - // CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32 - - // CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref - // CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref - // CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref - // CHECK: {{.*}} = constant 0.000000e+00 : f32 - // CHECK: {{.*}} = constant 1.000000e+00 : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[Ft]][] : memref - // CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref - - // CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref - // CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref - // CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32 - - // CHECK: [[TANH_CELL:%.+]] = alloc() : memref - // CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref - // CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref - // CHECK: {{.*}} = constant 0.000000e+00 : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[ct]][] : memref - // CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref - - // CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32 - // CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32 - // CHECK: [[Ct:%.+]] = 
addf [[FtCt1]], [[Itct]] : f32 - // CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> - - // CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref - // CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref - // CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32 - - // CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref - // CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref - // CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref - // CHECK: {{.*}} = constant 0.000000e+00 : f32 - // CHECK: {{.*}} = constant 1.000000e+00 : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[Ot]][] : memref - // CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref - - // CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref - // CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref - // CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref - // CHECK: {{.*}} = constant 0.000000e+00 : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = exp {{.*}} : f32 - // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 - // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 - // CHECK: affine.store {{.*}}, [[hCt]][] : memref - // CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref - - // CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32 - // CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> - - // CHECK: dealloc [[XtWi_GEMM]] : memref - // CHECK: dealloc [[XtWo_GEMM]] : memref - // CHECK: dealloc [[XtWf_GEMM]] : memref - // CHECK: dealloc [[XtWc_GEMM]] : memref - // CHECK: dealloc [[Ht1Ri_GEMM]] : memref - // CHECK: dealloc [[Ht1Ro_GEMM]] : memref - // CHECK: dealloc [[Ht1Rf_GEMM]] : memref - // CHECK: dealloc [[Ht1Rc_GEMM]] : memref - // CHECK: dealloc [[It]] : memref - // CHECK: dealloc [[Ft]] : memref - // CHECK: dealloc [[ct]] : memref - // CHECK: dealloc [[Ot]] : memref - // CHECK: dealloc [[hCt]] : memref - // CHECK: } - // CHECK: } - // CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32> - // CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32> -} - -// ----- - -func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> { - %cst = constant unit - %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none) - return %Y_h : tensor<*xf32> - - // CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)> - // CHECK-LABEL: @test_lstm_reverse_mode - - // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { - // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index - // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}} - // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32> -} - -// ----- - -func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> { - %cst = constant unit - %Y, %Y_h, %Y_c = 
"onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none) - return %Y_h : tensor<*xf32> - - // CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)> - // CHECK-LABEL: @test_lstm_bidirectional_mode - - // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { - // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32> - - // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { - // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index - // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}} - // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32> -} - -// ----- - -func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> { - %0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>) - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: @test_squeeze - // CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32> - // CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64 - // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> () - // CHECK: return [[RES]] : memref<16x32x64xf32> -} - -// ----- - -func @test_squeeze_unknown_dimensions(%arg0 : tensor) -> tensor<*xf32> { - %0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor) -> (tensor<*xf32>) - "std.return"(%0) : (tensor<*xf32>) -> () - - // CHECK-LABEL: @test_squeeze_unknown_dimensions - // CHECK: [[C0:%.+]] = constant 0 : index - // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref - // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref - // CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64 - // CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64 - // CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64 - // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref, memref, i64) -> () - // CHECK: return [[RES]] : memref -} - -// ----- - -func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) { - %0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) - "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () - - // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 8)> - // CHECK-LABEL: @test_split_equal - - // CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32> - // CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32> - // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) { - // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> - // CHECK: } - // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) 
{ - // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1) - // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> - // CHECK: } - // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32> -} - -// ----- - -func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) { - %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) - "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () - - // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)> - // CHECK-LABEL: @test_split_variable - - // CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32> - // CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32> - // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) { - // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> - // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32> - // CHECK: } - // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 - // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) { - // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2) - // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32> - // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32> - // CHECK: } - // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32> -} - -// ----- - -func @test_split_unknown_dimension(%arg0 : tensor) -> (tensor<*xf32>, tensor<*xf32>) { - %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor) -> (tensor<*xf32>, tensor<*xf32>) - "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () - - // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)> - // CHECK-LABEL: @test_split_unknown_dimension - - // CHECK: [[C0:%.+]] = constant 0 : index - // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref - // CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref - // CHECK: [[C0_0:%.+]] = constant 0 : index - // CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref - // CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref - // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 - // CHECK: [[C0_2:%.+]] = constant 0 : index - // CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref - // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) { - // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref - // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref - // CHECK: } - // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 - // CHECK: [[C0_3:%.+]] = constant 0 : index - // CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref - // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) { - // CHECK: %[[INDEX:.+]] = affine.apply 
[[INDEX_MAP]](%arg2) - // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref - // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref - // CHECK: } - // CHECK: return [[RES_0]], [[RES_1]] : memref, memref -} diff --git a/test/mlir/onnx/onnx_lowering_lstm.mlir b/test/mlir/onnx/onnx_lowering_lstm.mlir new file mode 100644 index 0000000..92a90ed --- /dev/null +++ b/test/mlir/onnx/onnx_lowering_lstm.mlir @@ -0,0 +1,263 @@ +// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s + +func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> { + %cst = constant unit + %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none) + return %Y_h : tensor<*xf32> + + // CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)> + + // CHECK-LABEL: @test_lstm_general_computation + + // CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32> + // CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32> + // CHECK: {{.*}} = constant unit + + // CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32 + // CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) { + // CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32> + // CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32> + // CHECK: } + + // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 + // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: {{.*}} = constant 0 : index + // CHECK: {{.*}} = constant 3 : index + // CHECK: {{.*}} = constant 0 : index + // CHECK: {{.*}} = constant 1 : index + // CHECK: {{.*}} = constant 2 : index + // CHECK: {{.*}} = constant 3 : index + // CHECK: {{.*}} = constant 4 : index + // CHECK: {{.*}} = constant 5 : index + // CHECK: {{.*}} = constant 6 : index + // CHECK: {{.*}} = constant 7 : index + // CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) { + // CHECK: [[hCt:%.+]] = alloc() : memref + // CHECK: [[Ot:%.+]] = alloc() : memref + // CHECK: [[ct:%.+]] = alloc() : memref + // CHECK: [[Ft:%.+]] = alloc() : memref + // CHECK: [[It:%.+]] = alloc() : memref + // CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> + // CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> + + // CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32 + // CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref + // CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref + // CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref + // CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref + // CHECK: 
[[XtWf_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref + // CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref + // CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref + // CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref + // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref + + // CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1 + // CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) { + // CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3] + // CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3] + // CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3] + // CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3] + // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32> + + // CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> + // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref + + // CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> + // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref + + // CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> + // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref + + // CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> + // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref + + // CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> + // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref + + // CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> + // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref + + // CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32> + // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref + + // CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32> + // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32 + // CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref + // CHECK: 
{{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref + // CHECK: } + + // CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref + // CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref + // CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32 + + // CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref + // CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref + // CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref + // CHECK: {{.*}} = constant 0.000000e+00 : f32 + // CHECK: {{.*}} = constant 1.000000e+00 : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[It]][] : memref + // CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref + + // CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref + // CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref + // CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32 + + // CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref + // CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref + // CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref + // CHECK: {{.*}} = constant 0.000000e+00 : f32 + // CHECK: {{.*}} = constant 1.000000e+00 : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[Ft]][] : memref + // CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref + + // CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref + // CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref + // CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32 + + // CHECK: [[TANH_CELL:%.+]] = alloc() : memref + // CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref + // CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref + // CHECK: {{.*}} = constant 0.000000e+00 : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[ct]][] : memref + // CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref + + // CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32 + // CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32 + // CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32 + // CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> + + // CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref + // CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref + // CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32 + + // CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref + // CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref + // CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref + // CHECK: {{.*}} = constant 0.000000e+00 : f32 + // CHECK: {{.*}} = constant 1.000000e+00 : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[Ot]][] : memref + // CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] 
: memref + + // CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref + // CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref + // CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref + // CHECK: {{.*}} = constant 0.000000e+00 : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = exp {{.*}} : f32 + // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32 + // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32 + // CHECK: affine.store {{.*}}, [[hCt]][] : memref + // CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref + + // CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32 + // CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32> + + // CHECK: dealloc [[XtWi_GEMM]] : memref + // CHECK: dealloc [[XtWo_GEMM]] : memref + // CHECK: dealloc [[XtWf_GEMM]] : memref + // CHECK: dealloc [[XtWc_GEMM]] : memref + // CHECK: dealloc [[Ht1Ri_GEMM]] : memref + // CHECK: dealloc [[Ht1Ro_GEMM]] : memref + // CHECK: dealloc [[Ht1Rf_GEMM]] : memref + // CHECK: dealloc [[Ht1Rc_GEMM]] : memref + // CHECK: dealloc [[It]] : memref + // CHECK: dealloc [[Ft]] : memref + // CHECK: dealloc [[ct]] : memref + // CHECK: dealloc [[Ot]] : memref + // CHECK: dealloc [[hCt]] : memref + // CHECK: } + // CHECK: } + // CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32> + // CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32> +} + +// ----- + +func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> { + %cst = constant unit + %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none) + return %Y_h : tensor<*xf32> + + // CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)> + + // CHECK-LABEL: @test_lstm_reverse_mode + + // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 + // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index + // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}} + // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32> +} + +// ----- + +func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> { + %cst = constant unit + %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none) + return %Y_h : tensor<*xf32> + + // CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)> + + // CHECK-LABEL: @test_lstm_bidirectional_mode + + // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 + // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32> + + // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 + // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index + // CHECK: 
%[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}} + // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32> +} diff --git a/test/mlir/onnx/onnx_lowering_pooling.mlir b/test/mlir/onnx/onnx_lowering_pooling.mlir new file mode 100644 index 0000000..c4e4ae8 --- /dev/null +++ b/test/mlir/onnx/onnx_lowering_pooling.mlir @@ -0,0 +1,121 @@ +// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s | FileCheck %s + +// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)> +// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)> +// CHECK-DAG: #{{.*}} = affine_map<() -> (0)> +// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)> + +// CHECK-DAG: #[[AFFINE_MAP1:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)> + +func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { + %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: @test_pool_general_computation + + // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> + // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 + + // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 + // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { + + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + + // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> + // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: } + + // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: } +} + +func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> { + %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: test_pool_unknown_dimensions + // CHECK: [[C0:%.+]] = constant 2 : index + // CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32> + // CHECK: [[KERNEL:%.+]] = constant 2 : index + // CHECK: [[PAD:%.+]] = constant 0 : index + // CHECK: [[STRIDE:%.+]] = constant 1 : index + // CHECK: [[DILATION:%.+]] = constant 1 : index + // CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP1]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}} + // CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : 
memref<1x3x?x31xf32> +} + +func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { + %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: @test_averagepool_identity_value + // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> + // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> +} + +func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { + %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: @test_maxpool_identity_value + // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> + // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32 + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> +} + +func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { + %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: @test_averagepool_pooling_operation + // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> + + // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 + // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { + + // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + + // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> + // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 + // CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: } + + // CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32 + // CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: } +} + +// ----- + +func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> { + %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: @test_maxpool_pooling_operation + // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> + + // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 + // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { + + // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[POOL_LOOPS]]#0, 
[[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + + // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> + // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 + // CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 + // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: } + + // CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: } +} diff --git a/test/mlir/onnx/onnx_lowering_reductions.mlir b/test/mlir/onnx/onnx_lowering_reductions.mlir new file mode 100644 index 0000000..99d0a6c --- /dev/null +++ b/test/mlir/onnx/onnx_lowering_reductions.mlir @@ -0,0 +1,93 @@ +// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s + +func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: test_reducemax + // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> + // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { + // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32 + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> + + // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> + // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32 + // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32 + // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32> + // CHECK: } + // CHECK: return [[RES]] : memref<3x2xf32> +} + +// ----- + +func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: test_reducemin + // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> + // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { + // CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32 + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> + + // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> + // CHECK: [[LOAD2:%.+]] = affine.load 
%0[%arg1, %arg3] : memref<3x2xf32> + // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32 + // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32 + // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32> + // CHECK: } + // CHECK: return [[RES]] : memref<3x2xf32> +} + +// ----- + +func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: test_reduceprod + // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> + // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { + // CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32 + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> + + // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> + // CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32 + // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32> + // CHECK: } + // CHECK: return [[RES]] : memref<3x2xf32> +} + +// ----- + +func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> { + %0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32> + "std.return"(%0) : (tensor<*xf32>) -> () + + // CHECK-LABEL: test_reducesum + // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32> + // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2 + // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) { + // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32> + + // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) { + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32> + // CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32 + // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32> + // CHECK: } + // CHECK: return [[RES]] : memref<3x2xf32> +} diff --git a/test/mlir/onnx/onnx_lowering_split.mlir b/test/mlir/onnx/onnx_lowering_split.mlir new file mode 100644 index 0000000..e393ac1 --- /dev/null +++ b/test/mlir/onnx/onnx_lowering_split.mlir @@ -0,0 +1,85 @@ +// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s + +func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) { + %0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) + "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () + + // CHECK: [[INDEX_MAP1:#.+]] = affine_map<(d0) -> (d0 + 8)> + + // CHECK-LABEL: @test_split_equal + + // CHECK: [[RES_1:%.+]] = alloc() : 
memref<8x32x64xf32> + // CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32> + // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) { + // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> + // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> + // CHECK: } + // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) { + // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP1]](%arg1) + // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32> + // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32> + // CHECK: } + // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32> +} + +// ----- + +func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) { + %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) + "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () + + // CHECK: [[INDEX_MAP2:#.+]] = affine_map<(d0) -> (d0 + 2)> + + // CHECK-LABEL: @test_split_variable + + // CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32> + // CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32> + // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) { + // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32> + // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32> + // CHECK: } + // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3 + // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) { + // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP2]](%arg2) + // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32> + // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32> + // CHECK: } + // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32> +} + +// ----- + +func @test_split_unknown_dimension(%arg0 : tensor) -> (tensor<*xf32>, tensor<*xf32>) { + %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor) -> (tensor<*xf32>, tensor<*xf32>) + "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> () + + // CHECK: [[INDEX_MAP3:#.+]] = affine_map<(d0) -> (d0 + 2)> + + // CHECK-LABEL: @test_split_unknown_dimension + + // CHECK: [[C0:%.+]] = constant 0 : index + // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref + // CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref + // CHECK: [[C0_0:%.+]] = constant 0 : index + // CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref + // CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref + // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 + // CHECK: [[C0_2:%.+]] = constant 0 : index + // CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref + // 
CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
+  // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref
+  // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref
+  // CHECK: }
+  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
+  // CHECK: [[C0_3:%.+]] = constant 0 : index
+  // CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref
+  // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
+  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP3]](%arg2)
+  // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref
+  // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref
+  // CHECK: }
+  // CHECK: return [[RES_0]], [[RES_1]] : memref, memref
+}
diff --git a/test/mlir/onnx/onnx_lowering_squeeze.mlir b/test/mlir/onnx/onnx_lowering_squeeze.mlir
new file mode 100644
index 0000000..d0f49e7
--- /dev/null
+++ b/test/mlir/onnx/onnx_lowering_squeeze.mlir
@@ -0,0 +1,29 @@
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
+
+func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
+  "std.return"(%0) : (tensor<*xf32>) -> ()
+
+  // CHECK-LABEL: @test_squeeze
+  // CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
+  // CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
+  // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
+  // CHECK: return [[RES]] : memref<16x32x64xf32>
+}
+
+// -----
+
+func @test_squeeze_unknown_dimensions(%arg0 : tensor) -> tensor<*xf32> {
+  %0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor) -> (tensor<*xf32>)
+  "std.return"(%0) : (tensor<*xf32>) -> ()
+
+  // CHECK-LABEL: @test_squeeze_unknown_dimensions
+  // CHECK: [[C0:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
+  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
+  // CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
+  // CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
+  // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref, memref, i64) -> ()
+  // CHECK: return [[RES]] : memref
+}
diff --git a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir
index ccf653b..64e61a6 100644
--- a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir
+++ b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir
@@ -239,10 +239,15 @@ func @test_exp_exp(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_exp_exp
-  /// First Exp
+
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Exp
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -252,9 +257,6 @@ func @test_exp_exp(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Exp
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -278,10 +280,14 @@ func @test_tanh_tanh(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_tanh_tanh
-  /// First Tanh
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Tanh
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -297,9 +303,6 @@ func @test_tanh_tanh(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[TANH]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Tanh
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -329,10 +332,14 @@ func @test_sinh_sinh(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_sinh_sinh
-  /// First Sinh
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_0:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Sinh
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -348,9 +355,6 @@ func @test_sinh_sinh(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Sinh
-  // CHECK: [[C0_0:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -380,10 +384,14 @@ func @test_cosh_cosh(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_cosh_cosh
-  /// First Cosh
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Cosh
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -399,9 +407,6 @@ func @test_cosh_cosh(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Cosh
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -430,10 +435,14 @@ func @test_sigmoid_sigmoid(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_sigmoid_sigmoid
-  /// First Sigmoid
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Sigmoid
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -448,9 +457,6 @@ func @test_sigmoid_sigmoid(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Sigmoid
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -479,10 +485,14 @@ func @test_relu_relu(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_relu_relu
-  /// First Relu
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Relu
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -494,9 +504,6 @@ func @test_relu_relu(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Relu
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -625,10 +632,14 @@ func @test_elu_elu(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_elu_elu
-  /// First Elu
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Elu
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -645,9 +656,6 @@ func @test_elu_elu(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Elu
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -678,10 +686,14 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_leakyrelu_leakyrelu
-  /// First LeakyRelu
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First LeakyRelu
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -695,9 +707,6 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref
 
   /// Second LeakyRelu
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -725,10 +734,14 @@ func @test_selu_selu(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_selu_selu
-  /// First Selu
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Selu
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -746,9 +759,6 @@ func @test_selu_selu(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Selu
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -780,10 +790,14 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_hardsigmoid_hardsigmoid
-  /// First HardSigmoid
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First HardSigmoid
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -802,9 +816,6 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[SELECT2]], [[RES]][%arg1, %arg2] : memref
 
   /// Second HardSigmoid
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
@@ -837,10 +848,14 @@ func @test_reciprocal_reciprocal(%arg0 : tensor) -> tensor<*xf32> {
   "std.return"(%1) : (tensor<*xf32>) -> ()
 
   // CHECK-LABEL: test_reciprocal_reciprocal
-  /// First Reciprocal
   // CHECK: [[C0:%.+]] = constant 0 : index
   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref
   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref
+  // CHECK: [[C0_1:%.+]] = constant 0 : index
+  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
+  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
+
+  /// First Reciprocal
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_0:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref
@@ -851,9 +866,6 @@ func @test_reciprocal_reciprocal(%arg0 : tensor) -> tensor<*xf32> {
   // CHECK: affine.store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref
 
   /// Second Reciprocal
-  // CHECK: [[C0_1:%.+]] = constant 0 : index
-  // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref
-  // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[C0_2:%.+]] = constant 0 : index
   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref
diff --git a/test/mlir/onnx/onnx_structure.mlir b/test/mlir/onnx/onnx_structure.mlir
index c295171..c8ce47c 100644
--- a/test/mlir/onnx/onnx_structure.mlir
+++ b/test/mlir/onnx/onnx_structure.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt %s | FileCheck %s
 
 //===----------------------------------------------------------------------===//
 // CHECK-LABEL: @check_map1(%arg0: tuple) -> tensor<*xf32> {