Revert "Emit allocs at the top of functions (#222)" (#226)

This reverts commit b27e57cc4f.
Author: Gheorghe-Teodor Bercea, 2020-07-21 18:30:39 -04:00 (committed by GitHub)
Commit: a58594ec81, parent: b27e57cc4f
37 changed files with 725 additions and 1032 deletions


@@ -1,4 +1,5 @@
-//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering -------===//
+//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering
+//--------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -33,38 +34,6 @@ public:
   }
 };
 
-//===----------------------------------------------------------------------===//
-// FuncOp lowering to Function with init and main blocks.
-//===----------------------------------------------------------------------===//
-struct FuncOpSignatureConversion : public OpConversionPattern<FuncOp> {
-  FuncOpSignatureConversion(MLIRContext *ctx, TypeConverter &converter)
-      : OpConversionPattern(converter, ctx) {}
-
-  /// Hook for derived classes to implement combined matching and rewriting.
-  LogicalResult matchAndRewrite(FuncOp funcOp, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
-    FunctionType type = funcOp.getType();
-
-    // Convert the original function types.
-    TypeConverter::SignatureConversion result(type.getNumInputs());
-    SmallVector<Type, 1> newResults;
-    if (failed(typeConverter->convertSignatureArgs(type.getInputs(), result)) ||
-        failed(typeConverter->convertTypes(type.getResults(), newResults)) ||
-        failed(rewriter.convertRegionTypes(
-            &funcOp.getBody(), *typeConverter, &result)))
-      return failure();
-
-    // Update the function signature in-place.
-    rewriter.updateRootInPlace(funcOp, [&] {
-      funcOp.setType(FunctionType::get(
-          result.getConvertedTypes(), newResults, funcOp.getContext()));
-    });
-
-    addInitBlock(rewriter, funcOp.getLoc(), funcOp);
-    return success();
-  }
-};
-
 //===----------------------------------------------------------------------===//
 // Frontend to Krnl Dialect lowering pass
 //===----------------------------------------------------------------------===//
@@ -80,10 +49,6 @@ struct FrontendToKrnlLoweringPass
 void FrontendToKrnlLoweringPass::runOnOperation() {
   ModuleOp module = getOperation();
 
-  // Create an entry for this module
-  initMap.insert(std::pair<ModuleOp, std::unique_ptr<FunctionToInitStates>>(
-      module, std::make_unique<FunctionToInitStates>()));
-
   // The first thing to define is the conversion target. This will define the
   // final target for this lowering.
   ConversionTarget target(getContext());
@@ -112,6 +77,12 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
     return tensor_to_memref_converter.isSignatureLegal(op.getType());
   });
 
+  // Type conversion for function signatures.
+  // Call MLIR FuncOp signature conversion when result type is
+  // a ranked tensor.
+  populateFuncOpTypeConversionPattern(
+      patterns, &getContext(), tensor_to_memref_converter);
+
   // Frontend operation lowering.
   // Math
   populateLoweringONNXElementwiseOpPattern(patterns, &getContext());
@@ -138,16 +109,12 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
   populateLoweringONNXLSTMOpPattern(patterns, &getContext());
 
   // Entry point
   patterns.insert<ONNXEntryPointLowering>(&getContext());
-  patterns.insert<FuncOpSignatureConversion>(
-      &getContext(), tensor_to_memref_converter);
 
   // With the target and rewrite patterns defined, we can now attempt the
   // conversion. The conversion will signal failure if any of our `illegal`
   // operations were not converted successfully.
   if (failed(applyPartialConversion(module, target, patterns)))
     signalPassFailure();
-
-  initMap.erase(module);
 }
 
 std::unique_ptr<Pass> mlir::createLowerToKrnlPass() {
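
Note: for orientation, here is a condensed sketch of the pass setup this revert returns to. Identifiers are taken from the hunk above; the TensorTypeConverter and OwningRewritePatternList declarations and the long list of op-lowering registrations are assumed boilerplate, not quoted verbatim from this diff.

// Condensed sketch only, not the full file. After the revert, FuncOp
// signatures are rewritten by the stock MLIR helper instead of the custom
// FuncOpSignatureConversion pattern with its init/main block split.
void FrontendToKrnlLoweringPass::runOnOperation() {
  ModuleOp module = getOperation();
  ConversionTarget target(getContext());
  OwningRewritePatternList patterns;              // assumed declaration
  TensorTypeConverter tensor_to_memref_converter; // assumed declaration

  // FuncOps stay illegal until their signatures use memref types.
  target.addDynamicallyLegalOp<FuncOp>([&](FuncOp op) {
    return tensor_to_memref_converter.isSignatureLegal(op.getType());
  });

  // Stock signature conversion, as restored by this commit.
  populateFuncOpTypeConversionPattern(
      patterns, &getContext(), tensor_to_memref_converter);

  // ONNX op lowerings and the entry point pattern are registered here
  // (omitted), then the partial conversion runs.
  if (failed(applyPartialConversion(module, target, patterns)))
    signalPassFailure();
}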


@@ -518,11 +518,10 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {X});
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, {X});
 
     SmallVector<Value, 4> loopIVs;
     if (!hasAllScalarValues(operands)) {
@@ -575,11 +574,10 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
     // comes from.
     // TODO: can the dimension of the result differ after optimizations?
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, operands);
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, operands);
 
     SmallVector<Value, 4> loopIVs;
     std::map<int, std::map<int, Value>> broadcastedDimInfo;


@@ -46,8 +46,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
       auto memRefShape = memRefType.getShape();
       SmallVector<Value, 2> allocOperands;


@@ -43,16 +43,8 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
-      PatternRewriter::InsertionGuard insertGuard(rewriter);
-      FuncOp function = getContainingFunction(op);
-      bool functionLevelAlloc = (op->getParentOp() == function);
-      bool canMove = checkAllocMovable(function, functionLevelAlloc, {A, B});
-      if (canMove)
-        rewriter.setInsertionPoint(getInitInsertionPoint(function));
-
       SmallVector<Value, 4> allocOperands;
       if (AShape.size() >= 2 && BShape.size() >= 2) {
         // Both arguments are N-D, N >= 2
@@ -116,9 +108,6 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
       }
 
       alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);
-
-      if (canMove)
-        markOperandInInitBlock(function, alloc);
     }
 
     if (AShape.size() >= 2 || BShape.size() >= 2) {


@@ -159,8 +159,8 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefOutType)) {
-      alloc = insertAllocAndDealloc(
-          memRefOutType, loc, rewriter, insertDealloc, op);
+      alloc =
+          insertAllocAndDealloc(memRefOutType, loc, rewriter, insertDealloc);
     } else {
       SmallVector<Value, 2> allocOperands;
       for (decltype(outRank) i = 0; i < outRank; ++i) {


@@ -36,21 +36,18 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, input);
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, input);
 
     // Shape of the result
     auto memRefShape = memRefType.getShape();
 
     // Insert allocations and deallocations for sum and max.
     MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0);
-    Value sumOp =
-        insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op);
-    Value maxOp =
-        insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op);
+    Value sumOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true);
+    Value maxOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true);
     Value zero = emitConstantOp(rewriter, loc, elementType, 0);
     Value negInfinity = rewriter.create<ConstantOp>(loc,
         FloatAttr::get(elementType, -std::numeric_limits<float>::infinity()));


@@ -36,11 +36,10 @@ struct ONNXConvOpLowering : public ConversionPattern {
     bool hasBias = !biasOperand.getType().isa<NoneType>();
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {inputOperand});
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, {inputOperand});
 
     // R = Conv(D, K)
     //


@@ -42,11 +42,10 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {operand});
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, {operand});
 
     // Operand's dimensions can be in the form of NxCxD1xD2x...xDn or N.
     // In case of N, C is assumed to be 1.


@@ -235,8 +235,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
       alloc = insertAllocAndDeallocForPooling(rewriter, loc, insertDealloc,
           memRefType, inputOperand, kernelShape, pads, strides, dilations,


@@ -11,8 +11,6 @@
 #include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
 
-std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
-
 /// Check is all dimensions are known at compile time.
 bool hasAllConstantDimensions(MemRefType type) {
   auto memRefShape = type.getShape();
@@ -45,151 +43,11 @@ MemRefType convertToMemRefType(Type type) {
   return memRefType;
 }
 
-/// Retrieve function which contains the current operation.
-FuncOp getContainingFunction(Operation *op) {
-  Operation *parentFuncOp = op->getParentOp();
-
-  // While parent is not a FuncOp and its cast to a FuncOp is null.
-  while (!llvm::dyn_cast_or_null<FuncOp>(parentFuncOp))
-    parentFuncOp = parentFuncOp->getParentOp();
-
-  return cast<FuncOp>(parentFuncOp);
-}
-
-void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp function) {
-  // If this is the first time we encounter an operation in this
-  // function, we create an entry inside the initMap and split the
-  // function body into an init block and a main block.
-  //
-  // function func_name() {
-  //    ... init block ...
-  //    br ^bb1
-  // ^bb1: // pred: ^bb0
-  //    ... main block ...
-  //    return
-  // }
-  //
-  // Note: the block ^bb0 being the first block has its label omitted.
-  //
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  if (initStates->count(function) == 0) {
-    initStates->insert(
-        std::pair<FuncOp, std::unique_ptr<ONNXOperandsInitState>>(
-            function, std::make_unique<ONNXOperandsInitState>()));
-    std::unique_ptr<ONNXOperandsInitState> &initState =
-        initStates->at(function);
-
-    // All input arguments are considered as part of the initialization block
-    // so add them to the operandsInInitBlock set.
-    Block *functionBlock = &function.front();
-    for (auto arg : functionBlock->getArguments())
-      initState->operandsInInitBlock.insert(arg);
-
-    PatternRewriter::InsertionGuard insertGuard(rewriter);
-    rewriter.setInsertionPointToStart(functionBlock);
-
-    initState->initBlock = rewriter.getInsertionBlock();
-    auto currentPoint = rewriter.getInsertionPoint();
-    initState->mainBlock =
-        rewriter.splitBlock(initState->initBlock, currentPoint);
-
-    rewriter.setInsertionPointToEnd(initState->initBlock);
-
-    // Insert a branch operation from initBlock to mainBlock. This
-    // ensures the final code contains legal blocks.
-    initState->branchInit =
-        rewriter.create<BranchOp>(loc, initState->mainBlock);
-
-    // Set insertion point to start of mainBlock.
-    rewriter.setInsertionPointToStart(initState->mainBlock);
-  }
-}
-
-bool containingFunctionHasInitBlock(Operation *op) {
-  FuncOp function = getContainingFunction(op);
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  return initStates->count(function) > 0;
-}
-
-Block *getInitBlock(FuncOp function) {
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  return initStates->at(function)->initBlock;
-}
-
-Block *getMainBlock(FuncOp function) {
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  return initStates->at(function)->mainBlock;
-}
-
-BranchOp getInitInsertionPoint(FuncOp function) {
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  return initStates->at(function)->branchInit;
-}
-
-/// Check if all operands used for allocating the size of the result are
-/// in the initialization block (i.e. initBlock).
-bool checkAllocMovable(
-    FuncOp function, bool functionLevelAlloc, ArrayRef<Value> operands) {
-  // If no initialization block exists then alloc cannot be moved.
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  if (initStates->count(function) == 0)
-    return false;
-
-  // If the alloc is not function level alloc then it cannot be moved.
-  if (!functionLevelAlloc)
-    return false;
-
-  bool allInitOrArg = true;
-  for (int i = 0; i < operands.size(); i++) {
-    if (initStates->at(function)->operandsInInitBlock.count(operands[i]) == 0)
-      allInitOrArg = false;
-  }
-
-  return allInitOrArg;
-}
-
-/// Add operand to list of operands in the init block.
-void markOperandInInitBlock(FuncOp function, Value operand) {
-  // Check if function is valid. At this point it has to be.
-  assert(function && "Attempt to add operand when function is null.");
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  // A valid function must have an initialization state.
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  initStates->at(function)->operandsInInitBlock.insert(operand);
-}
-
 /// Insert an allocation and deallocation for the given MemRefType.
-Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
-    bool functionLevelAlloc, ArrayRef<Value> operands, int64_t alignment) {
+Value insertAllocAndDealloc(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, ArrayRef<Value> operands,
+    int64_t alignment) {
   // Put together alloc operands for any dynamic dimensions of the memref.
-  // Save insertion point in case we need to change it to the initBlock.
-  PatternRewriter::InsertionGuard insertGuard(rewriter);
-
-  // Check if all operands of the alloc are in the init region or are input
-  // arguments. If some of them are not or there is no init block, this
-  // variable will be false.
-  bool canMove = checkAllocMovable(function, functionLevelAlloc, operands);
-
-  // If a legal move to the init block is possible, set insertion point
-  // at the end of the initialization block just before the branch instruction.
-  if (canMove)
-    rewriter.setInsertionPoint(getInitInsertionPoint(function));
-
   AllocOp alloc;
   if (!operands.empty()) {
     auto memRefShape = type.getShape();
@@ -239,11 +97,6 @@ Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
     } else {
       alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
     }
-
-    // If the alloc was emitted inside the initializatin block then mark add
-    // it to the set of values emitted in the initialization block.
-    if (canMove)
-      markOperandInInitBlock(function, alloc.getResult());
   } else {
     // Set alignment attribute. Default value is `-1`, which does not set
     // alignment.
@@ -260,52 +113,17 @@ Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
   // Make sure to allocate at the beginning of the block if
   // all dimensions are known.
   auto *parentBlock = alloc.getOperation()->getBlock();
-  if (hasAllConstantDimensions(type)) {
-    // Check if this move is a move to the init block or to the top of the
-    // function without an init block. For the case in which all dimensions
-    // are constant, the `canMove` variable will be false if there is no
-    // init block.
-    if (canMove) {
-      // The alloc was emitted in the init block already so just record
-      // that this value is not available in the init block.
-      alloc.getOperation()->moveBefore(&getInitBlock(function)->front());
-      markOperandInInitBlock(function, alloc.getResult());
-    } else {
-      // No init block exists in this case so just move it as before.
-      alloc.getOperation()->moveBefore(&parentBlock->front());
-    }
-  }
+  if (hasAllConstantDimensions(type))
+    alloc.getOperation()->moveBefore(&parentBlock->front());
 
   if (insertDealloc) {
     auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
-    // Move dealloc to the end of the main block if such a block exists.
-    if (canMove) {
-      Block *mainBlock = getMainBlock(function);
-      dealloc.getOperation()->moveBefore(&mainBlock->back());
-    } else {
-      // If no main block exists, move to parent block.
-      dealloc.getOperation()->moveBefore(&parentBlock->back());
-    }
+    dealloc.getOperation()->moveBefore(&parentBlock->back());
   }
 
   return alloc;
 }
 
-/// Insert an allocation and deallocation for the given MemRefType.
-Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
-    ArrayRef<Value> operands, int64_t alignment) {
-  FuncOp function = getContainingFunction(op);
-  bool functionLevelAlloc = (op->getParentOp() == function);
-  if (!functionLevelAlloc) {
-    printf("This is not a function level alloc!\n");
-  }
-  return insertAllocAndDeallocWithFunction(type, loc, rewriter, insertDealloc,
-      function, functionLevelAlloc, operands, alignment);
-}
-
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
@@ -645,10 +463,10 @@ int64_t ArrayAttrIntVal(ArrayAttr a, int i) {
 }
 
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp) {
-  FuncOp function = getContainingFunction(allocOp->getOperation());
+  auto parentBlock = allocOp->getOperation()->getBlock();
   bool opIsUsedInGetRef = false;
-  function.walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
+  parentBlock->walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
     auto result = allocOp->getResult();
    for (const auto &operand : op.getOperands())
      if (operand == result)
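
Note: the call-site changes throughout this commit all reduce to the same idiom. A minimal sketch of how a lowering uses the restored helper is shown below; memRefType, loc, rewriter, op, and the operand X are assumed to come from the surrounding ConversionPattern, as in the elementwise hunk earlier in this diff.

// Sketch of the post-revert allocation idiom used by the ONNX-to-Krnl lowerings.
Value alloc;
bool insertDealloc = checkInsertDealloc(op);
if (hasAllConstantDimensions(memRefType))
  // Static shape: no operands needed; the helper hoists the alloc to the
  // front of its parent block and places any dealloc at the block's end.
  alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
else
  // Dynamic shape: pass the values whose dimensions feed the dynamic alloc
  // (here a single input X, following the unary elementwise lowering).
  alloc = insertAllocAndDealloc(
      memRefType, loc, rewriter, insertDealloc, {X});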


@@ -19,9 +19,7 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Sequence.h"
-#include "llvm/ADT/SetVector.h"
 
 #include "src/Dialect/Krnl/KrnlHelper.hpp"
 #include "src/Dialect/Krnl/KrnlOps.hpp"
@@ -31,37 +29,6 @@
 
 using namespace mlir;
 
-//===----------------------------------------------------------------------===//
-// Insertion point for initialization instructions and the blocks used for
-// inserting the initialization and main code. These blocks will disappear
-// when the first canonicalization is performed because the init block
-// unconditionally branches into the second block. These blocks exist only for
-// the purpose of this optimization.
-// The support happens on a per function basis.
-//===----------------------------------------------------------------------===//
-
-typedef struct ONNXOperandsInitState {
-  Block *initBlock;
-  Block *mainBlock;
-  BranchOp branchInit;
-  llvm::SetVector<Value> operandsInInitBlock;
-} ONNXOperandsInitState;
-
-typedef std::map<FuncOp, std::unique_ptr<ONNXOperandsInitState>>
-    FunctionToInitStates;
-
-// This map is used by the FrontendToKrnlLoweringPass pass to keep track of the
-// allocations emitted in the initialization block for each function of a given
-// module. A translation unit can consist of several modules, each with several
-// functions hence the structure shown below.
-// This data structure enables the emission of dyanmic `alloc` instructions
-// in the initialization block of a function if all the other operands the
-// computation of its parameters depends on are also present in that function's
-// initialization block.
-// This data structure is live only during the execution of the frontend
-// lowering to Krnl dialect pass (FrontendToKrnlLoweringPass).
-extern std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
-
 //===----------------------------------------------------------------------===//
 // Common functions used when lowering the ONNX frontend dialect to KRNL.
 //===----------------------------------------------------------------------===//
@@ -77,14 +44,9 @@ MemRefType convertToMemRefType(Type type);
 /// Insert an allocation and deallocation for the given MemRefType.
 Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
+    PatternRewriter &rewriter, bool insertDealloc,
     ArrayRef<Value> operands = {}, int64_t alignment = -1);
-
-Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
-    bool functionLevelAlloc, ArrayRef<Value> operands = {},
-    int64_t alignment = -1);
-
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
@@ -284,20 +246,3 @@ void populateLoweringONNXSplitOpPattern(
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp);
 
 int64_t getMemRefSizeInBytes(Value val);
-
-FuncOp getContainingFunction(Operation *op);
-
-void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp op);
-
-bool containingFunctionHasInitBlock(Operation *op);
-
-Block *getInitBlock(FuncOp function);
-
-Block *getMainBlock(FuncOp function);
-
-BranchOp getInitInsertionPoint(FuncOp function);
-
-bool checkAllocMovable(
-    FuncOp function, bool functionLevelAlloc, ArrayRef<Value> operands);
-
-void markOperandInInitBlock(FuncOp function, Value operand);


@@ -161,14 +161,13 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
     ConversionPatternRewriter &rewriter, Location loc, ONNXLSTMOp *op,
     typename ONNXLSTMOp::Adaptor operandAdaptor) {
   LstmState state;
-  FuncOp function = cast<FuncOp>(op->getParentOp());
 
   // Insert allocation and deallocation for the results of this operation.
   if (!isNoneType(op->Y())) {
     auto yMemRefType = convertToMemRefType(op->Y().getType());
     if (hasAllConstantDimensions(yMemRefType))
-      state.allH = insertAllocAndDeallocWithFunction(yMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 0), function, true);
+      state.allH = insertAllocAndDealloc(yMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 0));
     else {
       llvm_unreachable("Unsupported dynamic dimensions.");
     }
@@ -180,8 +179,8 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
   if (!isNoneType(op->Y_h())) {
     auto yhMemRefType = convertToMemRefType(op->Y_h().getType());
     if (hasAllConstantDimensions(yhMemRefType))
-      state.ht = insertAllocAndDeallocWithFunction(yhMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 1), function, true);
+      state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 1));
     else
       llvm_unreachable("Unsupported dynamic dimensions.");
   } else {
@@ -189,16 +188,15 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
         {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1),
             dimAt(operandAdaptor.R(), 2)},
        operandAdaptor.X().getType().cast<ShapedType>().getElementType());
-    state.ht = insertAllocAndDeallocWithFunction(
-        yhMemRefType, loc, rewriter, true, function, true);
+    state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter, true);
   }
 
   // Y_c :: [num_directions, batch_size, hidden_size]
   if (!isNoneType(op->Y_c())) {
     auto ycMemRefType = convertToMemRefType(op->Y_c().getType());
     if (hasAllConstantDimensions(ycMemRefType))
-      state.ct = insertAllocAndDeallocWithFunction(ycMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 2), function, true);
+      state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 2));
     else
       llvm_unreachable("Unsupported dynamic dimensions.");
   } else {
@@ -206,8 +204,7 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
         {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1),
            dimAt(operandAdaptor.R(), 2)},
        operandAdaptor.X().getType().cast<ShapedType>().getElementType());
-    state.ct = insertAllocAndDeallocWithFunction(
-        ycMemRefType, loc, rewriter, true, function, true);
+    state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter, true);
   }
 
   // Initialize ht and ct.


@@ -20,7 +20,6 @@ struct ONNXConcatOpLowering : public ConversionPattern {
       ConversionPatternRewriter &rewriter) const final {
     // Gather info.
     auto loc = op->getLoc();
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     ONNXConcatOp concatOp = llvm::dyn_cast<ONNXConcatOp>(op);
@@ -34,11 +33,10 @@ struct ONNXConcatOpLowering : public ConversionPattern {
     assert((axis >= 0 && axis < rank) && "Concat axis out of bounds");
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {resultOperand});
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, {resultOperand});
 
     // Creates loops, one for each input.
     int writeOffset = 0;


@@ -18,7 +18,6 @@ struct ONNXIdentityOpLowering : public ConversionPattern {
   LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const final {
-    auto loc = op->getLoc();
     ONNXIdentityOpAdaptor operandAdaptor(operands);
     rewriter.replaceOp(op, operandAdaptor.input());
     return success();


@@ -40,13 +40,11 @@ struct ONNXPadOpLowering : public ConversionPattern {
       return emitError(loc, "Pad: unknown pads");
 
     auto memRefType = convertToMemRefType(tensorType);
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       return emitError(loc, "unexpected output has non-Constant shape");


@@ -32,13 +32,11 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertToMemRefType(tensorType);
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       return emitError(loc, "unexpected output has non-Constant shape");


@@ -46,8 +46,7 @@ struct ONNXReshapeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType)) {
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     } else {
       // If a dimension is zero, the actual dimension value is taken from the
       // input tensor.


@@ -40,8 +40,7 @@ struct ONNXSplitOpLowering : public ConversionPattern {
     auto memRefType = convertToMemRefType(splitOp.outputs()[i].getType());
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
       SmallVector<Value, 4> allocOperands;
       auto shape = memRefType.getShape();


@@ -39,8 +39,7 @@ struct ONNXSqueezeOpLowering : public ConversionPattern {
     Value alloc, tensorSize;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType)) {
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
       auto tensorSizeInBytes = elementSizeInBytes;
       for (int i = 0; i < memRefShape.size(); ++i) {
         tensorSizeInBytes *= memRefShape[i];


@@ -22,17 +22,15 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
     auto loc = op->getLoc();
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertToMemRefType(*op->result_type_begin());
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     Value data = operandAdaptor.data();
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {data});
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, {data});
 
     // Number of loops
     auto memRefShape = memRefType.getShape();


@@ -44,8 +44,7 @@ struct ONNXUnsqueezeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
     auto memRefShape = memRefType.getShape();
     if (hasAllConstantDimensions(memRefType)) {
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
       for (int i = 0; i < memRefShape.size(); ++i) {
         Value dimVal = emitConstantOp(
             rewriter, loc, rewriter.getIntegerType(64), memRefShape[i]);


@@ -378,7 +378,6 @@ void addONNXToMLIRPasses(mlir::PassManager &pm) {
 void addONNXToKrnlPasses(mlir::PassManager &pm) {
   pm.addPass(mlir::createLowerToKrnlPass());
-  pm.addPass(mlir::createCanonicalizerPass());
   pm.addPass(mlir::createPackKrnlGlobalConstantsPass());
   // An additional pass of canonicalization is helpful because lowering
   // from ONNX dialect to Standard dialect exposes additional canonicalization


@@ -87,6 +87,8 @@ public:
 
     // Get a KrnlGetRefOp which does not use the current alloc.
     if (KrnlGetRefOp unbundledGetRef = getUnbundledGetRef(&allocOp)) {
+      unbundledGetRef.dump();
+
       // Current memory pool size is the offset for the newly bundled
       // internal MemRef. Emit the offset as a constant.
       auto offset = rewriter.create<ConstantOp>(


@@ -24,10 +24,10 @@ using namespace mlir;
 namespace {
 
 bool checkOpResultIsReturned(AllocOp *allocOp) {
-  FuncOp function = getContainingFunction(allocOp->getOperation());
+  auto parentBlock = allocOp->getOperation()->getBlock();
 
   bool opIsReturned = false;
-  function.walk([&opIsReturned, allocOp](ReturnOp op) {
+  parentBlock->walk([&opIsReturned, allocOp](ReturnOp op) {
     auto result = allocOp->getResult();
     for (const auto &operand : op.getOperands())
       if (operand == result)


@@ -1,4 +1,6 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+
+// -----
 
 func @test_constant(%arg0 : tensor<1xf32>) -> tensor<*xf32> {
   %0 = "onnx.Constant"() {value = dense<[[0.0, 0.0], [1.0, 1.1], [2.0, 2.1]]> : tensor<3x2xf32>} : () -> tensor<*xf32>


@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --lower-krnl --lower-all-llvm %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
 
 func @test_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
   %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32>


@@ -1,4 +1,6 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+
+// -----
 
 func @test_reshape(%arg0 : tensor<?x10xf32>, %arg1 : tensor<4xi64>) -> tensor<*xf32> {
   %0 = "onnx.Reshape"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<4xi64>) -> tensor<*xf32>


@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --bundle-memory-pools --canonicalize %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --bundle-memory-pools --canonicalize %s -split-input-file | FileCheck %s
 
 func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>) -> tensor<10x20xf32> {
   %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32>
@@ -10,8 +10,8 @@ func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>
   return %5 : tensor<10x20xf32>
 
   // CHECK-LABEL: test_bundle_memory_pool
-  // CHECK: [[CONST00:%.+]] = constant 0.000000e+00 : f32
   // CHECK: [[CONST0:%.+]] = constant 0 : i64
+  // CHECK: [[CONST00:%.+]] = constant 0.000000e+00 : f32
   // CHECK: [[CONST400:%.+]] = constant 400 : i64
   // CHECK: [[CONST1200:%.+]] = constant 1200 : i64
   // CHECK: [[CONST2000:%.+]] = constant 2000 : i64


@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool %s -split-input-file | FileCheck %s
 
 /// One intermediate value to allocate in the memory pool.
 func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
@@ -13,10 +13,10 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
   // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
   // CHECK: krnl.define_loops
   // CHECK: krnl.iterate
-  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
   // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: affine.store [[ADDF1]], [[GETREF]][symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
+  // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32>
   // CHECK: krnl.define_loops
   // CHECK: krnl.iterate
   // CHECK: dealloc [[MEMPOOL]] : memref<400xi8>
@@ -31,8 +31,8 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
   return %2 : tensor<10x20xf32>
 
   // CHECK-LABEL: test_enable_memory_pool_2
-  // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
   // CHECK: [[CONST0:%.+]] = constant 0 : i64
+  // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
   // CHECK: [[RES:%.+]] = alloc() : memref<10x20xf32>
   // CHECK: [[MEMPOOL0:%.+]] = alloc() : memref<800xi8>
   // CHECK: [[GETREF0:%.+]] = "krnl.getref"([[MEMPOOL0]], [[CONST0]]) : (memref<800xi8>, i64) -> memref<10x20xf32>
@@ -40,24 +40,24 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
   // CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
   // CHECK: krnl.define_loops
   // CHECK: krnl.iterate
-  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: affine.store [[ADDF1]], [[GETREF1]][symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
+  // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32>
   // CHECK: krnl.define_loops
   // CHECK: krnl.iterate
-  // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][symbol(%arg2), symbol(%arg4)] : memref<10x10xf32>
-  // CHECK: [[LOAD4:%.+]] = affine.load %arg1[symbol(%arg4), symbol(%arg3)] : memref<10x20xf32>
-  // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
+  // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32>
+  // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32>
+  // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
   // CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32
   // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
-  // CHECK: affine.store [[ADDF2]], [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
+  // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
   // CHECK: krnl.define_loops
   // CHECK: krnl.iterate
-  // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
-  // CHECK: [[LOAD7:%.+]] = affine.load %arg1[symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
+  // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
+  // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32>
   // CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32
-  // CHECK: affine.store [[ADDF3]], [[RES]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
+  // CHECK: affine.store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32>
   // CHECK: dealloc [[MEMPOOL1]] : memref<400xi8>
   // CHECK: dealloc [[MEMPOOL0]] : memref<800xi8>
   // CHECK: return [[RES]] : memref<10x20xf32>


@ -695,6 +695,100 @@ func @test_add_with_broadcasting(%arg0 : tensor<?xf32>, %arg1 : tensor<?x10xf32>
// ----- // -----
func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reducemax
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reducemin
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reduceprod
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reducesum
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_softmax(%arg0 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Softmax"(%arg0) {axis=1:i64} : (tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
@ -1013,10 +1107,10 @@ func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor<?x5x10xf32>) -> tensor<
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_matmul5
// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg1, [[C0]] : memref<?x5x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
@ -1045,10 +1139,10 @@ func @test_matmul6(%arg0 : tensor<?x10x5xf32>, %arg1 : tensor<5xf32>) -> tensor<
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_matmul6
// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10x5xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
@ -1421,3 +1515,506 @@ func @test_concat_1(%arg0 : tensor<5x5x1x32xf32>, %arg1 : tensor<5x5x3x32xf32>,
// CHECK: return [[RES]] : memref<5x5x9x32xf32>
}
// -----
func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)>
// CHECK-DAG: #{{.*}} = affine_map<() -> (0)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)>
// CHECK-LABEL: @test_pool_general_computation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
// -----
func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-DAG: #[[AFFINE_MAP:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>
// CHECK-LABEL: test_pool_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 2 : index
// CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32>
// CHECK: [[KERNEL:%.+]] = constant 2 : index
// CHECK: [[PAD:%.+]] = constant 0 : index
// CHECK: [[STRIDE:%.+]] = constant 1 : index
// CHECK: [[DILATION:%.+]] = constant 1 : index
// CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}}
// CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32>
}
// -----
func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_averagepool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}
// -----
func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_maxpool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}
// -----
func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_averagepool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
// CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32
// CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
// -----
func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_maxpool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
// CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
// -----
func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>
// CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)>
// CHECK-LABEL: @test_lstm_general_computation
// CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32>
// CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32>
// CHECK: {{.*}} = constant unit
// CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
// CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: }
// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: {{.*}} = constant 0 : index
// CHECK: {{.*}} = constant 3 : index
// CHECK: {{.*}} = constant 0 : index
// CHECK: {{.*}} = constant 1 : index
// CHECK: {{.*}} = constant 2 : index
// CHECK: {{.*}} = constant 3 : index
// CHECK: {{.*}} = constant 4 : index
// CHECK: {{.*}} = constant 5 : index
// CHECK: {{.*}} = constant 6 : index
// CHECK: {{.*}} = constant 7 : index
// CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) {
// CHECK: [[hCt:%.+]] = alloc() : memref<f32>
// CHECK: [[Ot:%.+]] = alloc() : memref<f32>
// CHECK: [[ct:%.+]] = alloc() : memref<f32>
// CHECK: [[Ft:%.+]] = alloc() : memref<f32>
// CHECK: [[It:%.+]] = alloc() : memref<f32>
// CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) {
// CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3]
// CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
// CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
// CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
// CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
// CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
// CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: }
// CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32
// CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
// CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>
// CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32
// CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
// CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>
// CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32
// CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
// CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>
// CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
// CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
// CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
// CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32
// CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
// CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>
// CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
// CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>
// CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
// CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWf_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWc_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ri_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ro_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rf_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rc_GEMM]] : memref<f32>
// CHECK: dealloc [[It]] : memref<f32>
// CHECK: dealloc [[Ft]] : memref<f32>
// CHECK: dealloc [[ct]] : memref<f32>
// CHECK: dealloc [[Ot]] : memref<f32>
// CHECK: dealloc [[hCt]] : memref<f32>
// CHECK: }
// CHECK: }
// CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32>
// CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32>
}
// -----
func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>
// CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
// CHECK-LABEL: @test_lstm_reverse_mode
// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}
// -----
func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>
// CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
// CHECK-LABEL: @test_lstm_bidirectional_mode
// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>
// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}
// -----
func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_squeeze
// CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
// CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<16x32x64xf32>
}
// -----
func @test_squeeze_unknown_dimensions(%arg0 : tensor<?x1x32x?x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor<?x1x32x?x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_squeeze_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x1x32x?x64xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x32x64xf32>
// CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
// CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
// CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref<?x32x64xf32>, memref<?x1x32x?x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<?x32x64xf32>
}
// -----
func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
// CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 8)>
// CHECK-LABEL: @test_split_equal
// CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
}
// -----
func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
// CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)>
// CHECK-LABEL: @test_split_variable
// CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
}
// -----
func @test_split_unknown_dimension(%arg0 : tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
// CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)>
// CHECK-LABEL: @test_split_unknown_dimension
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x?x64xf32>
// CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref<?x2x64xf32>
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x?x64xf32>
// CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_3:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
}


@ -1,263 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>
// CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)>
// CHECK-LABEL: @test_lstm_general_computation
// CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32>
// CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32>
// CHECK: {{.*}} = constant unit
// CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
// CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: }
// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: {{.*}} = constant 0 : index
// CHECK: {{.*}} = constant 3 : index
// CHECK: {{.*}} = constant 0 : index
// CHECK: {{.*}} = constant 1 : index
// CHECK: {{.*}} = constant 2 : index
// CHECK: {{.*}} = constant 3 : index
// CHECK: {{.*}} = constant 4 : index
// CHECK: {{.*}} = constant 5 : index
// CHECK: {{.*}} = constant 6 : index
// CHECK: {{.*}} = constant 7 : index
// CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) {
// CHECK: [[hCt:%.+]] = alloc() : memref<f32>
// CHECK: [[Ot:%.+]] = alloc() : memref<f32>
// CHECK: [[ct:%.+]] = alloc() : memref<f32>
// CHECK: [[Ft:%.+]] = alloc() : memref<f32>
// CHECK: [[It:%.+]] = alloc() : memref<f32>
// CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) {
// CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3]
// CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
// CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
// CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
// CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
// CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
// CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
// CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: }
// CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
// CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32
// CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
// CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>
// CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
// CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32
// CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
// CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>
// CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32
// CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
// CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>
// CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
// CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
// CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
// CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32
// CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
// CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>
// CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
// CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>
// CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
// CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
// CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWf_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWc_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ri_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ro_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rf_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rc_GEMM]] : memref<f32>
// CHECK: dealloc [[It]] : memref<f32>
// CHECK: dealloc [[Ft]] : memref<f32>
// CHECK: dealloc [[ct]] : memref<f32>
// CHECK: dealloc [[Ot]] : memref<f32>
// CHECK: dealloc [[hCt]] : memref<f32>
// CHECK: }
// CHECK: }
// CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32>
// CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32>
}
// -----
func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>
// CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
// CHECK-LABEL: @test_lstm_reverse_mode
// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}
// -----
func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>
// CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
// CHECK-LABEL: @test_lstm_bidirectional_mode
// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>
// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}


@ -1,121 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s | FileCheck %s
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)>
// CHECK-DAG: #{{.*}} = affine_map<() -> (0)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)>
// CHECK-DAG: #[[AFFINE_MAP1:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>
func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_pool_general_computation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_pool_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 2 : index
// CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32>
// CHECK: [[KERNEL:%.+]] = constant 2 : index
// CHECK: [[PAD:%.+]] = constant 0 : index
// CHECK: [[STRIDE:%.+]] = constant 1 : index
// CHECK: [[DILATION:%.+]] = constant 1 : index
// CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP1]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}}
// CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32>
}
func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_averagepool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}
func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_maxpool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}
func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_averagepool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
// CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32
// CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
// -----
func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_maxpool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
// CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}


@ -1,93 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reducemax
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reducemin
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reduceprod
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
// -----
func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reducesum
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}


@ -1,85 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
// CHECK: [[INDEX_MAP1:#.+]] = affine_map<(d0) -> (d0 + 8)>
// CHECK-LABEL: @test_split_equal
// CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP1]](%arg1)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
}
// -----
func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
// CHECK: [[INDEX_MAP2:#.+]] = affine_map<(d0) -> (d0 + 2)>
// CHECK-LABEL: @test_split_variable
// CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP2]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
}
// -----
func @test_split_unknown_dimension(%arg0 : tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
// CHECK: [[INDEX_MAP3:#.+]] = affine_map<(d0) -> (d0 + 2)>
// CHECK-LABEL: @test_split_unknown_dimension
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x?x64xf32>
// CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref<?x2x64xf32>
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x?x64xf32>
// CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_3:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP3]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
}

@@ -1,29 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_squeeze
// CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
// CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<16x32x64xf32>
}
// -----
func @test_squeeze_unknown_dimensions(%arg0 : tensor<?x1x32x?x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor<?x1x32x?x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()
// CHECK-LABEL: @test_squeeze_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x1x32x?x64xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x32x64xf32>
// CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
// CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
// CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref<?x32x64xf32>, memref<?x1x32x?x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<?x32x64xf32>
}

@@ -239,15 +239,10 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_exp_exp
+/// First Exp
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Exp
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -257,6 +252,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Exp
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -280,14 +278,10 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_tanh_tanh
+/// First Tanh
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Tanh
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -303,6 +297,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Tanh
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -332,14 +329,10 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_sinh_sinh
+/// First Sinh
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_0:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Sinh
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -355,6 +348,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Sinh
+// CHECK: [[C0_0:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -384,14 +380,10 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_cosh_cosh
+/// First Cosh
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Cosh
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -407,6 +399,9 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Cosh
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -435,14 +430,10 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_sigmoid_sigmoid
+/// First Sigmoid
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Sigmoid
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -457,6 +448,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Sigmoid
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -485,14 +479,10 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_relu_relu
+/// First Relu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Relu
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -504,6 +494,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Relu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -632,14 +625,10 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_elu_elu
+/// First Elu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Elu
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -656,6 +645,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Elu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -686,14 +678,10 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_leakyrelu_leakyrelu
+/// First LeakyRelu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First LeakyRelu
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -707,6 +695,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second LeakyRelu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -734,14 +725,10 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_selu_selu
+/// First Selu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Selu
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -759,6 +746,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Selu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -790,14 +780,10 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_hardsigmoid_hardsigmoid
+/// First HardSigmoid
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First HardSigmoid
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -816,6 +802,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second HardSigmoid
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -848,14 +837,10 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_reciprocal_reciprocal
+/// First Reciprocal
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-/// First Reciprocal
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -866,6 +851,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Reciprocal
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt %s | FileCheck %s
+// RUN: onnx-mlir-opt %s -split-input-file | FileCheck %s
//===----------------------------------------------------------------------===//
// CHECK-LABEL: @check_map1(%arg0: tuple<i64, f32>) -> tensor<*xf32> {