Emit allocs at the top of functions (#222)
* Reorganize main function.
* Follow review comments.
* Emit constants as globals in Krnl and LLVM dialects.
* Add support for moving dynamic alloca instructions to the top of functions.
* Fix memory pooling tests.
* Various fixes.
* Fix lit tests.
* More test fixes.
* Reformat.
* Reformat some more.
* Fix issue with TestConv and split-input-file.
* Use smart pointers.
* Remove redundant pointer.
* Reformat.
* Add initMap description.
* Clean up tests.
parent 4b33c312d6
commit b27e57cc4f
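The recurring API change in this diff, shown here as a brief orientation sketch (both signatures appear verbatim in the ONNXToKrnlCommon.hpp hunk below):

    // Old: the result buffer is allocated at the pattern's current
    // insertion point.
    Value insertAllocAndDealloc(MemRefType type, Location loc,
        PatternRewriter &rewriter, bool insertDealloc,
        ArrayRef<Value> operands = {}, int64_t alignment = -1);

    // New: the lowered op is passed in so the helper can find the containing
    // function and, when every size operand is already available there,
    // hoist the alloc into that function's init block.
    Value insertAllocAndDealloc(MemRefType type, Location loc,
        PatternRewriter &rewriter, bool insertDealloc, Operation *op,
        ArrayRef<Value> operands = {}, int64_t alignment = -1);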
@@ -1,5 +1,4 @@
-//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering
-//--------===//
+//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering -------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -34,6 +33,38 @@ public:
   }
 };
 
+//===----------------------------------------------------------------------===//
+// FuncOp lowering to Function with init and main blocks.
+//===----------------------------------------------------------------------===//
+
+struct FuncOpSignatureConversion : public OpConversionPattern<FuncOp> {
+  FuncOpSignatureConversion(MLIRContext *ctx, TypeConverter &converter)
+      : OpConversionPattern(converter, ctx) {}
+
+  /// Hook for derived classes to implement combined matching and rewriting.
+  LogicalResult matchAndRewrite(FuncOp funcOp, ArrayRef<Value> operands,
+      ConversionPatternRewriter &rewriter) const override {
+    FunctionType type = funcOp.getType();
+
+    // Convert the original function types.
+    TypeConverter::SignatureConversion result(type.getNumInputs());
+    SmallVector<Type, 1> newResults;
+    if (failed(typeConverter->convertSignatureArgs(type.getInputs(), result)) ||
+        failed(typeConverter->convertTypes(type.getResults(), newResults)) ||
+        failed(rewriter.convertRegionTypes(
+            &funcOp.getBody(), *typeConverter, &result)))
+      return failure();
+
+    // Update the function signature in-place.
+    rewriter.updateRootInPlace(funcOp, [&] {
+      funcOp.setType(FunctionType::get(
+          result.getConvertedTypes(), newResults, funcOp.getContext()));
+    });
+    addInitBlock(rewriter, funcOp.getLoc(), funcOp);
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // Frontend to Krnl Dialect lowering pass
 //===----------------------------------------------------------------------===//
@@ -49,6 +80,10 @@ struct FrontendToKrnlLoweringPass
 void FrontendToKrnlLoweringPass::runOnOperation() {
   ModuleOp module = getOperation();
 
+  // Create an entry for this module
+  initMap.insert(std::pair<ModuleOp, std::unique_ptr<FunctionToInitStates>>(
+      module, std::make_unique<FunctionToInitStates>()));
+
   // The first thing to define is the conversion target. This will define the
   // final target for this lowering.
   ConversionTarget target(getContext());
@@ -77,12 +112,6 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
     return tensor_to_memref_converter.isSignatureLegal(op.getType());
   });
 
-  // Type conversion for function signatures.
-  // Call MLIR FuncOp signature conversion when result type is
-  // a ranked tensor.
-  populateFuncOpTypeConversionPattern(
-      patterns, &getContext(), tensor_to_memref_converter);
-
   // Frontend operation lowering.
   // Math
   populateLoweringONNXElementwiseOpPattern(patterns, &getContext());
@@ -109,12 +138,16 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
   populateLoweringONNXLSTMOpPattern(patterns, &getContext());
   // Entry point
   patterns.insert<ONNXEntryPointLowering>(&getContext());
+  patterns.insert<FuncOpSignatureConversion>(
+      &getContext(), tensor_to_memref_converter);
+
   // With the target and rewrite patterns defined, we can now attempt the
   // conversion. The conversion will signal failure if any of our `illegal`
   // operations were not converted successfully.
   if (failed(applyPartialConversion(module, target, patterns)))
     signalPassFailure();
 
+  initMap.erase(module);
 }
 
 std::unique_ptr<Pass> mlir::createLowerToKrnlPass() {
@@ -518,10 +518,11 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
-    else
       alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, {X});
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+    else
+      alloc = insertAllocAndDealloc(
+          memRefType, loc, rewriter, insertDealloc, op, {X});
 
     SmallVector<Value, 4> loopIVs;
     if (!hasAllScalarValues(operands)) {
@@ -574,10 +575,11 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
     // comes from.
     // TODO: can the dimension of the result differ after optimizations?
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, operands);
+          memRefType, loc, rewriter, insertDealloc, op, operands);
 
     SmallVector<Value, 4> loopIVs;
     std::map<int, std::map<int, Value>> broadcastedDimInfo;
@@ -46,7 +46,8 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else {
       auto memRefShape = memRefType.getShape();
       SmallVector<Value, 2> allocOperands;
@@ -43,8 +43,16 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else {
+      PatternRewriter::InsertionGuard insertGuard(rewriter);
+      FuncOp function = getContainingFunction(op);
+      bool functionLevelAlloc = (op->getParentOp() == function);
+      bool canMove = checkAllocMovable(function, functionLevelAlloc, {A, B});
+      if (canMove)
+        rewriter.setInsertionPoint(getInitInsertionPoint(function));
+
       SmallVector<Value, 4> allocOperands;
       if (AShape.size() >= 2 && BShape.size() >= 2) {
         // Both arguments are N-D, N >= 2
@@ -108,6 +116,9 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
       }
 
       alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);
+
+      if (canMove)
+        markOperandInInitBlock(function, alloc);
     }
 
     if (AShape.size() >= 2 || BShape.size() >= 2) {
@@ -159,8 +159,8 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefOutType)) {
-      alloc =
-          insertAllocAndDealloc(memRefOutType, loc, rewriter, insertDealloc);
+      alloc = insertAllocAndDealloc(
+          memRefOutType, loc, rewriter, insertDealloc, op);
     } else {
       SmallVector<Value, 2> allocOperands;
       for (decltype(outRank) i = 0; i < outRank; ++i) {
@@ -36,18 +36,21 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, input);
+          memRefType, loc, rewriter, insertDealloc, op, input);
 
     // Shape of the result
     auto memRefShape = memRefType.getShape();
 
     // Insert allocations and deallocations for sum and max.
     MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0);
-    Value sumOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true);
-    Value maxOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true);
+    Value sumOp =
+        insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op);
+    Value maxOp =
+        insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op);
     Value zero = emitConstantOp(rewriter, loc, elementType, 0);
     Value negInfinity = rewriter.create<ConstantOp>(loc,
         FloatAttr::get(elementType, -std::numeric_limits<float>::infinity()));
@@ -36,10 +36,11 @@ struct ONNXConvOpLowering : public ConversionPattern {
     bool hasBias = !biasOperand.getType().isa<NoneType>();
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, {inputOperand});
+          memRefType, loc, rewriter, insertDealloc, op, {inputOperand});
 
     // R = Conv(D, K)
     //
@@ -42,10 +42,11 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, {operand});
+          memRefType, loc, rewriter, insertDealloc, op, {operand});
 
     // Operand's dimensions can be in the form of NxCxD1xD2x...xDn or N.
     // In case of N, C is assumed to be 1.
@@ -235,7 +235,8 @@ struct ONNXPoolOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else {
       alloc = insertAllocAndDeallocForPooling(rewriter, loc, insertDealloc,
           memRefType, inputOperand, kernelShape, pads, strides, dilations,
@@ -11,6 +11,8 @@
 
 #include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
 
+std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
+
 /// Check if all dimensions are known at compile time.
 bool hasAllConstantDimensions(MemRefType type) {
   auto memRefShape = type.getShape();
@@ -43,11 +45,151 @@ MemRefType convertToMemRefType(Type type) {
   return memRefType;
 }
 
+/// Retrieve function which contains the current operation.
+FuncOp getContainingFunction(Operation *op) {
+  Operation *parentFuncOp = op->getParentOp();
+
+  // While parent is not a FuncOp and its cast to a FuncOp is null.
+  while (!llvm::dyn_cast_or_null<FuncOp>(parentFuncOp))
+    parentFuncOp = parentFuncOp->getParentOp();
+
+  return cast<FuncOp>(parentFuncOp);
+}
+
+void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp function) {
+  // If this is the first time we encounter an operation in this
+  // function, we create an entry inside the initMap and split the
+  // function body into an init block and a main block.
+  //
+  //  function func_name() {
+  //    ... init block ...
+  //    br ^bb1
+  //  ^bb1:  // pred: ^bb0
+  //    ... main block ...
+  //    return
+  //  }
+  //
+  // Note: the block ^bb0 being the first block has its label omitted.
+  //
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  if (initStates->count(function) == 0) {
+    initStates->insert(
+        std::pair<FuncOp, std::unique_ptr<ONNXOperandsInitState>>(
+            function, std::make_unique<ONNXOperandsInitState>()));
+    std::unique_ptr<ONNXOperandsInitState> &initState =
+        initStates->at(function);
+
+    // All input arguments are considered as part of the initialization block
+    // so add them to the operandsInInitBlock set.
+    Block *functionBlock = &function.front();
+    for (auto arg : functionBlock->getArguments())
+      initState->operandsInInitBlock.insert(arg);
+
+    PatternRewriter::InsertionGuard insertGuard(rewriter);
+    rewriter.setInsertionPointToStart(functionBlock);
+
+    initState->initBlock = rewriter.getInsertionBlock();
+    auto currentPoint = rewriter.getInsertionPoint();
+    initState->mainBlock =
+        rewriter.splitBlock(initState->initBlock, currentPoint);
+
+    rewriter.setInsertionPointToEnd(initState->initBlock);
+
+    // Insert a branch operation from initBlock to mainBlock. This
+    // ensures the final code contains legal blocks.
+    initState->branchInit =
+        rewriter.create<BranchOp>(loc, initState->mainBlock);
+
+    // Set insertion point to start of mainBlock.
+    rewriter.setInsertionPointToStart(initState->mainBlock);
+  }
+}
+
+bool containingFunctionHasInitBlock(Operation *op) {
+  FuncOp function = getContainingFunction(op);
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  return initStates->count(function) > 0;
+}
+
+Block *getInitBlock(FuncOp function) {
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  assert(initStates->count(function) > 0 &&
+         "Initialization state not defined for this function.");
+  return initStates->at(function)->initBlock;
+}
+
+Block *getMainBlock(FuncOp function) {
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  assert(initStates->count(function) > 0 &&
+         "Initialization state not defined for this function.");
+  return initStates->at(function)->mainBlock;
+}
+
+BranchOp getInitInsertionPoint(FuncOp function) {
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  assert(initStates->count(function) > 0 &&
+         "Initialization state not defined for this function.");
+  return initStates->at(function)->branchInit;
+}
+
+/// Check if all operands used for allocating the size of the result are
+/// in the initialization block (i.e. initBlock).
+bool checkAllocMovable(
+    FuncOp function, bool functionLevelAlloc, ArrayRef<Value> operands) {
+  // If no initialization block exists then alloc cannot be moved.
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  if (initStates->count(function) == 0)
+    return false;
+
+  // If the alloc is not function level alloc then it cannot be moved.
+  if (!functionLevelAlloc)
+    return false;
+
+  bool allInitOrArg = true;
+  for (int i = 0; i < operands.size(); i++) {
+    if (initStates->at(function)->operandsInInitBlock.count(operands[i]) == 0)
+      allInitOrArg = false;
+  }
+
+  return allInitOrArg;
+}
+
+/// Add operand to list of operands in the init block.
+void markOperandInInitBlock(FuncOp function, Value operand) {
+  // Check if function is valid. At this point it has to be.
+  assert(function && "Attempt to add operand when function is null.");
+  ModuleOp module = cast<ModuleOp>(function.getParentOp());
+  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
+  // A valid function must have an initialization state.
+  assert(initStates->count(function) > 0 &&
+         "Initialization state not defined for this function.");
+  initStates->at(function)->operandsInInitBlock.insert(operand);
+}
+
 /// Insert an allocation and deallocation for the given MemRefType.
-Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, ArrayRef<Value> operands,
-    int64_t alignment) {
+Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
+    bool functionLevelAlloc, ArrayRef<Value> operands, int64_t alignment) {
   // Put together alloc operands for any dynamic dimensions of the memref.
+  // Save insertion point in case we need to change it to the initBlock.
+  PatternRewriter::InsertionGuard insertGuard(rewriter);
+
+  // Check if all operands of the alloc are in the init region or are input
+  // arguments. If some of them are not or there is no init block, this
+  // variable will be false.
+  bool canMove = checkAllocMovable(function, functionLevelAlloc, operands);
+
+  // If a legal move to the init block is possible, set insertion point
+  // at the end of the initialization block just before the branch instruction.
+  if (canMove)
+    rewriter.setInsertionPoint(getInitInsertionPoint(function));
+
   AllocOp alloc;
   if (!operands.empty()) {
     auto memRefShape = type.getShape();
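Taken together, a lowering pattern drives these helpers roughly as follows. This is an illustrative sketch only: the function name emitHoistedAlloc and the sizeOperand parameter are hypothetical, while the call sequence mirrors the ONNXMatMulOp hunk above.

    // Hypothetical helper showing the hoisting idiom; not part of this commit.
    static Value emitHoistedAlloc(PatternRewriter &rewriter, Operation *op,
        MemRefType memRefType, Location loc, Value sizeOperand) {
      // Restores the original insertion point when the guard goes out of scope.
      PatternRewriter::InsertionGuard insertGuard(rewriter);
      FuncOp function = getContainingFunction(op);
      bool functionLevelAlloc = (op->getParentOp() == function);
      // Hoisting is legal only if every value feeding the alloc's dynamic
      // dimensions is already defined in the init block (or is an argument).
      bool canMove =
          checkAllocMovable(function, functionLevelAlloc, {sizeOperand});
      if (canMove)
        rewriter.setInsertionPoint(getInitInsertionPoint(function));
      SmallVector<Value, 4> allocOperands;
      // ... derive allocOperands for the dynamic dimensions from sizeOperand ...
      AllocOp alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);
      if (canMove)
        markOperandInInitBlock(function, alloc.getResult());
      return alloc;
    }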
@@ -97,6 +239,11 @@ Value insertAllocAndDealloc(MemRefType type, Location loc,
     } else {
       alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
     }
+
+    // If the alloc was emitted inside the initialization block then add
+    // it to the set of values emitted in the initialization block.
+    if (canMove)
+      markOperandInInitBlock(function, alloc.getResult());
   } else {
     // Set alignment attribute. Default value is `-1`, which does not set
     // alignment.
@@ -113,17 +260,52 @@ Value insertAllocAndDealloc(MemRefType type, Location loc,
   // Make sure to allocate at the beginning of the block if
   // all dimensions are known.
   auto *parentBlock = alloc.getOperation()->getBlock();
-  if (hasAllConstantDimensions(type))
-    alloc.getOperation()->moveBefore(&parentBlock->front());
+  if (hasAllConstantDimensions(type)) {
+    // Check if this move is a move to the init block or to the top of the
+    // function without an init block. For the case in which all dimensions
+    // are constant, the `canMove` variable will be false if there is no
+    // init block.
+    if (canMove) {
+      // The alloc was emitted in the init block already so just record
+      // that this value is now available in the init block.
+      alloc.getOperation()->moveBefore(&getInitBlock(function)->front());
+      markOperandInInitBlock(function, alloc.getResult());
+    } else {
+      // No init block exists in this case so just move it as before.
+      alloc.getOperation()->moveBefore(&parentBlock->front());
+    }
+  }
 
   if (insertDealloc) {
     auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
-    dealloc.getOperation()->moveBefore(&parentBlock->back());
+    // Move dealloc to the end of the main block if such a block exists.
+    if (canMove) {
+      Block *mainBlock = getMainBlock(function);
+      dealloc.getOperation()->moveBefore(&mainBlock->back());
+    } else {
+      // If no main block exists, move to parent block.
+      dealloc.getOperation()->moveBefore(&parentBlock->back());
+    }
   }
 
   return alloc;
 }
 
+/// Insert an allocation and deallocation for the given MemRefType.
+Value insertAllocAndDealloc(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
+    ArrayRef<Value> operands, int64_t alignment) {
+  FuncOp function = getContainingFunction(op);
+
+  bool functionLevelAlloc = (op->getParentOp() == function);
+  if (!functionLevelAlloc) {
+    printf("This is not a function level alloc!\n");
+  }
+
+  return insertAllocAndDeallocWithFunction(type, loc, rewriter, insertDealloc,
+      function, functionLevelAlloc, operands, alignment);
+}
+
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
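For reference, the per-op hunks below all apply the same mechanical change at the call site; condensed, the new shape is the following (here data stands for whichever operand supplies the dynamic dimensions, as in the ONNXTransposeOp hunk further down):

    if (hasAllConstantDimensions(memRefType))
      alloc =
          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
    else
      alloc = insertAllocAndDealloc(
          memRefType, loc, rewriter, insertDealloc, op, {data});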
@@ -463,10 +645,10 @@ int64_t ArrayAttrIntVal(ArrayAttr a, int i) {
 }
 
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp) {
-  auto parentBlock = allocOp->getOperation()->getBlock();
+  FuncOp function = getContainingFunction(allocOp->getOperation());
 
   bool opIsUsedInGetRef = false;
-  parentBlock->walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
+  function.walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
     auto result = allocOp->getResult();
     for (const auto &operand : op.getOperands())
       if (operand == result)
@@ -19,7 +19,9 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SetVector.h"
 
 #include "src/Dialect/Krnl/KrnlHelper.hpp"
 #include "src/Dialect/Krnl/KrnlOps.hpp"
@@ -29,6 +31,37 @@
 
 using namespace mlir;
 
+//===----------------------------------------------------------------------===//
+// Insertion point for initialization instructions and the blocks used for
+// inserting the initialization and main code. These blocks will disappear
+// when the first canonicalization is performed because the init block
+// unconditionally branches into the second block. These blocks exist only for
+// the purpose of this optimization.
+// The support happens on a per-function basis.
+//===----------------------------------------------------------------------===//
+
+typedef struct ONNXOperandsInitState {
+  Block *initBlock;
+  Block *mainBlock;
+  BranchOp branchInit;
+  llvm::SetVector<Value> operandsInInitBlock;
+} ONNXOperandsInitState;
+
+typedef std::map<FuncOp, std::unique_ptr<ONNXOperandsInitState>>
+    FunctionToInitStates;
+
+// This map is used by the FrontendToKrnlLoweringPass pass to keep track of the
+// allocations emitted in the initialization block for each function of a given
+// module. A translation unit can consist of several modules, each with several
+// functions, hence the structure shown below.
+// This data structure enables the emission of dynamic `alloc` instructions
+// in the initialization block of a function if all the other operands the
+// computation of its parameters depends on are also present in that function's
+// initialization block.
+// This data structure is live only during the execution of the frontend
+// lowering to Krnl dialect pass (FrontendToKrnlLoweringPass).
+extern std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
+
 //===----------------------------------------------------------------------===//
 // Common functions used when lowering the ONNX frontend dialect to KRNL.
 //===----------------------------------------------------------------------===//
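As a reading aid, the nesting these typedefs imply can be unwound as in the following hypothetical fragment; it matches the accessor helpers defined in ONNXToKrnlCommon.cpp above:

    // initMap : ModuleOp -> (FuncOp -> ONNXOperandsInitState)
    FuncOp function = getContainingFunction(op);
    ModuleOp module = cast<ModuleOp>(function.getParentOp());
    std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
    Block *initBlock = initStates->at(function)->initBlock; // per-function state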
@@ -44,9 +77,14 @@ MemRefType convertToMemRefType(Type type);
 
 /// Insert an allocation and deallocation for the given MemRefType.
 Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc,
+    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
     ArrayRef<Value> operands = {}, int64_t alignment = -1);
 
+Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
+    bool functionLevelAlloc, ArrayRef<Value> operands = {},
+    int64_t alignment = -1);
+
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
@@ -246,3 +284,20 @@ void populateLoweringONNXSplitOpPattern(
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp);
 
 int64_t getMemRefSizeInBytes(Value val);
+
+FuncOp getContainingFunction(Operation *op);
+
+void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp op);
+
+bool containingFunctionHasInitBlock(Operation *op);
+
+Block *getInitBlock(FuncOp function);
+
+Block *getMainBlock(FuncOp function);
+
+BranchOp getInitInsertionPoint(FuncOp function);
+
+bool checkAllocMovable(
+    FuncOp function, bool functionLevelAlloc, ArrayRef<Value> operands);
+
+void markOperandInInitBlock(FuncOp function, Value operand);
@@ -161,13 +161,14 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
     ConversionPatternRewriter &rewriter, Location loc, ONNXLSTMOp *op,
     typename ONNXLSTMOp::Adaptor operandAdaptor) {
   LstmState state;
+  FuncOp function = cast<FuncOp>(op->getParentOp());
 
   // Insert allocation and deallocation for the results of this operation.
   if (!isNoneType(op->Y())) {
     auto yMemRefType = convertToMemRefType(op->Y().getType());
     if (hasAllConstantDimensions(yMemRefType))
-      state.allH = insertAllocAndDealloc(yMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 0));
+      state.allH = insertAllocAndDeallocWithFunction(yMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 0), function, true);
     else {
       llvm_unreachable("Unsupported dynamic dimensions.");
     }
@@ -179,8 +180,8 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
   if (!isNoneType(op->Y_h())) {
     auto yhMemRefType = convertToMemRefType(op->Y_h().getType());
     if (hasAllConstantDimensions(yhMemRefType))
-      state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 1));
+      state.ht = insertAllocAndDeallocWithFunction(yhMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 1), function, true);
     else
       llvm_unreachable("Unsupported dynamic dimensions.");
   } else {
@@ -188,15 +189,16 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
         {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1),
             dimAt(operandAdaptor.R(), 2)},
         operandAdaptor.X().getType().cast<ShapedType>().getElementType());
-    state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter, true);
+    state.ht = insertAllocAndDeallocWithFunction(
+        yhMemRefType, loc, rewriter, true, function, true);
   }
 
   // Y_c :: [num_directions, batch_size, hidden_size]
   if (!isNoneType(op->Y_c())) {
     auto ycMemRefType = convertToMemRefType(op->Y_c().getType());
     if (hasAllConstantDimensions(ycMemRefType))
-      state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 2));
+      state.ct = insertAllocAndDeallocWithFunction(ycMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 2), function, true);
     else
       llvm_unreachable("Unsupported dynamic dimensions.");
   } else {
@@ -204,7 +206,8 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
         {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1),
             dimAt(operandAdaptor.R(), 2)},
         operandAdaptor.X().getType().cast<ShapedType>().getElementType());
-    state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter, true);
+    state.ct = insertAllocAndDeallocWithFunction(
+        ycMemRefType, loc, rewriter, true, function, true);
   }
 
   // Initialize ht and ct.
@@ -20,6 +20,7 @@ struct ONNXConcatOpLowering : public ConversionPattern {
       ConversionPatternRewriter &rewriter) const final {
     // Gather info.
     auto loc = op->getLoc();
+
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     ONNXConcatOp concatOp = llvm::dyn_cast<ONNXConcatOp>(op);
@@ -33,10 +34,11 @@ struct ONNXConcatOpLowering : public ConversionPattern {
     assert((axis >= 0 && axis < rank) && "Concat axis out of bounds");
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, {resultOperand});
+          memRefType, loc, rewriter, insertDealloc, op, {resultOperand});
 
     // Creates loops, one for each input.
     int writeOffset = 0;
@@ -18,6 +18,7 @@ struct ONNXIdentityOpLowering : public ConversionPattern {
 
   LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const final {
+    auto loc = op->getLoc();
     ONNXIdentityOpAdaptor operandAdaptor(operands);
     rewriter.replaceOp(op, operandAdaptor.input());
     return success();
@@ -40,11 +40,13 @@ struct ONNXPadOpLowering : public ConversionPattern {
       return emitError(loc, "Pad: unknown pads");
 
     auto memRefType = convertToMemRefType(tensorType);
+
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       return emitError(loc, "unexpected output has non-Constant shape");
 
@@ -32,11 +32,13 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
 
     // Insert an allocation and deallocation for the result of this operation.
    auto memRefType = convertToMemRefType(tensorType);
+
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       return emitError(loc, "unexpected output has non-Constant shape");
 
@@ -46,7 +46,8 @@ struct ONNXReshapeOpLowering : public ConversionPattern {
 
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType)) {
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     } else {
       // If a dimension is zero, the actual dimension value is taken from the
       // input tensor.
@@ -40,7 +40,8 @@ struct ONNXSplitOpLowering : public ConversionPattern {
       auto memRefType = convertToMemRefType(splitOp.outputs()[i].getType());
 
       if (hasAllConstantDimensions(memRefType))
-        alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+        alloc =
+            insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
       else {
         SmallVector<Value, 4> allocOperands;
         auto shape = memRefType.getShape();
@@ -39,7 +39,8 @@ struct ONNXSqueezeOpLowering : public ConversionPattern {
     Value alloc, tensorSize;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType)) {
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
       auto tensorSizeInBytes = elementSizeInBytes;
       for (int i = 0; i < memRefShape.size(); ++i) {
         tensorSizeInBytes *= memRefShape[i];
@@ -22,15 +22,17 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
     auto loc = op->getLoc();
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertToMemRefType(*op->result_type_begin());
+
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     Value data = operandAdaptor.data();
 
     if (hasAllConstantDimensions(memRefType))
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, {data});
+          memRefType, loc, rewriter, insertDealloc, op, {data});
 
     // Number of loops
     auto memRefShape = memRefType.getShape();
@@ -44,7 +44,8 @@ struct ONNXUnsqueezeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
    auto memRefShape = memRefType.getShape();
     if (hasAllConstantDimensions(memRefType)) {
-      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
       for (int i = 0; i < memRefShape.size(); ++i) {
         Value dimVal = emitConstantOp(
             rewriter, loc, rewriter.getIntegerType(64), memRefShape[i]);
@@ -378,6 +378,7 @@ void addONNXToMLIRPasses(mlir::PassManager &pm) {
 
 void addONNXToKrnlPasses(mlir::PassManager &pm) {
   pm.addPass(mlir::createLowerToKrnlPass());
+  pm.addPass(mlir::createCanonicalizerPass());
   pm.addPass(mlir::createPackKrnlGlobalConstantsPass());
   // An additional pass of canonicalization is helpful because lowering
   // from ONNX dialect to Standard dialect exposes additional canonicalization
@@ -87,8 +87,6 @@ public:
 
     // Get a KrnlGetRefOp which does not use the current alloc.
     if (KrnlGetRefOp unbundledGetRef = getUnbundledGetRef(&allocOp)) {
-      unbundledGetRef.dump();
-
       // Current memory pool size is the offset for the newly bundled
       // internal MemRef. Emit the offset as a constant.
       auto offset = rewriter.create<ConstantOp>(
@@ -24,10 +24,10 @@ using namespace mlir;
 namespace {
 
 bool checkOpResultIsReturned(AllocOp *allocOp) {
-  auto parentBlock = allocOp->getOperation()->getBlock();
+  FuncOp function = getContainingFunction(allocOp->getOperation());
 
   bool opIsReturned = false;
-  parentBlock->walk([&opIsReturned, allocOp](ReturnOp op) {
+  function.walk([&opIsReturned, allocOp](ReturnOp op) {
     auto result = allocOp->getResult();
     for (const auto &operand : op.getOperands())
       if (operand == result)
@@ -1,6 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s
 
-// -----
-
 func @test_constant(%arg0 : tensor<1xf32>) -> tensor<*xf32> {
   %0 = "onnx.Constant"() {value = dense<[[0.0, 0.0], [1.0, 1.1], [2.0, 2.1]]> : tensor<3x2xf32>} : () -> tensor<*xf32>
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --lower-krnl --lower-all-llvm %s | FileCheck %s
 
 func @test_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
   %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32>
@@ -1,6 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s
 
-// -----
-
 func @test_reshape(%arg0 : tensor<?x10xf32>, %arg1 : tensor<4xi64>) -> tensor<*xf32> {
   %0 = "onnx.Reshape"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<4xi64>) -> tensor<*xf32>
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --bundle-memory-pools --canonicalize %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --bundle-memory-pools --canonicalize %s | FileCheck %s
 
 func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>) -> tensor<10x20xf32> {
   %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32>
@@ -10,8 +10,8 @@ func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>
   return %5 : tensor<10x20xf32>
 
 // CHECK-LABEL: test_bundle_memory_pool
-// CHECK: [[CONST0:%.+]] = constant 0 : i64
 // CHECK: [[CONST00:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[CONST0:%.+]] = constant 0 : i64
 // CHECK: [[CONST400:%.+]] = constant 400 : i64
 // CHECK: [[CONST1200:%.+]] = constant 1200 : i64
 // CHECK: [[CONST2000:%.+]] = constant 2000 : i64
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool %s | FileCheck %s
 
 /// One intermediate value to allocate in the memory pool.
 func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
@@ -13,10 +13,10 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
 // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
 // CHECK: krnl.define_loops
 // CHECK: krnl.iterate
-// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
-// CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
+// CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
+// CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
 // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-// CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32>
+// CHECK: affine.store [[ADDF1]], [[GETREF]][symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
 // CHECK: krnl.define_loops
 // CHECK: krnl.iterate
 // CHECK: dealloc [[MEMPOOL]] : memref<400xi8>
@@ -31,8 +31,8 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>
   return %2 : tensor<10x20xf32>
 
 // CHECK-LABEL: test_enable_memory_pool_2
-// CHECK: [[CONST0:%.+]] = constant 0 : i64
 // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
+// CHECK: [[CONST0:%.+]] = constant 0 : i64
 // CHECK: [[RES:%.+]] = alloc() : memref<10x20xf32>
 // CHECK: [[MEMPOOL0:%.+]] = alloc() : memref<800xi8>
 // CHECK: [[GETREF0:%.+]] = "krnl.getref"([[MEMPOOL0]], [[CONST0]]) : (memref<800xi8>, i64) -> memref<10x20xf32>
@ -40,24 +40,24 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
// CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.iterate
- // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+ // CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
- // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+ // CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
- // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32>
+ // CHECK: affine.store [[ADDF1]], [[GETREF1]][symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.iterate
- // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32>
+ // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][symbol(%arg2), symbol(%arg4)] : memref<10x10xf32>
- // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32>
+ // CHECK: [[LOAD4:%.+]] = affine.load %arg1[symbol(%arg4), symbol(%arg3)] : memref<10x20xf32>
- // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
+ // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
// CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32
// CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
- // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
+ // CHECK: affine.store [[ADDF2]], [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.iterate
- // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
+ // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
- // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32>
+ // CHECK: [[LOAD7:%.+]] = affine.load %arg1[symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
// CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32
- // CHECK: affine.store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32>
+ // CHECK: affine.store [[ADDF3]], [[RES]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
// CHECK: dealloc [[MEMPOOL1]] : memref<400xi8>
// CHECK: dealloc [[MEMPOOL0]] : memref<800xi8>
// CHECK: return [[RES]] : memref<10x20xf32>
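Editor's note on the checks above: "krnl.getref" returns a typed view into a single
byte pool at a given byte offset, which is why the same i64 constant feeds every
getref. A minimal sketch of the idea, with hypothetical sizes and offsets not taken
from this patch:

// Two intermediate buffers packed into one 1200-byte pool; offsets are bytes
// from the base of the pool (10x20xf32 = 800 bytes, 10x10xf32 = 400 bytes).
%c0 = constant 0 : i64
%c800 = constant 800 : i64
%pool = alloc() : memref<1200xi8>
%view0 = "krnl.getref"(%pool, %c0) : (memref<1200xi8>, i64) -> memref<10x20xf32>
%view1 = "krnl.getref"(%pool, %c800) : (memref<1200xi8>, i64) -> memref<10x10xf32>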
@@ -695,100 +695,6 @@ func @test_add_with_broadcasting(%arg0 : tensor<?xf32>, %arg1 : tensor<?x10xf32>

// -----

- func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
- %0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: test_reducemax
- // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
- // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
- // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
-
- // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
- // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
- // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
- // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
- // CHECK: }
- // CHECK: return [[RES]] : memref<3x2xf32>
- }
-
- // -----
-
- func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
- %0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: test_reducemin
- // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
- // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
- // CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
-
- // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
- // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
- // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
- // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
- // CHECK: }
- // CHECK: return [[RES]] : memref<3x2xf32>
- }
-
- // -----
-
- func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
- %0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: test_reduceprod
- // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
- // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
- // CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
-
- // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
- // CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
- // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
- // CHECK: }
- // CHECK: return [[RES]] : memref<3x2xf32>
- }
-
- // -----
-
- func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
- %0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: test_reducesum
- // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
- // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
- // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
-
- // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
- // CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
- // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
- // CHECK: }
- // CHECK: return [[RES]] : memref<3x2xf32>
- }
-
- // -----
-
func @test_softmax(%arg0 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Softmax"(%arg0) {axis=1:i64} : (tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()
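Editor's note: the deleted reduction tests above all exercise the same two-phase
lowering: fill the result with the reduction's identity element (-inf for max,
+inf for min, 1.0 for prod, 0.0 for sum), then fold every input element into it.
A minimal sketch for ReduceSum over axis 1, with hypothetical loop names:

%res = alloc() : memref<3x2xf32>
%identity = constant 0.000000e+00 : f32
%init:2 = krnl.define_loops 2
krnl.iterate(%init#0, %init#1) with (%init#0 -> %i = 0 to 3, %init#1 -> %k = 0 to 2) {
  affine.store %identity, %res[%i, %k] : memref<3x2xf32>
}
%red:3 = krnl.define_loops 3
krnl.iterate(%red#0, %red#1, %red#2) with (%red#0 -> %i = 0 to 3, %red#1 -> %j = 0 to 2, %red#2 -> %k = 0 to 2) {
  %x = affine.load %arg0[%i, %j, %k] : memref<3x2x2xf32>
  %acc = affine.load %res[%i, %k] : memref<3x2xf32>
  %sum = addf %acc, %x : f32
  affine.store %sum, %res[%i, %k] : memref<3x2xf32>
}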
@@ -1107,10 +1013,10 @@ func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor<?x5x10xf32>) -> tensor<
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_matmul5
- // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg1, [[C0]] : memref<?x5x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+ // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>

@@ -1139,10 +1045,10 @@ func @test_matmul6(%arg0 : tensor<?x10x5xf32>, %arg1 : tensor<5xf32>) -> tensor<
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_matmul6
- // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10x5xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+ // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
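Editor's note: the two matmul hunks above are the visible effect of this patch:
the f32 reduction identity used to be emitted before the result buffer and now
follows the (possibly dynamic) alloc, so allocations group at the top of the
function. Illustrative ordering after the change, with hypothetical value names:

%c0 = constant 0 : index
%d0 = dim %arg1, %c0 : memref<?x5x10xf32>   // dynamic leading dimension
%res = alloc(%d0) : memref<?x10xf32>        // alloc comes first
%zero = constant 0.000000e+00 : f32         // identity constant follows it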
@@ -1515,506 +1421,3 @@ func @test_concat_1(%arg0 : tensor<5x5x1x32xf32>, %arg1 : tensor<5x5x3x32xf32>,

// CHECK: return [[RES]] : memref<5x5x9x32xf32>
}

- // -----
-
- func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
- %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)>
- // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)>
- // CHECK-DAG: #{{.*}} = affine_map<() -> (0)>
- // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)>
-
- // CHECK-LABEL: @test_pool_general_computation
-
- // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
- // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
-
- // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
-
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
-
- // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
- // CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
- // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: }
-
- // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: }
- }
-
- // -----
-
- func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> {
- %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-DAG: #[[AFFINE_MAP:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>
- // CHECK-LABEL: test_pool_unknown_dimensions
- // CHECK: [[C0:%.+]] = constant 2 : index
- // CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32>
- // CHECK: [[KERNEL:%.+]] = constant 2 : index
- // CHECK: [[PAD:%.+]] = constant 0 : index
- // CHECK: [[STRIDE:%.+]] = constant 1 : index
- // CHECK: [[DILATION:%.+]] = constant 1 : index
- // CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}}
- // CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32>
- }
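Editor's note: a worked instance of the deleted output-size map above, with d0 the
dynamic spatial extent and [s0, s1, s2, s3] bound to [kernel, pad, stride, dilation]
as in the affine.apply it checks; the function name here is hypothetical:

#out_size = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>
func @pool_out_size(%h : index) -> index {
  %kernel = constant 2 : index
  %pad = constant 0 : index
  %stride = constant 1 : index
  %dilation = constant 1 : index
  // (%h + 0 - (2 - 1) * 1 - 1) floordiv 1 + 1 = %h - 1, matching the 32 -> 31 dims above.
  %out = affine.apply #out_size(%h)[%kernel, %pad, %stride, %dilation]
  return %out : index
}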
-
- // -----
-
- func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
- %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: @test_averagepool_identity_value
- // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
- // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- }
-
- // -----
-
- func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
- %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: @test_maxpool_identity_value
- // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
- // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
- // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- }
-
- // -----
-
- func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
- %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: @test_averagepool_pooling_operation
- // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
-
- // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
-
- // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
-
- // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
- // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
- // CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: }
-
- // CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32
- // CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: }
- }
-
- // -----
-
- func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
- %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: @test_maxpool_pooling_operation
- // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
-
- // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {
-
- // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
-
- // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
- // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
- // CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
- // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: }
-
- // CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK: }
- }
-
- // -----
-
- func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
- %cst = constant unit
- %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
- return %Y_h : tensor<*xf32>
-
- // CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)>
- // CHECK-LABEL: @test_lstm_general_computation
-
- // CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32>
- // CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32>
- // CHECK: {{.*}} = constant unit
-
- // CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32
- // CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
- // CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
- // CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
- // CHECK: }
-
- // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
- // CHECK: {{.*}} = constant 0 : index
- // CHECK: {{.*}} = constant 3 : index
- // CHECK: {{.*}} = constant 0 : index
- // CHECK: {{.*}} = constant 1 : index
- // CHECK: {{.*}} = constant 2 : index
- // CHECK: {{.*}} = constant 3 : index
- // CHECK: {{.*}} = constant 4 : index
- // CHECK: {{.*}} = constant 5 : index
- // CHECK: {{.*}} = constant 6 : index
- // CHECK: {{.*}} = constant 7 : index
- // CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) {
- // CHECK: [[hCt:%.+]] = alloc() : memref<f32>
- // CHECK: [[Ot:%.+]] = alloc() : memref<f32>
- // CHECK: [[ct:%.+]] = alloc() : memref<f32>
- // CHECK: [[Ft:%.+]] = alloc() : memref<f32>
- // CHECK: [[It:%.+]] = alloc() : memref<f32>
- // CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
- // CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
-
- // CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
- // CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
- // CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
- // CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
- // CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
-
- // CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) {
- // CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3]
- // CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
- // CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
- // CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
- // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
-
- // CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
- // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
-
- // CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
- // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
-
- // CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
- // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
-
- // CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
- // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
-
- // CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
- // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
-
- // CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
- // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
-
- // CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
- // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
-
- // CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
- // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
- // CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
- // CHECK: }
-
- // CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
- // CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32
-
- // CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
- // CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
- // CHECK: {{.*}} = constant 0.000000e+00 : f32
- // CHECK: {{.*}} = constant 1.000000e+00 : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
- // CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>
-
- // CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
- // CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32
-
- // CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
- // CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
- // CHECK: {{.*}} = constant 0.000000e+00 : f32
- // CHECK: {{.*}} = constant 1.000000e+00 : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
- // CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>
-
- // CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
- // CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32
-
- // CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
- // CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
- // CHECK: {{.*}} = constant 0.000000e+00 : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
- // CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>
-
- // CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
- // CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
- // CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
- // CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
-
- // CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
- // CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32
-
- // CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
- // CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
- // CHECK: {{.*}} = constant 0.000000e+00 : f32
- // CHECK: {{.*}} = constant 1.000000e+00 : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
- // CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>
-
- // CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
- // CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
- // CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
- // CHECK: {{.*}} = constant 0.000000e+00 : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = exp {{.*}} : f32
- // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
- // CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
- // CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>
-
- // CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
- // CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
-
- // CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
- // CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
- // CHECK: dealloc [[XtWf_GEMM]] : memref<f32>
- // CHECK: dealloc [[XtWc_GEMM]] : memref<f32>
- // CHECK: dealloc [[Ht1Ri_GEMM]] : memref<f32>
- // CHECK: dealloc [[Ht1Ro_GEMM]] : memref<f32>
- // CHECK: dealloc [[Ht1Rf_GEMM]] : memref<f32>
- // CHECK: dealloc [[Ht1Rc_GEMM]] : memref<f32>
- // CHECK: dealloc [[It]] : memref<f32>
- // CHECK: dealloc [[Ft]] : memref<f32>
- // CHECK: dealloc [[ct]] : memref<f32>
- // CHECK: dealloc [[Ot]] : memref<f32>
- // CHECK: dealloc [[hCt]] : memref<f32>
- // CHECK: }
- // CHECK: }
- // CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32>
- // CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32>
- }
-
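Editor's note: the long subf/exp/addf/divf runs in the LSTM checks above are scalar
expansions of the gate activations. A sketch of both expansions using the same std
ops the checks match (function names hypothetical):

func @sigmoid(%x : f32) -> f32 {
  // sigmoid(x) = 1 / (1 + exp(-x)); checks: constant 0, constant 1, subf, exp, addf, divf
  %zero = constant 0.000000e+00 : f32
  %one = constant 1.000000e+00 : f32
  %negx = subf %zero, %x : f32
  %e = exp %negx : f32
  %denom = addf %one, %e : f32
  %r = divf %one, %denom : f32
  return %r : f32
}

func @tanh(%x : f32) -> f32 {
  // tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)); checks: constant 0, subf, exp, exp, subf, addf, divf
  %zero = constant 0.000000e+00 : f32
  %negx = subf %zero, %x : f32
  %e1 = exp %x : f32
  %e2 = exp %negx : f32
  %num = subf %e1, %e2 : f32
  %denom = addf %e1, %e2 : f32
  %r = divf %num, %denom : f32
  return %r : f32
}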
- // -----
-
- func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
- %cst = constant unit
- %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
- return %Y_h : tensor<*xf32>
-
- // CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
- // CHECK-LABEL: @test_lstm_reverse_mode
-
- // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
- // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
- // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
- // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
- }
-
- // -----
-
- func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
- %cst = constant unit
- %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
- return %Y_h : tensor<*xf32>
-
- // CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
- // CHECK-LABEL: @test_lstm_bidirectional_mode
-
- // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
- // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>
-
- // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
- // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
- // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
- // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
- }
-
- // -----
-
- func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
- %0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: @test_squeeze
- // CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
- // CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
- // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
- // CHECK: return [[RES]] : memref<16x32x64xf32>
- }
-
- // -----
-
- func @test_squeeze_unknown_dimensions(%arg0 : tensor<?x1x32x?x64xf32>) -> tensor<*xf32> {
- %0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor<?x1x32x?x64xf32>) -> (tensor<*xf32>)
- "std.return"(%0) : (tensor<*xf32>) -> ()
-
- // CHECK-LABEL: @test_squeeze_unknown_dimensions
- // CHECK: [[C0:%.+]] = constant 0 : index
- // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x1x32x?x64xf32>
- // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x32x64xf32>
- // CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
- // CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
- // CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
- // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref<?x32x64xf32>, memref<?x1x32x?x64xf32>, i64) -> ()
- // CHECK: return [[RES]] : memref<?x32x64xf32>
- }
-
- // -----
-
- func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
- %0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
- "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
-
- // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 8)>
- // CHECK-LABEL: @test_split_equal
-
- // CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32>
- // CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32>
- // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
- // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
- // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
- // CHECK: }
- // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
- // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1)
- // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
- // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
- // CHECK: }
- // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
- }
-
- // -----
-
- func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
- %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
- "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
-
- // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)>
- // CHECK-LABEL: @test_split_variable
-
- // CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
- // CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
- // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
- // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
- // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
- // CHECK: }
- // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
- // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
- // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
- // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
- // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
- // CHECK: }
- // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
- }
-
- // -----
-
- func @test_split_unknown_dimension(%arg0 : tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
- %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
- "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()
-
- // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)>
- // CHECK-LABEL: @test_split_unknown_dimension
-
- // CHECK: [[C0:%.+]] = constant 0 : index
- // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x?x64xf32>
- // CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref<?x2x64xf32>
- // CHECK: [[C0_0:%.+]] = constant 0 : index
- // CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x?x64xf32>
- // CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
- // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[C0_2:%.+]] = constant 0 : index
- // CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
- // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
- // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x?x64xf32>
- // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
- // CHECK: }
- // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[C0_3:%.+]] = constant 0 : index
- // CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
- // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
- // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
- // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x?x64xf32>
- // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
- // CHECK: }
- // CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
- }
-
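Editor's note: in the split tests above, each output after the first reads its slice
of the source through an index map that adds the sizes of the preceding chunks; for
split = [2, 30] the second output's map is d0 + 2. A minimal sketch, function name
hypothetical:

#second_chunk = affine_map<(d0) -> (d0 + 2)>
func @split_src_index(%i : index) -> index {
  // Local index %i in the second output maps to source index %i + 2.
  %src = affine.apply #second_chunk(%i)
  return %src : index
}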
@ -0,0 +1,263 @@
|
||||||
|
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
|
||||||
|
|
||||||
|
func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
|
||||||
|
%cst = constant unit
|
||||||
|
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
|
||||||
|
return %Y_h : tensor<*xf32>
|
||||||
|
|
||||||
|
// CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)>
|
||||||
|
|
||||||
|
// CHECK-LABEL: @test_lstm_general_computation
|
||||||
|
|
||||||
|
// CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32>
|
||||||
|
// CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32>
|
||||||
|
// CHECK: {{.*}} = constant unit
|
||||||
|
|
||||||
|
// CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32
|
||||||
|
// CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3
|
||||||
|
// CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
|
||||||
|
// CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
|
||||||
|
// CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
|
||||||
|
// CHECK: }
|
||||||
|
|
||||||
|
// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
|
||||||
|
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
|
||||||
|
// CHECK: {{.*}} = constant 0 : index
|
||||||
|
// CHECK: {{.*}} = constant 3 : index
|
||||||
|
// CHECK: {{.*}} = constant 0 : index
|
||||||
|
// CHECK: {{.*}} = constant 1 : index
|
||||||
|
// CHECK: {{.*}} = constant 2 : index
|
||||||
|
// CHECK: {{.*}} = constant 3 : index
|
||||||
|
// CHECK: {{.*}} = constant 4 : index
|
||||||
|
// CHECK: {{.*}} = constant 5 : index
|
||||||
|
// CHECK: {{.*}} = constant 6 : index
|
||||||
|
// CHECK: {{.*}} = constant 7 : index
|
||||||
|
// CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2
|
||||||
|
// CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) {
|
||||||
|
// CHECK: [[hCt:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: [[Ot:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: [[ct:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: [[Ft:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: [[It:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
|
||||||
|
// CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
|
||||||
|
|
||||||
|
// CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
|
||||||
|
// CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
|
||||||
|
// CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) {
|
||||||
|
// CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3]
|
||||||
|
// CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
|
||||||
|
// CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
|
||||||
|
// CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
|
||||||
|
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
|
||||||
|
|
||||||
|
// CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
||||||
|
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
|
||||||
|
// CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: }
|
||||||
|
|
||||||
|
// CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32
|
||||||
|
|
||||||
|
// CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
|
||||||
|
// CHECK: {{.*}} = constant 0.000000e+00 : f32
|
||||||
|
// CHECK: {{.*}} = constant 1.000000e+00 : f32
|
||||||
|
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
|
||||||
|
// CHECK: {{.*}} = exp {{.*}} : f32
|
||||||
|
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
|
||||||
|
// CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
|
||||||
|
// CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>
|
||||||
|
|
||||||
|
// CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
|
||||||
|
// CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32
|
||||||
|
|
||||||
|
// CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
|
||||||
|
// CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
// CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>

// CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32

// CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
// CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>

// CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
// CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
// CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
// CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

// CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32

// CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
// CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>

// CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
// CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>

// CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
// CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

// CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWf_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWc_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ri_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ro_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rf_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rc_GEMM]] : memref<f32>
// CHECK: dealloc [[It]] : memref<f32>
// CHECK: dealloc [[Ft]] : memref<f32>
// CHECK: dealloc [[ct]] : memref<f32>
// CHECK: dealloc [[Ot]] : memref<f32>
// CHECK: dealloc [[hCt]] : memref<f32>
// CHECK: }
// CHECK: }
// CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32>
// CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32>
}
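
The subf/exp/addf/divf and subf/exp/exp/subf/addf/divf chains checked above are the scalar expansions of the gate activations. A minimal Python sketch of what they compute; since the `{{.*}}` wildcards leave operand order unpinned, this is an interpretation of the checks, not a transcription:

```python
import math

def sigmoid(x: float) -> float:
    # constant 1.0, subf (negate), exp, addf, divf: 1 / (1 + e^(-x))
    return 1.0 / (1.0 + math.exp(-x))

def tanh_rational(x: float) -> float:
    # subf (negate), exp, exp, subf, addf, divf:
    # (e^x - e^(-x)) / (e^x + e^(-x))
    return (math.exp(x) - math.exp(-x)) / (math.exp(x) + math.exp(-x))

assert abs(sigmoid(0.0) - 0.5) < 1e-12
assert abs(tanh_rational(0.5) - math.tanh(0.5)) < 1e-12
```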
// -----

func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>

// CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>

// CHECK-LABEL: @test_lstm_reverse_mode

// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}
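
The reverse-mode checks hinge on [[REVERSE_IV_MAP1]]. A one-function sketch of the map's arithmetic, with plain Python standing in for the affine map:

```python
def reverse_iv(d0: int, seq_len: int) -> int:
    """Mirrors affine_map<(d0)[s0] -> (-d0 + s0 - 1)>: the timestep read
    when the forward loop index d0 scans the sequence backwards."""
    return -d0 + seq_len - 1

# For the sequence length of 4 used above, iterations 0..3 read
# timesteps 3, 2, 1, 0:
assert [reverse_iv(i, 4) for i in range(4)] == [3, 2, 1, 0]
```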
// -----

func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>

// CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>

// CHECK-LABEL: @test_lstm_bidirectional_mode

// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>

// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}

@@ -0,0 +1,121 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s | FileCheck %s

// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)>
// CHECK-DAG: #{{.*}} = affine_map<() -> (0)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)>

// CHECK-DAG: #[[AFFINE_MAP1:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>
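
The last map, #[[AFFINE_MAP1]], is the pooled-output-size computation. A minimal sketch confirming the 31x31 shapes in these tests; the symbol binding below follows the [[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]] order that @test_pool_unknown_dimensions applies further down, so it is an inferred reading rather than a quoted definition:

```python
def pooled_dim(in_dim: int, kernel: int, pad: int, stride: int, dilation: int) -> int:
    # Mirrors (d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1 with
    # d0 = in_dim, s0 = kernel, s1 = pad, s2 = stride, s3 = dilation.
    return (in_dim + pad - (kernel - 1) * dilation - 1) // stride + 1

# A 2x2 kernel, zero padding, and unit stride/dilation turn the 32x32
# input into the 31x31 memrefs allocated by the tests below:
assert pooled_dim(32, kernel=2, pad=0, stride=1, dilation=1) == 31
```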
func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_pool_general_computation

// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32

// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>

// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }

// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}

func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_pool_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 2 : index
// CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32>
// CHECK: [[KERNEL:%.+]] = constant 2 : index
// CHECK: [[PAD:%.+]] = constant 0 : index
// CHECK: [[STRIDE:%.+]] = constant 1 : index
// CHECK: [[DILATION:%.+]] = constant 1 : index
// CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP1]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}}
// CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32>
}

func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_averagepool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}

func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_maxpool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}

func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_averagepool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>

// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {

// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }

// CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32
// CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}

// -----

func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_maxpool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>

// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {

// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }

// CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}

@@ -0,0 +1,93 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reducemax
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reducemin
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
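
The reduction identities are emitted as raw f32 bit patterns rather than float literals. A quick check that they decode to the intended infinities (the same 0xFF800000 pattern also seeds the MaxPool test above):

```python
import struct

# 0xFF800000 is -inf (identity for max), 0x7F800000 is +inf (identity for min).
assert struct.unpack(">f", bytes.fromhex("FF800000"))[0] == float("-inf")
assert struct.unpack(">f", bytes.fromhex("7F800000"))[0] == float("inf")
```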

// -----

func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reduceprod
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reducesum
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}

@@ -0,0 +1,85 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

// CHECK: [[INDEX_MAP1:#.+]] = affine_map<(d0) -> (d0 + 8)>

// CHECK-LABEL: @test_split_equal

// CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP1]](%arg1)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
}
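
[[INDEX_MAP1]] is the only non-trivial piece of the split lowering: the second output reads the input at the loop index offset by the preceding chunk's extent. A tiny sketch of that arithmetic:

```python
def second_output_index(i: int, first_chunk: int) -> int:
    # Mirrors affine_map<(d0) -> (d0 + 8)> for a first chunk of size 8.
    return i + first_chunk

# Splitting axis 0 of a 16x32x64 tensor into two halves of 8: output 1's
# rows 0 and 7 come from input rows 8 and 15.
assert [second_output_index(j, 8) for j in (0, 7)] == [8, 15]
```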

// -----

func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

// CHECK: [[INDEX_MAP2:#.+]] = affine_map<(d0) -> (d0 + 2)>

// CHECK-LABEL: @test_split_variable

// CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP2]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
}

// -----

func @test_split_unknown_dimension(%arg0 : tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

// CHECK: [[INDEX_MAP3:#.+]] = affine_map<(d0) -> (d0 + 2)>

// CHECK-LABEL: @test_split_unknown_dimension

// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x?x64xf32>
// CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref<?x2x64xf32>
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x?x64xf32>
// CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_3:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP3]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
}

@@ -0,0 +1,29 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_squeeze
// CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
// CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<16x32x64xf32>
}
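
[[TENSOR_SIZE]] is a byte count for krnl.memcpy. A one-line check of the constant, where the factor 4 is the size of an f32:

```python
import math

# Squeezing 16x1x32x1x64 drops only unit axes, so the copy moves all
# 16*32*64 elements at 4 bytes each.
assert math.prod((16, 32, 64)) * 4 == 131072
```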

// -----

func @test_squeeze_unknown_dimensions(%arg0 : tensor<?x1x32x?x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor<?x1x32x?x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_squeeze_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x1x32x?x64xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x32x64xf32>
// CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
// CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
// CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref<?x32x64xf32>, memref<?x1x32x?x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<?x32x64xf32>
}
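
Here only the static tail of the shape can be folded at compile time; the runtime leading dim is index_cast to i64 and multiplied in. A quick check of [[TENSOR_SIZE_0]]:

```python
# The static part of ?x32x64 f32 is 32*64 elements at 4 bytes each,
# matching the 8192 constant fed into the muli above.
assert 32 * 64 * 4 == 8192
```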

@@ -239,10 +239,15 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_exp_exp
-/// First Exp
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Exp
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -252,9 +257,6 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Exp
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -278,10 +280,14 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_tanh_tanh
-/// First Tanh
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Tanh
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -297,9 +303,6 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Tanh
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -329,10 +332,14 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_sinh_sinh
-/// First Sinh
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_0:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Sinh
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -348,9 +355,6 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Sinh
-// CHECK: [[C0_0:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -380,10 +384,14 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_cosh_cosh
-/// First Cosh
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Cosh
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -399,9 +407,6 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Cosh
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -430,10 +435,14 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_sigmoid_sigmoid
-/// First Sigmoid
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Sigmoid
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -448,9 +457,6 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Sigmoid
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -479,10 +485,14 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_relu_relu
-/// First Relu
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Relu
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -494,9 +504,6 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Relu
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -625,10 +632,14 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_elu_elu
-/// First Elu
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Elu
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -645,9 +656,6 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Elu
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -678,10 +686,14 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_leakyrelu_leakyrelu
-/// First LeakyRelu
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First LeakyRelu
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -695,9 +707,6 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second LeakyRelu
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -725,10 +734,14 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_selu_selu
-/// First Selu
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Selu
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -746,9 +759,6 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Selu
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -780,10 +790,14 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_hardsigmoid_hardsigmoid
-/// First HardSigmoid
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First HardSigmoid
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -802,9 +816,6 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second HardSigmoid
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@@ -837,10 +848,14 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 "std.return"(%1) : (tensor<*xf32>) -> ()

 // CHECK-LABEL: test_reciprocal_reciprocal
-/// First Reciprocal
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+
+/// First Reciprocal
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
@@ -851,9 +866,6 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: affine.store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

 /// Second Reciprocal
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt %s | FileCheck %s

 //===----------------------------------------------------------------------===//
 // CHECK-LABEL: @check_map1(%arg0: tuple<i64, f32>) -> tensor<*xf32> {