This reverts commit b27e57cc4f.

parent b27e57cc4f
commit a58594ec81
@@ -1,4 +1,5 @@
-//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering -------===//
+//====------ ConvertONNXToKrnl.cpp - ONNX dialects to Krnl lowering
+//--------===//
 //
 // Copyright 2019 The IBM Research Authors.
 //
@@ -33,38 +34,6 @@ public:
   }
 };
 
-//===----------------------------------------------------------------------===//
-// FuncOp lowering to Function with init and main blocks.
-//===----------------------------------------------------------------------===//
-
-struct FuncOpSignatureConversion : public OpConversionPattern<FuncOp> {
-  FuncOpSignatureConversion(MLIRContext *ctx, TypeConverter &converter)
-      : OpConversionPattern(converter, ctx) {}
-
-  /// Hook for derived classes to implement combined matching and rewriting.
-  LogicalResult matchAndRewrite(FuncOp funcOp, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
-    FunctionType type = funcOp.getType();
-
-    // Convert the original function types.
-    TypeConverter::SignatureConversion result(type.getNumInputs());
-    SmallVector<Type, 1> newResults;
-    if (failed(typeConverter->convertSignatureArgs(type.getInputs(), result)) ||
-        failed(typeConverter->convertTypes(type.getResults(), newResults)) ||
-        failed(rewriter.convertRegionTypes(
-            &funcOp.getBody(), *typeConverter, &result)))
-      return failure();
-
-    // Update the function signature in-place.
-    rewriter.updateRootInPlace(funcOp, [&] {
-      funcOp.setType(FunctionType::get(
-          result.getConvertedTypes(), newResults, funcOp.getContext()));
-    });
-    addInitBlock(rewriter, funcOp.getLoc(), funcOp);
-    return success();
-  }
-};
-
 //===----------------------------------------------------------------------===//
 // Frontend to Krnl Dialect lowering pass
 //===----------------------------------------------------------------------===//
@@ -80,10 +49,6 @@ struct FrontendToKrnlLoweringPass
 void FrontendToKrnlLoweringPass::runOnOperation() {
   ModuleOp module = getOperation();
-
-  // Create an entry for this module
-  initMap.insert(std::pair<ModuleOp, std::unique_ptr<FunctionToInitStates>>(
-      module, std::make_unique<FunctionToInitStates>()));
 
   // The first thing to define is the conversion target. This will define the
   // final target for this lowering.
   ConversionTarget target(getContext());
@@ -112,6 +77,12 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
     return tensor_to_memref_converter.isSignatureLegal(op.getType());
   });
 
+  // Type conversion for function signatures.
+  // Call MLIR FuncOp signature conversion when result type is
+  // a ranked tensor.
+  populateFuncOpTypeConversionPattern(
+      patterns, &getContext(), tensor_to_memref_converter);
+
   // Frontend operation lowering.
   // Math
   populateLoweringONNXElementwiseOpPattern(patterns, &getContext());
@@ -138,16 +109,12 @@ void FrontendToKrnlLoweringPass::runOnOperation() {
   populateLoweringONNXLSTMOpPattern(patterns, &getContext());
   // Entry point
   patterns.insert<ONNXEntryPointLowering>(&getContext());
-  patterns.insert<FuncOpSignatureConversion>(
-      &getContext(), tensor_to_memref_converter);
 
   // With the target and rewrite patterns defined, we can now attempt the
   // conversion. The conversion will signal failure if any of our `illegal`
   // operations were not converted successfully.
   if (failed(applyPartialConversion(module, target, patterns)))
     signalPassFailure();
-
-  initMap.erase(module);
 }
 
 std::unique_ptr<Pass> mlir::createLowerToKrnlPass() {
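Net effect of the five hunks above: the hand-rolled FuncOpSignatureConversion pattern and the per-module initMap bookkeeping are deleted, and signature conversion is handed back to MLIR's stock pattern. A condensed sketch of the post-revert pass body, assuming the `patterns` list and `tensor_to_memref_converter` declared earlier in this function (not shown in these hunks):

```cpp
void FrontendToKrnlLoweringPass::runOnOperation() {
  ModuleOp module = getOperation();

  // The conversion target defines what the lowered IR may contain.
  ConversionTarget target(getContext());

  // Stock MLIR signature conversion replaces the deleted
  // FuncOpSignatureConversion pattern.
  populateFuncOpTypeConversionPattern(
      patterns, &getContext(), tensor_to_memref_converter);

  // Per-op ONNX lowerings are registered as before, e.g.:
  populateLoweringONNXElementwiseOpPattern(patterns, &getContext());

  // Partial conversion fails if any illegal operation survives.
  if (failed(applyPartialConversion(module, target, patterns)))
    signalPassFailure();
}
```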
@@ -518,11 +518,10 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
-      alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {X});
+      alloc =
+          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, {X});
 
     SmallVector<Value, 4> loopIVs;
     if (!hasAllScalarValues(operands)) {
@@ -575,11 +574,10 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
     // comes from.
     // TODO: can the dimension of the result differ after optimizations?
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, operands);
+          memRefType, loc, rewriter, insertDealloc, operands);
 
     SmallVector<Value, 4> loopIVs;
     std::map<int, std::map<int, Value>> broadcastedDimInfo;
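The same mechanical rewrite repeats in every op lowering below: `insertAllocAndDealloc` loses the `Operation *op` parameter it only needed for the init-block bookkeeping. One call site, before and after (with `{X}` standing for whichever shape operands the op passes):

```cpp
// Pre-revert: the op handle was threaded through so the helper could
// decide whether the alloc may be hoisted into the init block.
alloc = insertAllocAndDealloc(
    memRefType, loc, rewriter, insertDealloc, op, {X});

// Post-revert: the helper simply allocates at the current insertion
// point; no op handle required.
alloc = insertAllocAndDealloc(
    memRefType, loc, rewriter, insertDealloc, {X});
```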
@@ -46,8 +46,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
       auto memRefShape = memRefType.getShape();
       SmallVector<Value, 2> allocOperands;
@@ -43,16 +43,8 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
-      PatternRewriter::InsertionGuard insertGuard(rewriter);
-      FuncOp function = getContainingFunction(op);
-      bool functionLevelAlloc = (op->getParentOp() == function);
-      bool canMove = checkAllocMovable(function, functionLevelAlloc, {A, B});
-      if (canMove)
-        rewriter.setInsertionPoint(getInitInsertionPoint(function));
-
       SmallVector<Value, 4> allocOperands;
       if (AShape.size() >= 2 && BShape.size() >= 2) {
         // Both arguments are N-D, N >= 2
@@ -116,9 +108,6 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
       }
 
       alloc = rewriter.create<AllocOp>(loc, memRefType, allocOperands);
-
-      if (canMove)
-        markOperandInInitBlock(function, alloc);
     }
 
     if (AShape.size() >= 2 || BShape.size() >= 2) {
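MatMul was the one lowering that inlined the hoisting logic instead of leaving it inside the helper. The deleted lines in the first hunk above reduce to this sequence:

```cpp
// Pre-revert hoisting, condensed: when every value feeding the dynamic
// alloc is already available in the function's init block, emit the
// alloc there, right before the init block's terminating branch.
PatternRewriter::InsertionGuard insertGuard(rewriter);
FuncOp function = getContainingFunction(op);
bool functionLevelAlloc = (op->getParentOp() == function);
if (checkAllocMovable(function, functionLevelAlloc, {A, B}))
  rewriter.setInsertionPoint(getInitInsertionPoint(function));
// ... allocOperands are then built and the AllocOp created as usual ...
```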
@@ -159,8 +159,8 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefOutType)) {
-      alloc = insertAllocAndDealloc(
-          memRefOutType, loc, rewriter, insertDealloc, op);
+      alloc =
+          insertAllocAndDealloc(memRefOutType, loc, rewriter, insertDealloc);
     } else {
       SmallVector<Value, 2> allocOperands;
       for (decltype(outRank) i = 0; i < outRank; ++i) {
@@ -36,21 +36,18 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, input);
+          memRefType, loc, rewriter, insertDealloc, input);
 
     // Shape of the result
     auto memRefShape = memRefType.getShape();
 
     // Insert allocations and deallocations for sum and max.
     MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0);
-    Value sumOp =
-        insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op);
-    Value maxOp =
-        insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true, op);
+    Value sumOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true);
+    Value maxOp = insertAllocAndDealloc(scalarMemRefType, loc, rewriter, true);
     Value zero = emitConstantOp(rewriter, loc, elementType, 0);
     Value negInfinity = rewriter.create<ConstantOp>(loc,
         FloatAttr::get(elementType, -std::numeric_limits<float>::infinity()));
@@ -36,11 +36,10 @@ struct ONNXConvOpLowering : public ConversionPattern {
     bool hasBias = !biasOperand.getType().isa<NoneType>();
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {inputOperand});
+          memRefType, loc, rewriter, insertDealloc, {inputOperand});
 
     // R = Conv(D, K)
     //
@@ -42,11 +42,10 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {operand});
+          memRefType, loc, rewriter, insertDealloc, {operand});
 
     // Operand's dimensions can be in the form of NxCxD1xD2x...xDn or N.
     // In case of N, C is assumed to be 1.
@@ -235,8 +235,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else {
       alloc = insertAllocAndDeallocForPooling(rewriter, loc, insertDealloc,
           memRefType, inputOperand, kernelShape, pads, strides, dilations,
@@ -11,8 +11,6 @@
 
 #include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
 
-std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
-
 /// Check if all dimensions are known at compile time.
 bool hasAllConstantDimensions(MemRefType type) {
   auto memRefShape = type.getShape();
@@ -45,151 +43,11 @@ MemRefType convertToMemRefType(Type type) {
   return memRefType;
 }
 
-/// Retrieve function which contains the current operation.
-FuncOp getContainingFunction(Operation *op) {
-  Operation *parentFuncOp = op->getParentOp();
-
-  // While parent is not a FuncOp and its cast to a FuncOp is null.
-  while (!llvm::dyn_cast_or_null<FuncOp>(parentFuncOp))
-    parentFuncOp = parentFuncOp->getParentOp();
-
-  return cast<FuncOp>(parentFuncOp);
-}
-
-void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp function) {
-  // If this is the first time we encounter an operation in this
-  // function, we create an entry inside the initMap and split the
-  // function body into an init block and a main block.
-  //
-  // function func_name() {
-  //   ... init block ...
-  //   br ^bb1
-  // ^bb1:  // pred: ^bb0
-  //   ... main block ...
-  //   return
-  // }
-  //
-  // Note: the block ^bb0 being the first block has its label omitted.
-  //
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  if (initStates->count(function) == 0) {
-    initStates->insert(
-        std::pair<FuncOp, std::unique_ptr<ONNXOperandsInitState>>(
-            function, std::make_unique<ONNXOperandsInitState>()));
-    std::unique_ptr<ONNXOperandsInitState> &initState =
-        initStates->at(function);
-
-    // All input arguments are considered as part of the initialization block
-    // so add them to the operandsInInitBlock set.
-    Block *functionBlock = &function.front();
-    for (auto arg : functionBlock->getArguments())
-      initState->operandsInInitBlock.insert(arg);
-
-    PatternRewriter::InsertionGuard insertGuard(rewriter);
-    rewriter.setInsertionPointToStart(functionBlock);
-
-    initState->initBlock = rewriter.getInsertionBlock();
-    auto currentPoint = rewriter.getInsertionPoint();
-    initState->mainBlock =
-        rewriter.splitBlock(initState->initBlock, currentPoint);
-
-    rewriter.setInsertionPointToEnd(initState->initBlock);
-
-    // Insert a branch operation from initBlock to mainBlock. This
-    // ensures the final code contains legal blocks.
-    initState->branchInit =
-        rewriter.create<BranchOp>(loc, initState->mainBlock);
-
-    // Set insertion point to start of mainBlock.
-    rewriter.setInsertionPointToStart(initState->mainBlock);
-  }
-}
-
-bool containingFunctionHasInitBlock(Operation *op) {
-  FuncOp function = getContainingFunction(op);
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  return initStates->count(function) > 0;
-}
-
-Block *getInitBlock(FuncOp function) {
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  return initStates->at(function)->initBlock;
-}
-
-Block *getMainBlock(FuncOp function) {
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  return initStates->at(function)->mainBlock;
-}
-
-BranchOp getInitInsertionPoint(FuncOp function) {
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  return initStates->at(function)->branchInit;
-}
-
-/// Check if all operands used for allocating the size of the result are
-/// in the initialization block (i.e. initBlock).
-bool checkAllocMovable(
-    FuncOp function, bool functionLevelAlloc, ArrayRef<Value> operands) {
-  // If no initialization block exists then alloc cannot be moved.
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  if (initStates->count(function) == 0)
-    return false;
-
-  // If the alloc is not a function level alloc then it cannot be moved.
-  if (!functionLevelAlloc)
-    return false;
-
-  bool allInitOrArg = true;
-  for (int i = 0; i < operands.size(); i++) {
-    if (initStates->at(function)->operandsInInitBlock.count(operands[i]) == 0)
-      allInitOrArg = false;
-  }
-
-  return allInitOrArg;
-}
-
-/// Add operand to list of operands in the init block.
-void markOperandInInitBlock(FuncOp function, Value operand) {
-  // Check if function is valid. At this point it has to be.
-  assert(function && "Attempt to add operand when function is null.");
-  ModuleOp module = cast<ModuleOp>(function.getParentOp());
-  std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
-  // A valid function must have an initialization state.
-  assert(initStates->count(function) > 0 &&
-         "Initialization state not defined for this function.");
-  initStates->at(function)->operandsInInitBlock.insert(operand);
-}
-
 /// Insert an allocation and deallocation for the given MemRefType.
-Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
-    bool functionLevelAlloc, ArrayRef<Value> operands, int64_t alignment) {
+Value insertAllocAndDealloc(MemRefType type, Location loc,
+    PatternRewriter &rewriter, bool insertDealloc, ArrayRef<Value> operands,
+    int64_t alignment) {
   // Put together alloc operands for any dynamic dimensions of the memref.
-  // Save insertion point in case we need to change it to the initBlock.
-  PatternRewriter::InsertionGuard insertGuard(rewriter);
-
-  // Check if all operands of the alloc are in the init region or are input
-  // arguments. If some of them are not or there is no init block, this
-  // variable will be false.
-  bool canMove = checkAllocMovable(function, functionLevelAlloc, operands);
-
-  // If a legal move to the init block is possible, set insertion point
-  // at the end of the initialization block just before the branch instruction.
-  if (canMove)
-    rewriter.setInsertionPoint(getInitInsertionPoint(function));
-
   AllocOp alloc;
   if (!operands.empty()) {
     auto memRefShape = type.getShape();
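The core of the deleted machinery is the block split in addInitBlock, condensed here from the removal above:

```cpp
// Split the function's entry block into an init block and a main block,
// then bridge them with an unconditional branch so the IR stays legal.
// The branch is the insertion point that later allocs are hoisted
// before; a canonicalization pass folds the branch away afterwards.
PatternRewriter::InsertionGuard insertGuard(rewriter);
Block *functionBlock = &function.front();
rewriter.setInsertionPointToStart(functionBlock);

initState->initBlock = rewriter.getInsertionBlock();
initState->mainBlock = rewriter.splitBlock(
    initState->initBlock, rewriter.getInsertionPoint());

rewriter.setInsertionPointToEnd(initState->initBlock);
initState->branchInit = rewriter.create<BranchOp>(loc, initState->mainBlock);
rewriter.setInsertionPointToStart(initState->mainBlock);
```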
@@ -239,11 +97,6 @@ Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
     } else {
       alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
     }
-
-    // If the alloc was emitted inside the initialization block then add
-    // it to the set of values emitted in the initialization block.
-    if (canMove)
-      markOperandInInitBlock(function, alloc.getResult());
   } else {
     // Set alignment attribute. Default value is `-1`, which does not set
     // alignment.
@@ -260,52 +113,17 @@ Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
   // Make sure to allocate at the beginning of the block if
   // all dimensions are known.
   auto *parentBlock = alloc.getOperation()->getBlock();
-  if (hasAllConstantDimensions(type)) {
-    // Check if this move is a move to the init block or to the top of the
-    // function without an init block. For the case in which all dimensions
-    // are constant, the `canMove` variable will be false if there is no
-    // init block.
-    if (canMove) {
-      // The alloc was emitted in the init block already so just record
-      // that this value is now available in the init block.
-      alloc.getOperation()->moveBefore(&getInitBlock(function)->front());
-      markOperandInInitBlock(function, alloc.getResult());
-    } else {
-      // No init block exists in this case so just move it as before.
-      alloc.getOperation()->moveBefore(&parentBlock->front());
-    }
-  }
+  if (hasAllConstantDimensions(type))
+    alloc.getOperation()->moveBefore(&parentBlock->front());
 
   if (insertDealloc) {
     auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
-    // Move dealloc to the end of the main block if such a block exists.
-    if (canMove) {
-      Block *mainBlock = getMainBlock(function);
-      dealloc.getOperation()->moveBefore(&mainBlock->back());
-    } else {
-      // If no main block exists, move to parent block.
-      dealloc.getOperation()->moveBefore(&parentBlock->back());
-    }
+    dealloc.getOperation()->moveBefore(&parentBlock->back());
   }
 
   return alloc;
 }
 
-/// Insert an allocation and deallocation for the given MemRefType.
-Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
-    ArrayRef<Value> operands, int64_t alignment) {
-  FuncOp function = getContainingFunction(op);
-  bool functionLevelAlloc = (op->getParentOp() == function);
-  if (!functionLevelAlloc) {
-    printf("This is not a function level alloc!\n");
-  }
-
-  return insertAllocAndDeallocWithFunction(type, loc, rewriter, insertDealloc,
-      function, functionLevelAlloc, operands, alignment);
-}
-
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
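After the two deletions above, the placement policy is uniform again; the surviving logic amounts to:

```cpp
// Post-revert placement: a fully static alloc floats to the top of its
// block, and its dealloc (when requested) moves to just before the
// block's terminator.
auto *parentBlock = alloc.getOperation()->getBlock();
if (hasAllConstantDimensions(type))
  alloc.getOperation()->moveBefore(&parentBlock->front());

if (insertDealloc) {
  auto dealloc = rewriter.create<DeallocOp>(loc, alloc);
  dealloc.getOperation()->moveBefore(&parentBlock->back());
}
```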
@@ -645,10 +463,10 @@ int64_t ArrayAttrIntVal(ArrayAttr a, int i) {
 }
 
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp) {
-  FuncOp function = getContainingFunction(allocOp->getOperation());
+  auto parentBlock = allocOp->getOperation()->getBlock();
 
   bool opIsUsedInGetRef = false;
-  function.walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
+  parentBlock->walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
     auto result = allocOp->getResult();
     for (const auto &operand : op.getOperands())
       if (operand == result)
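This hunk (and the matching checkOpResultIsReturned change further down) narrows the IR walk from the whole containing function back to the alloc's own block. The full post-revert helper, with the truncated lambda body completed under the assumption that it only sets the flag:

```cpp
bool checkOpResultIsUsedByGetRef(AllocOp *allocOp) {
  auto parentBlock = allocOp->getOperation()->getBlock();

  bool opIsUsedInGetRef = false;
  parentBlock->walk([&opIsUsedInGetRef, allocOp](KrnlGetRefOp op) {
    auto result = allocOp->getResult();
    for (const auto &operand : op.getOperands())
      if (operand == result)
        opIsUsedInGetRef = true; // assumed: the hunk cuts off here
  });
  return opIsUsedInGetRef;       // assumed: not shown in the hunk
}
```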
@@ -19,9 +19,7 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Sequence.h"
-#include "llvm/ADT/SetVector.h"
 
 #include "src/Dialect/Krnl/KrnlHelper.hpp"
 #include "src/Dialect/Krnl/KrnlOps.hpp"
@@ -31,37 +29,6 @@
 
 using namespace mlir;
 
-//===----------------------------------------------------------------------===//
-// Insertion point for initialization instructions and the blocks used for
-// inserting the initialization and main code. These blocks will disappear
-// when the first canonicalization is performed because the init block
-// unconditionally branches into the second block. These blocks exist only for
-// the purpose of this optimization.
-// The support happens on a per function basis.
-//===----------------------------------------------------------------------===//
-
-typedef struct ONNXOperandsInitState {
-  Block *initBlock;
-  Block *mainBlock;
-  BranchOp branchInit;
-  llvm::SetVector<Value> operandsInInitBlock;
-} ONNXOperandsInitState;
-
-typedef std::map<FuncOp, std::unique_ptr<ONNXOperandsInitState>>
-    FunctionToInitStates;
-
-// This map is used by the FrontendToKrnlLoweringPass pass to keep track of the
-// allocations emitted in the initialization block for each function of a given
-// module. A translation unit can consist of several modules, each with several
-// functions, hence the structure shown below.
-// This data structure enables the emission of dynamic `alloc` instructions
-// in the initialization block of a function if all the other operands the
-// computation of its parameters depends on are also present in that function's
-// initialization block.
-// This data structure is live only during the execution of the frontend
-// lowering to Krnl dialect pass (FrontendToKrnlLoweringPass).
-extern std::map<ModuleOp, std::unique_ptr<FunctionToInitStates>> initMap;
-
 //===----------------------------------------------------------------------===//
 // Common functions used when lowering the ONNX frontend dialect to KRNL.
 //===----------------------------------------------------------------------===//
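Every deleted helper in the .cpp performs the same two-level lookup through this map. A condensed sketch of the access pattern, assembled from the removed getters:

```cpp
// Two-level lookup: module -> per-function init state. The asserting
// getters (getInitBlock, getMainBlock, getInitInsertionPoint) required
// the state to exist; predicates checked count() first.
ModuleOp module = cast<ModuleOp>(function.getParentOp());
std::unique_ptr<FunctionToInitStates> &initStates = initMap.at(module);
if (initStates->count(function) > 0) {
  Block *initBlock = initStates->at(function)->initBlock;
  Block *mainBlock = initStates->at(function)->mainBlock;
  // ... hoist allocs into initBlock; deallocs go to the end of mainBlock ...
}
```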
@@ -77,14 +44,9 @@ MemRefType convertToMemRefType(Type type);
 
 /// Insert an allocation and deallocation for the given MemRefType.
 Value insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, Operation *op,
+    PatternRewriter &rewriter, bool insertDealloc,
     ArrayRef<Value> operands = {}, int64_t alignment = -1);
 
-Value insertAllocAndDeallocWithFunction(MemRefType type, Location loc,
-    PatternRewriter &rewriter, bool insertDealloc, FuncOp function,
-    bool functionLevelAlloc, ArrayRef<Value> operands = {},
-    int64_t alignment = -1);
-
 // Determine if current function returns the result value of the
 // current op being lowered. If it does then dealloc should not be
 // inserted.
@@ -284,20 +246,3 @@ void populateLoweringONNXSplitOpPattern(
 bool checkOpResultIsUsedByGetRef(AllocOp *allocOp);
 
 int64_t getMemRefSizeInBytes(Value val);
-
-FuncOp getContainingFunction(Operation *op);
-
-void addInitBlock(PatternRewriter &rewriter, Location loc, FuncOp op);
-
-bool containingFunctionHasInitBlock(Operation *op);
-
-Block *getInitBlock(FuncOp function);
-
-Block *getMainBlock(FuncOp function);
-
-BranchOp getInitInsertionPoint(FuncOp function);
-
-bool checkAllocMovable(
-    FuncOp function, bool functionLevelAlloc, ArrayRef<Value> operands);
-
-void markOperandInInitBlock(FuncOp function, Value operand);
@@ -161,14 +161,13 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
     ConversionPatternRewriter &rewriter, Location loc, ONNXLSTMOp *op,
     typename ONNXLSTMOp::Adaptor operandAdaptor) {
   LstmState state;
-  FuncOp function = cast<FuncOp>(op->getParentOp());
 
   // Insert allocation and deallocation for the results of this operation.
   if (!isNoneType(op->Y())) {
     auto yMemRefType = convertToMemRefType(op->Y().getType());
     if (hasAllConstantDimensions(yMemRefType))
-      state.allH = insertAllocAndDeallocWithFunction(yMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 0), function, true);
+      state.allH = insertAllocAndDealloc(yMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 0));
     else {
       llvm_unreachable("Unsupported dynamic dimensions.");
     }
@@ -180,8 +179,8 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
   if (!isNoneType(op->Y_h())) {
     auto yhMemRefType = convertToMemRefType(op->Y_h().getType());
     if (hasAllConstantDimensions(yhMemRefType))
-      state.ht = insertAllocAndDeallocWithFunction(yhMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 1), function, true);
+      state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 1));
     else
       llvm_unreachable("Unsupported dynamic dimensions.");
   } else {
@@ -189,16 +188,15 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
         {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1),
             dimAt(operandAdaptor.R(), 2)},
         operandAdaptor.X().getType().cast<ShapedType>().getElementType());
-    state.ht = insertAllocAndDeallocWithFunction(
-        yhMemRefType, loc, rewriter, true, function, true);
+    state.ht = insertAllocAndDealloc(yhMemRefType, loc, rewriter, true);
   }
 
   // Y_c :: [num_directions, batch_size, hidden_size]
   if (!isNoneType(op->Y_c())) {
     auto ycMemRefType = convertToMemRefType(op->Y_c().getType());
     if (hasAllConstantDimensions(ycMemRefType))
-      state.ct = insertAllocAndDeallocWithFunction(ycMemRefType, loc, rewriter,
-          checkInsertDealloc(op->getOperation(), 2), function, true);
+      state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter,
+          checkInsertDealloc(op->getOperation(), 2));
     else
       llvm_unreachable("Unsupported dynamic dimensions.");
   } else {
@@ -206,8 +204,7 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
         {dimAt(operandAdaptor.W(), 0), dimAt(operandAdaptor.X(), 1),
             dimAt(operandAdaptor.R(), 2)},
         operandAdaptor.X().getType().cast<ShapedType>().getElementType());
-    state.ct = insertAllocAndDeallocWithFunction(
-        ycMemRefType, loc, rewriter, true, function, true);
+    state.ct = insertAllocAndDealloc(ycMemRefType, loc, rewriter, true);
   }
 
   // Initialize ht and ct.
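The LSTM lowering was the only caller of the WithFunction variant; its trailing `function, true` arguments map onto the removed `FuncOp function, bool functionLevelAlloc` parameters. One site, side by side:

```cpp
// Pre-revert: explicit function handle, functionLevelAlloc = true.
state.allH = insertAllocAndDeallocWithFunction(yMemRefType, loc, rewriter,
    checkInsertDealloc(op->getOperation(), 0), function,
    /*functionLevelAlloc=*/true);

// Post-revert: the plain helper with identical allocation behavior.
state.allH = insertAllocAndDealloc(yMemRefType, loc, rewriter,
    checkInsertDealloc(op->getOperation(), 0));
```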
@@ -20,7 +20,6 @@ struct ONNXConcatOpLowering : public ConversionPattern {
       ConversionPatternRewriter &rewriter) const final {
     // Gather info.
     auto loc = op->getLoc();
-
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     ONNXConcatOp concatOp = llvm::dyn_cast<ONNXConcatOp>(op);
@@ -34,11 +33,10 @@ struct ONNXConcatOpLowering : public ConversionPattern {
     assert((axis >= 0 && axis < rank) && "Concat axis out of bounds");
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {resultOperand});
+          memRefType, loc, rewriter, insertDealloc, {resultOperand});
 
     // Creates loops, one for each input.
     int writeOffset = 0;
@@ -18,7 +18,6 @@ struct ONNXIdentityOpLowering : public ConversionPattern {
 
   LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
       ConversionPatternRewriter &rewriter) const final {
-    auto loc = op->getLoc();
     ONNXIdentityOpAdaptor operandAdaptor(operands);
     rewriter.replaceOp(op, operandAdaptor.input());
     return success();
@@ -40,13 +40,11 @@ struct ONNXPadOpLowering : public ConversionPattern {
       return emitError(loc, "Pad: unknown pads");
 
     auto memRefType = convertToMemRefType(tensorType);
-
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       return emitError(loc, "unexpected output has non-Constant shape");
@@ -32,13 +32,11 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
 
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertToMemRefType(tensorType);
-
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       return emitError(loc, "unexpected output has non-Constant shape");
@@ -46,8 +46,7 @@ struct ONNXReshapeOpLowering : public ConversionPattern {
 
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType)) {
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     } else {
       // If a dimension is zero, the actual dimension value is taken from the
       // input tensor.
@@ -40,8 +40,7 @@ struct ONNXSplitOpLowering : public ConversionPattern {
       auto memRefType = convertToMemRefType(splitOp.outputs()[i].getType());
 
       if (hasAllConstantDimensions(memRefType))
-        alloc =
-            insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+        alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
       else {
         SmallVector<Value, 4> allocOperands;
         auto shape = memRefType.getShape();
@@ -39,8 +39,7 @@ struct ONNXSqueezeOpLowering : public ConversionPattern {
     Value alloc, tensorSize;
     bool insertDealloc = checkInsertDealloc(op);
     if (hasAllConstantDimensions(memRefType)) {
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
       auto tensorSizeInBytes = elementSizeInBytes;
       for (int i = 0; i < memRefShape.size(); ++i) {
         tensorSizeInBytes *= memRefShape[i];
@@ -22,17 +22,15 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
     auto loc = op->getLoc();
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertToMemRefType(*op->result_type_begin());
 
     Value alloc;
     bool insertDealloc = checkInsertDealloc(op);
     Value data = operandAdaptor.data();
 
     if (hasAllConstantDimensions(memRefType))
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, op, {data});
+          memRefType, loc, rewriter, insertDealloc, {data});
 
     // Number of loops
     auto memRefShape = memRefType.getShape();
@@ -44,8 +44,7 @@ struct ONNXUnsqueezeOpLowering : public ConversionPattern {
     bool insertDealloc = checkInsertDealloc(op);
     auto memRefShape = memRefType.getShape();
     if (hasAllConstantDimensions(memRefType)) {
-      alloc =
-          insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc, op);
+      alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
       for (int i = 0; i < memRefShape.size(); ++i) {
         Value dimVal = emitConstantOp(
             rewriter, loc, rewriter.getIntegerType(64), memRefShape[i]);
@@ -378,7 +378,6 @@ void addONNXToMLIRPasses(mlir::PassManager &pm) {
 
 void addONNXToKrnlPasses(mlir::PassManager &pm) {
   pm.addPass(mlir::createLowerToKrnlPass());
-  pm.addPass(mlir::createCanonicalizerPass());
   pm.addPass(mlir::createPackKrnlGlobalConstantsPass());
   // An additional pass of canonicalization is helpful because lowering
   // from ONNX dialect to Standard dialect exposes additional canonicalization
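The dropped canonicalizer most likely existed to fold away the init block's unconditional branch right after lowering; with the init block gone there is nothing for it to clean up. Post-revert pipeline, condensed:

```cpp
void addONNXToKrnlPasses(mlir::PassManager &pm) {
  pm.addPass(mlir::createLowerToKrnlPass());
  // No canonicalizer needed here anymore; a later canonicalization pass
  // (mentioned in the comment above) still runs.
  pm.addPass(mlir::createPackKrnlGlobalConstantsPass());
  // ...
}
```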
@@ -87,6 +87,8 @@ public:
 
     // Get a KrnlGetRefOp which does not use the current alloc.
     if (KrnlGetRefOp unbundledGetRef = getUnbundledGetRef(&allocOp)) {
+      unbundledGetRef.dump();
+
       // Current memory pool size is the offset for the newly bundled
       // internal MemRef. Emit the offset as a constant.
       auto offset = rewriter.create<ConstantOp>(
@@ -24,10 +24,10 @@ using namespace mlir;
 
 namespace {
 
 bool checkOpResultIsReturned(AllocOp *allocOp) {
-  FuncOp function = getContainingFunction(allocOp->getOperation());
+  auto parentBlock = allocOp->getOperation()->getBlock();
 
   bool opIsReturned = false;
-  function.walk([&opIsReturned, allocOp](ReturnOp op) {
+  parentBlock->walk([&opIsReturned, allocOp](ReturnOp op) {
     auto result = allocOp->getResult();
     for (const auto &operand : op.getOperands())
       if (operand == result)
@@ -1,4 +1,6 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+
+// -----
 
 func @test_constant(%arg0 : tensor<1xf32>) -> tensor<*xf32> {
   %0 = "onnx.Constant"() {value = dense<[[0.0, 0.0], [1.0, 1.1], [2.0, 2.1]]> : tensor<3x2xf32>} : () -> tensor<*xf32>
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --lower-krnl --lower-all-llvm %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
 
 func @test_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
   %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32>
@@ -1,4 +1,6 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --lower-krnl --lower-all-llvm %s -split-input-file | FileCheck %s
+
+// -----
 
 func @test_reshape(%arg0 : tensor<?x10xf32>, %arg1 : tensor<4xi64>) -> tensor<*xf32> {
   %0 = "onnx.Reshape"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<4xi64>) -> tensor<*xf32>
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool --bundle-memory-pools --canonicalize %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool --bundle-memory-pools --canonicalize %s -split-input-file | FileCheck %s
 
 func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>) -> tensor<10x20xf32> {
   %0 = "onnx.Add"(%arg0, %arg0) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<10x10xf32>
@@ -10,8 +10,8 @@ func @test_bundle_memory_pool(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf32>
   return %5 : tensor<10x20xf32>
 
 // CHECK-LABEL: test_bundle_memory_pool
-// CHECK: [[CONST00:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST0:%.+]] = constant 0 : i64
+// CHECK: [[CONST00:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST400:%.+]] = constant 400 : i64
 // CHECK: [[CONST1200:%.+]] = constant 1200 : i64
 // CHECK: [[CONST2000:%.+]] = constant 2000 : i64
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --shape-inference --lower-frontend --canonicalize --enable-memory-pool %s | FileCheck %s
+// RUN: onnx-mlir-opt --shape-inference --lower-frontend --enable-memory-pool %s -split-input-file | FileCheck %s
 
 /// One intermediate value to allocate in the memory pool.
 func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
@@ -13,10 +13,10 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
 // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
 // CHECK: krnl.define_loops
 // CHECK: krnl.iterate
-// CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
-// CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
+// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
+// CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
 // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-// CHECK: affine.store [[ADDF1]], [[GETREF]][symbol(%arg1), symbol(%arg2)] : memref<10x10xf32>
+// CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32>
 // CHECK: krnl.define_loops
 // CHECK: krnl.iterate
 // CHECK: dealloc [[MEMPOOL]] : memref<400xi8>
@@ -31,8 +31,8 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
   return %2 : tensor<10x20xf32>
 
 // CHECK-LABEL: test_enable_memory_pool_2
-// CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST0:%.+]] = constant 0 : i64
+// CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[RES:%.+]] = alloc() : memref<10x20xf32>
 // CHECK: [[MEMPOOL0:%.+]] = alloc() : memref<800xi8>
 // CHECK: [[GETREF0:%.+]] = "krnl.getref"([[MEMPOOL0]], [[CONST0]]) : (memref<800xi8>, i64) -> memref<10x20xf32>
@ -40,24 +40,24 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
|
||||||
// CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
|
// CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
|
||||||
// CHECK: krnl.define_loops
|
// CHECK: krnl.define_loops
|
||||||
// CHECK: krnl.iterate
|
// CHECK: krnl.iterate
|
||||||
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
|
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
|
||||||
// CHECK: [[LOAD2:%.+]] = affine.load %arg0[symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
|
// CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
|
||||||
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
|
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
|
||||||
// CHECK: affine.store [[ADDF1]], [[GETREF1]][symbol(%arg2), symbol(%arg3)] : memref<10x10xf32>
|
// CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32>
|
||||||
// CHECK: krnl.define_loops
|
// CHECK: krnl.define_loops
|
||||||
// CHECK: krnl.iterate
|
// CHECK: krnl.iterate
|
||||||
// CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][symbol(%arg2), symbol(%arg4)] : memref<10x10xf32>
|
// CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32>
|
||||||
// CHECK: [[LOAD4:%.+]] = affine.load %arg1[symbol(%arg4), symbol(%arg3)] : memref<10x20xf32>
|
// CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32>
|
||||||
// CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
|
// CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
|
||||||
// CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32
|
// CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32
|
||||||
// CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
|
// CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
|
||||||
// CHECK: affine.store [[ADDF2]], [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
|
// CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
|
||||||
// CHECK: krnl.define_loops
|
// CHECK: krnl.define_loops
|
||||||
// CHECK: krnl.iterate
|
// CHECK: krnl.iterate
|
||||||
// CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
|
// CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
|
||||||
// CHECK: [[LOAD7:%.+]] = affine.load %arg1[symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
|
// CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32>
|
||||||
// CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32
|
// CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32
|
||||||
// CHECK: affine.store [[ADDF3]], [[RES]][symbol(%arg2), symbol(%arg3)] : memref<10x20xf32>
|
// CHECK: affine.store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32>
|
||||||
// CHECK: dealloc [[MEMPOOL1]] : memref<400xi8>
|
// CHECK: dealloc [[MEMPOOL1]] : memref<400xi8>
|
||||||
// CHECK: dealloc [[MEMPOOL0]] : memref<800xi8>
|
// CHECK: dealloc [[MEMPOOL0]] : memref<800xi8>
|
||||||
// CHECK: return [[RES]] : memref<10x20xf32>
|
// CHECK: return [[RES]] : memref<10x20xf32>
|
||||||
|
|
|
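Both memory-pool tests check the same property: the intermediate 10x10 and 10x20 results no longer get their own allocations but live at byte offsets inside single i8 pools ([[MEMPOOL0]], [[MEMPOOL1]]) and are read back through "krnl.getref" views. A minimal numpy sketch of that idea, with illustrative names and offsets rather than anything taken from the pass itself:

import numpy as np

pool = np.empty(800, dtype=np.int8)              # plays the role of memref<800xi8>

def getref(pool, byte_offset, shape, dtype=np.float32):
    # Carve a typed, shaped view out of the raw byte pool, like "krnl.getref".
    nbytes = int(np.prod(shape)) * np.dtype(dtype).itemsize
    return pool[byte_offset:byte_offset + nbytes].view(dtype).reshape(shape)

tmp = getref(pool, 0, (10, 20))                  # the memref<10x20xf32>-shaped view
tmp[:] = 0.0                                     # writes land in the shared pool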
@@ -695,6 +695,100 @@ func @test_add_with_broadcasting(%arg0 : tensor<?xf32>, %arg1 : tensor<?x10xf32>

// -----

func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  %0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_reducemax
  // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
  // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
  // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
  // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  %0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_reducemin
  // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
  // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
  // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
  // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  %0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_reduceprod
  // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
  // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
  // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  %0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: test_reducesum
  // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
  // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
  // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}

// -----
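All four reductions above share one lowering recipe: the first loop nest stores the operation's identity element into the result (the 0xFF800000 / 0x7F800000 bit patterns are -inf and +inf, the identities for max and min; prod and sum use 1.0 and 0.0), and the second loop nest folds the reduced axis into it. A numpy sketch of the ReduceMax case, as reference semantics rather than the pass itself:

import numpy as np

x = np.arange(12, dtype=np.float32).reshape(3, 2, 2)

# axes = [1] with keepdims = 0: 3x2x2 reduces to 3x2, as in the tests.
res = np.full((3, 2), -np.inf, dtype=np.float32)   # identity for max
for i in range(3):
    for j in range(2):                             # the reduced axis
        for k in range(2):
            res[i, k] = max(res[i, k], x[i, j, k])

assert np.array_equal(res, np.max(x, axis=1))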
func @test_softmax(%arg0 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Softmax"(%arg0) {axis=1:i64} : (tensor<10x10xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()
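The softmax test's CHECK lines fall outside this hunk, so only the source function is visible here. For orientation, ONNX Softmax with axis = 1 on a 10x10 input normalizes each row; the usual numerically stable formulation is below (whether this particular lowering subtracts the row max is not shown in this diff):

import numpy as np

def softmax_rows(x):
    m = np.max(x, axis=1, keepdims=True)     # shift for numerical stability
    e = np.exp(x - m)
    return e / np.sum(e, axis=1, keepdims=True)

y = softmax_rows(np.random.rand(10, 10).astype(np.float32))
assert np.allclose(np.sum(y, axis=1), 1.0)   # each row sums to 1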
@@ -1013,10 +1107,10 @@ func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor<?x5x10xf32>) -> tensor<
  "std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_matmul5
-// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg1, [[C0]] : memref<?x5x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
@@ -1045,10 +1139,10 @@ func @test_matmul6(%arg0 : tensor<?x10x5xf32>, %arg1 : tensor<5xf32>) -> tensor<
  "std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_matmul6
-// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10x5xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+// CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
// CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
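The only change in both matmul hunks is where the accumulator's zero constant is materialized: it now comes after the result allocation instead of before the dim/alloc sequence. The result shapes themselves follow numpy-style matmul broadcasting for 1-D operands, which a short check makes concrete (batch size 4 stands in for the dynamic ?):

import numpy as np

a = np.ones(5, dtype=np.float32)              # tensor<5xf32>
b = np.ones((4, 5, 10), dtype=np.float32)     # tensor<?x5x10xf32>
assert np.matmul(a, b).shape == (4, 10)       # test_matmul5: vector x batch

c = np.ones((4, 10, 5), dtype=np.float32)     # tensor<?x10x5xf32>
d = np.ones(5, dtype=np.float32)              # tensor<5xf32>
assert np.matmul(c, d).shape == (4, 10)       # test_matmul6: batch x vector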
@@ -1421,3 +1515,506 @@ func @test_concat_1(%arg0 : tensor<5x5x1x32xf32>, %arg1 : tensor<5x5x3x32xf32>,
  // CHECK: return [[RES]] : memref<5x5x9x32xf32>
}

// -----

func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)>
  // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)>
  // CHECK-DAG: #{{.*}} = affine_map<() -> (0)>
  // CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)>

  // CHECK-LABEL: @test_pool_general_computation

  // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
  // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32

  // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
  // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>

  // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
  // CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
  // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }

  // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }
}

// -----

func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-DAG: #[[AFFINE_MAP:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>
  // CHECK-LABEL: test_pool_unknown_dimensions
  // CHECK: [[C0:%.+]] = constant 2 : index
  // CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32>
  // CHECK: [[KERNEL:%.+]] = constant 2 : index
  // CHECK: [[PAD:%.+]] = constant 0 : index
  // CHECK: [[STRIDE:%.+]] = constant 1 : index
  // CHECK: [[DILATION:%.+]] = constant 1 : index
  // CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}}
  // CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32>
}
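The #[[AFFINE_MAP]] above is the standard pooling output-size formula, evaluated at runtime for the dynamic dimension. With this test's parameters it reproduces the static 32 -> 31 shrinkage; as a plain Python check:

def pool_out(dim, kernel, pad, stride, dilation):
    # (d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1, with the operand order
    # used by the affine.apply: [kernel, pad, stride, dilation].
    return (dim + pad - (kernel - 1) * dilation - 1) // stride + 1

assert pool_out(32, kernel=2, pad=0, stride=1, dilation=1) == 31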
// -----

func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: @test_averagepool_identity_value
  // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
  // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}

// -----

func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: @test_maxpool_identity_value
  // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
  // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
  // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}

// -----

func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: @test_averagepool_pooling_operation
  // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>

  // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
  // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

  // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {

  // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
  // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
  // CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }

  // CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32
  // CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }
}

// -----

func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
  %0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: @test_maxpool_pooling_operation
  // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>

  // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
  // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

  // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {

  // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
  // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
  // CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
  // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }

  // CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }
}

// -----

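The last two pooling tests pin down the per-window update: average pooling starts from 0.0, accumulates with addf, and divides by the window size afterwards; max pooling starts from -inf (0xFF800000) and keeps the larger value via cmpf/select, with the CHECK-NOT lines asserting that no post-processing step is emitted. A numpy sketch of the average case over a 2x2 window grid, shapes chosen to match the tests:

import numpy as np

x = np.random.rand(32, 32).astype(np.float32)
out = np.zeros((31, 31), dtype=np.float32)      # identity 0.0 for average
for i in range(31):
    for j in range(31):
        for ki in range(2):                     # 2x2 kernel, stride 1
            for kj in range(2):
                out[i, j] += x[i + ki, j + kj]  # the addf accumulation
out /= 4.0                                      # the trailing divf
# Max pooling would instead start from -np.inf and take max() in the inner loop.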
func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
  %cst = constant unit
  %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
  return %Y_h : tensor<*xf32>

  // CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)>
  // CHECK-LABEL: @test_lstm_general_computation

  // CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32>
  // CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32>
  // CHECK: {{.*}} = constant unit

  // CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32
  // CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
  // CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
  // CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
  // CHECK: }

  // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
  // CHECK: {{.*}} = constant 0 : index
  // CHECK: {{.*}} = constant 3 : index
  // CHECK: {{.*}} = constant 0 : index
  // CHECK: {{.*}} = constant 1 : index
  // CHECK: {{.*}} = constant 2 : index
  // CHECK: {{.*}} = constant 3 : index
  // CHECK: {{.*}} = constant 4 : index
  // CHECK: {{.*}} = constant 5 : index
  // CHECK: {{.*}} = constant 6 : index
  // CHECK: {{.*}} = constant 7 : index
  // CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) {
  // CHECK: [[hCt:%.+]] = alloc() : memref<f32>
  // CHECK: [[Ot:%.+]] = alloc() : memref<f32>
  // CHECK: [[ct:%.+]] = alloc() : memref<f32>
  // CHECK: [[Ft:%.+]] = alloc() : memref<f32>
  // CHECK: [[It:%.+]] = alloc() : memref<f32>
  // CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
  // CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

  // CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
  // CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
  // CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
  // CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
  // CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>

  // CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) {
  // CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3]
  // CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
  // CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
  // CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
  // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>

  // CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>

  // CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>

  // CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>

  // CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>

  // CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>

  // CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>

  // CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>

  // CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
  // CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: }

  // CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
  // CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32

  // CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = constant 1.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
  // CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>

  // CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
  // CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32

  // CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = constant 1.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
  // CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>

  // CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32

  // CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
  // CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>

  // CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
  // CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
  // CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
  // CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

  // CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
  // CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32

  // CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = constant 1.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
  // CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>

  // CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
  // CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = exp {{.*}} : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
  // CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>

  // CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
  // CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

  // CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
  // CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
  // CHECK: dealloc [[XtWf_GEMM]] : memref<f32>
  // CHECK: dealloc [[XtWc_GEMM]] : memref<f32>
  // CHECK: dealloc [[Ht1Ri_GEMM]] : memref<f32>
  // CHECK: dealloc [[Ht1Ro_GEMM]] : memref<f32>
  // CHECK: dealloc [[Ht1Rf_GEMM]] : memref<f32>
  // CHECK: dealloc [[Ht1Rc_GEMM]] : memref<f32>
  // CHECK: dealloc [[It]] : memref<f32>
  // CHECK: dealloc [[Ft]] : memref<f32>
  // CHECK: dealloc [[ct]] : memref<f32>
  // CHECK: dealloc [[Ot]] : memref<f32>
  // CHECK: dealloc [[hCt]] : memref<f32>
  // CHECK: }
  // CHECK: }
  // CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32>
  // CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32>
}

// -----

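The general-computation test above spells out one LSTM cell in scalar form: a GEMM accumulator per gate and per operand, sigmoid expanded as the subf/exp/addf/divf sequence, tanh as the exp-based quotient, then Ct = ft*Ct-1 + it*c~t and Ht = ot*tanh(Ct). The gate rows come from the d0 + s0*s1 offset map with s0 in {0, 1, 2, 3} for the i, o, f, c gates and s1 = hidden_size. A compact numpy sketch of the same cell, with shapes matching the test (input size 2, hidden size 3) and all-zero weights purely as placeholders:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))          # the expanded form the test checks

def lstm_cell(Xt, Ht1, Ct1, W, R, h=3):
    # W: (4*h, 2) and R: (4*h, h), stacked in [i, o, f, c] order, matching the
    # INPUT/OUTPUT/FORGET/CELL_HIDDEN_INDEX offsets above.
    g = W @ Xt + R @ Ht1
    it, ot = sigmoid(g[0*h:1*h]), sigmoid(g[1*h:2*h])
    ft, ct = sigmoid(g[2*h:3*h]), np.tanh(g[3*h:4*h])
    Ct = ft * Ct1 + it * ct                  # FtCt1 + Itct
    Ht = ot * np.tanh(Ct)                    # Ot * hCt
    return Ht, Ct

Ht, Ct = lstm_cell(np.zeros(2), np.zeros(3), np.zeros(3),
                   np.zeros((12, 2)), np.zeros((12, 3)))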
func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
  %cst = constant unit
  %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
  return %Y_h : tensor<*xf32>

  // CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
  // CHECK-LABEL: @test_lstm_reverse_mode

  // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
  // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
  // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
  // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}

// -----

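The reverse and bidirectional tests only change the index used to read the input sequence: [[REVERSE_IV_MAP]] maps loop step d0 to s0 - 1 - d0, so with sequence length 4 the time steps are visited as 3, 2, 1, 0. As a one-line Python check:

seq_len = 4
assert [seq_len - 1 - t for t in range(seq_len)] == [3, 2, 1, 0]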
func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
  %cst = constant unit
  %Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
  return %Y_h : tensor<*xf32>

  // CHECK: [[REVERSE_IV_MAP:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
  // CHECK-LABEL: @test_lstm_bidirectional_mode

  // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
  // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>

  // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
  // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
  // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
  // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}

// -----

func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
  %0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: @test_squeeze
  // CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
  // CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
  // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
  // CHECK: return [[RES]] : memref<16x32x64xf32>
}

// -----

func @test_squeeze_unknown_dimensions(%arg0 : tensor<?x1x32x?x64xf32>) -> tensor<*xf32> {
  %0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor<?x1x32x?x64xf32>) -> (tensor<*xf32>)
  "std.return"(%0) : (tensor<*xf32>) -> ()

  // CHECK-LABEL: @test_squeeze_unknown_dimensions
  // CHECK: [[C0:%.+]] = constant 0 : index
  // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x1x32x?x64xf32>
  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x32x64xf32>
  // CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
  // CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
  // CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
  // CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref<?x32x64xf32>, memref<?x1x32x?x64xf32>, i64) -> ()
  // CHECK: return [[RES]] : memref<?x32x64xf32>
}

// -----

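Squeeze is pure data movement, so both tests lower to a single "krnl.memcpy" whose byte count is the element count times sizeof(f32). The constants check out, including the partially folded size in the dynamic case:

import numpy as np

itemsize = np.dtype(np.float32).itemsize                 # 4 bytes
assert 16 * 1 * 32 * 1 * 64 * itemsize == 131072         # static test

# Dynamic test: the known dims fold to 32*64*4 = 8192 bytes (the squeezed
# axes count as 1), and the runtime leading dimension is multiplied in
# afterwards (the index_cast + muli pair).
assert 1 * 32 * 1 * 64 * itemsize == 8192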
func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
  %0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
  "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

  // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 8)>
  // CHECK-LABEL: @test_split_equal

  // CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32>
  // CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32>
  // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
  // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
  // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
  // CHECK: }
  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1)
  // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
  // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
  // CHECK: }
  // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
}

// -----

func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
  %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
  "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

  // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)>
  // CHECK-LABEL: @test_split_variable

  // CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
  // CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
  // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
  // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
  // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
  // CHECK: }
  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
  // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
  // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
  // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
  // CHECK: }
  // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
}

// -----

func @test_split_unknown_dimension(%arg0 : tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
  %0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
  "std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

  // CHECK: [[INDEX_MAP:#.+]] = affine_map<(d0) -> (d0 + 2)>
  // CHECK-LABEL: @test_split_unknown_dimension

  // CHECK: [[C0:%.+]] = constant 0 : index
  // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x?x64xf32>
  // CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref<?x2x64xf32>
  // CHECK: [[C0_0:%.+]] = constant 0 : index
  // CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x?x64xf32>
  // CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
  // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[C0_2:%.+]] = constant 0 : index
  // CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
  // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
  // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x?x64xf32>
  // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
  // CHECK: }
  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[C0_3:%.+]] = constant 0 : index
  // CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
  // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
  // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x?x64xf32>
  // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
  // CHECK: }
  // CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
}
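All three split tests copy each output with its own loop nest; only the second output needs an index remap, which is exactly the affine_map<(d0) -> (d0 + split[0])> checked at the top of each test. As numpy reference semantics:

import numpy as np

x = np.arange(16 * 32 * 64, dtype=np.float32).reshape(16, 32, 64)
a, b = x[:, :2, :], x[:, 2:, :]          # axis = 1, split = [2, 30]
assert a.shape == (16, 2, 64) and b.shape == (16, 30, 64)
# Output 1 at [i, j, k] reads input [i, j + 2, k] -- the d0 + 2 map:
assert np.array_equal(b, x[:, 2:32, :])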
@ -1,263 +0,0 @@
|
||||||
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
|
|
||||||
|
|
||||||
func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
|
|
||||||
%cst = constant unit
|
|
||||||
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
|
|
||||||
return %Y_h : tensor<*xf32>
|
|
||||||
|
|
||||||
// CHECK-DAG: [[ACCESS_BY_OFFSET_MAP:#.+]] = affine_map<(d0)[s0, s1] -> (d0 + s0 * s1)>
|
|
||||||
|
|
||||||
// CHECK-LABEL: @test_lstm_general_computation
|
|
||||||
|
|
||||||
// CHECK: [[CELL_STATE:%.+]] = alloc() : memref<1x3x3xf32>
|
|
||||||
// CHECK: [[HIDDEN_STATE:%.+]] = alloc() : memref<1x3x3xf32>
|
|
||||||
// CHECK: {{.*}} = constant unit
|
|
||||||
|
|
||||||
// CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32
|
|
||||||
// CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3
|
|
||||||
// CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
|
|
||||||
// CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
|
|
||||||
// CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
|
|
||||||
// CHECK: }
|
|
||||||
|
|
||||||
// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
|
|
||||||
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
|
|
||||||
// CHECK: {{.*}} = constant 0 : index
|
|
||||||
// CHECK: {{.*}} = constant 3 : index
|
|
||||||
// CHECK: {{.*}} = constant 0 : index
|
|
||||||
// CHECK: {{.*}} = constant 1 : index
|
|
||||||
// CHECK: {{.*}} = constant 2 : index
|
|
||||||
// CHECK: {{.*}} = constant 3 : index
|
|
||||||
// CHECK: {{.*}} = constant 4 : index
|
|
||||||
// CHECK: {{.*}} = constant 5 : index
|
|
||||||
// CHECK: {{.*}} = constant 6 : index
|
|
||||||
// CHECK: {{.*}} = constant 7 : index
|
|
||||||
// CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2
|
|
||||||
// CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) {
|
|
||||||
// CHECK: [[hCt:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: [[Ot:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: [[ct:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: [[Ft:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: [[It:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
|
|
||||||
// CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
|
|
||||||
|
|
||||||
// CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
|
|
||||||
// CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
|
|
||||||
// CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) {
|
|
||||||
// CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3]
|
|
||||||
// CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
|
|
||||||
// CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
|
|
||||||
// CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
|
|
||||||
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
|
|
||||||
|
|
||||||
// CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
|
|
||||||
// CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
|
|
||||||
// CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: }
|
|
||||||
|
|
||||||
// CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32
|
|
||||||
|
|
||||||
// CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = constant 0.000000e+00 : f32
|
|
||||||
// CHECK: {{.*}} = constant 1.000000e+00 : f32
|
|
||||||
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
|
|
||||||
// CHECK: {{.*}} = exp {{.*}} : f32
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
|
|
||||||
// CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>
|
|
||||||
|
|
||||||
// CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
|
|
||||||
// CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32
|
|
||||||
|
|
||||||
// CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
|
|
||||||
// CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
|
|
||||||
// CHECK: {{.*}} = constant 0.000000e+00 : f32
|
|
||||||
// CHECK: {{.*}} = constant 1.000000e+00 : f32
|
|
||||||
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
|
|
||||||
// CHECK: {{.*}} = exp {{.*}} : f32
|
|
||||||
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
|
|
||||||
// CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
// CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>

// CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
// CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
// CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32

// CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
// CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>

// CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
// CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
// CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
// CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

// CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
// CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
// CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32

// CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
// CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>

// CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
// CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
// CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>

// CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
// CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>

// CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWf_GEMM]] : memref<f32>
// CHECK: dealloc [[XtWc_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ri_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Ro_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rf_GEMM]] : memref<f32>
// CHECK: dealloc [[Ht1Rc_GEMM]] : memref<f32>
// CHECK: dealloc [[It]] : memref<f32>
// CHECK: dealloc [[Ft]] : memref<f32>
// CHECK: dealloc [[ct]] : memref<f32>
// CHECK: dealloc [[Ot]] : memref<f32>
// CHECK: dealloc [[hCt]] : memref<f32>
// CHECK: }
// CHECK: }
// CHECK: dealloc [[CELL_STATE]] : memref<1x3x3xf32>
// CHECK: return [[HIDDEN_STATE]] : memref<1x3x3xf32>
}
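
For reference, the scalar recurrence these CHECK lines walk through is the standard LSTM cell. A minimal Python sketch (my own paraphrase of the checks, not generated from the test; sigmoid and tanh are spelled out the same way the subf/exp/addf/divf sequences compute them, and the pre-GEMM'd products are passed in as plain floats):

import math

def sigmoid(x):
    # the CHECK pattern above: 1 / (1 + exp(-x))
    return 1.0 / (1.0 + math.exp(-x))

def tanh(x):
    # the CHECK pattern above: (exp(x) - exp(-x)) / (exp(x) + exp(-x))
    return (math.exp(x) - math.exp(-x)) / (math.exp(x) + math.exp(-x))

def lstm_cell(xt_wi, ht1_ri, xt_wf, ht1_rf, xt_wc, ht1_rc, xt_wo, ht1_ro, ct1):
    it = sigmoid(xt_wi + ht1_ri)    # input gate ([[It]])
    ft = sigmoid(xt_wf + ht1_rf)    # forget gate ([[Ft]])
    ct_cand = tanh(xt_wc + ht1_rc)  # candidate cell ([[ct_OUTPUT]] through [[ct]])
    Ct = ft * ct1 + it * ct_cand    # [[Ct]] = [[FtCt1]] + [[Itct]]
    ot = sigmoid(xt_wo + ht1_ro)    # output gate ([[Ot]])
    Ht = ot * tanh(Ct)              # [[Ht]] = [[Ot_LOAD]] * [[hCt_LOAD]]
    return Ht, Ct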

// -----

func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "reverse"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>

// CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>

// CHECK-LABEL: @test_lstm_reverse_mode

// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}
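
This test, and the bidirectional one below, reads the input through [[REVERSE_IV_MAP1]], affine_map<(d0)[s0] -> (-d0 + s0 - 1)>, which mirrors the loop induction variable across the sequence. A one-line Python sketch makes the arithmetic concrete (the values are illustrative only):

def reverse_iv(d0, seq_len):
    # affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
    return -d0 + seq_len - 1

print([reverse_iv(i, 4) for i in range(4)])  # [3, 2, 1, 0]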

// -----

func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>, %arg2: tensor<1x12x3xf32>) -> tensor<*xf32> {
%cst = constant unit
%Y, %Y_h, %Y_c = "onnx.LSTM"(%arg0, %arg1, %arg2, %cst, %cst, %cst, %cst, %cst) {hidden_size = 3 : i64, direction = "bidirectional"} : (tensor<4x3x2xf32>, tensor<1x12x2xf32>, tensor<1x12x3xf32>, none, none, none, none, none) -> (none, tensor<*xf32>, none)
return %Y_h : tensor<*xf32>

// CHECK-DAG: [[REVERSE_IV_MAP1:#.+]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>

// CHECK-LABEL: @test_lstm_bidirectional_mode

// CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>

// CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
// CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
// CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
// CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP1]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
// CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}
@ -1,121 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s | FileCheck %s

// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> ((s2 ceildiv s4) * s4 - s2, d0 * s3 - s2)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0, d0 * s3 + (s1 - 1) * s4 - s2 + 1)>
// CHECK-DAG: #{{.*}} = affine_map<() -> (0)>
// CHECK-DAG: #{{.*}} = affine_map<(d0)[s0, s1, s2, s3, s4] -> (s0 - ((s2 ceildiv s4) * s4 - s2), -(d0 * s3 - s2) + s0, d0 * s3 + (s1 - 1) * s4 - s2 - ((s2 ceildiv s4) * s4 - s2) + 1, d0 * s3 + (s1 - 1) * s4 - s2 - (d0 * s3 - s2) + 1)>

// CHECK-DAG: #[[AFFINE_MAP1:.+]] = affine_map<(d0)[s0, s1, s2, s3] -> ((d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1)>

func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_pool_general_computation

// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32

// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>

// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
// CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }

// CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}

func @test_pool_unknown_dimensions(%arg0 : tensor<1x3x?x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x?x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_pool_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 2 : index
// CHECK: [[DIM:%.+]] = dim %arg0, [[C0]] : memref<1x3x?x32xf32>
// CHECK: [[KERNEL:%.+]] = constant 2 : index
// CHECK: [[PAD:%.+]] = constant 0 : index
// CHECK: [[STRIDE:%.+]] = constant 1 : index
// CHECK: [[DILATION:%.+]] = constant 1 : index
// CHECK: [[AFFINE_APPLY:%.+]] = affine.apply #[[AFFINE_MAP1]]([[DIM]]){{.*}}[[KERNEL]], [[PAD]], [[STRIDE]], [[DILATION]]{{.*}}
// CHECK: [[RES:%.+]] = alloc([[AFFINE_APPLY]]) : memref<1x3x?x31xf32>
}
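
#[[AFFINE_MAP1]] is the usual pooled-output-size formula. Reading the affine.apply above as (dim)[kernel, pad, stride, dilation] (that operand order is my reading of the CHECK line, so treat it as an assumption), a quick Python check reproduces the static 32 -> 31 shape used throughout these tests:

def pooled_dim(dim, kernel, pad, stride, dilation):
    # (d0 + s1 - (s0 - 1) * s3 - 1) floordiv s2 + 1
    return (dim + pad - (kernel - 1) * dilation - 1) // stride + 1

assert pooled_dim(32, 2, 0, 1, 1) == 31  # memref<1x3x31x31xf32> above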
func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_averagepool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}

func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_maxpool_identity_value
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
}
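
The two identity constants differ because average pooling folds with addition (identity 0.0) while max pooling folds with max (identity negative infinity; 0xFF800000 is its IEEE-754 single-precision bit pattern). A quick Python sanity check:

import struct

assert struct.pack('>f', float('-inf')).hex() == 'ff800000'  # the constant checked above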
func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.AveragePool"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_averagepool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>

// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {

// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }

// CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32
// CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
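
The average-pool lowering is sum-then-divide: the POOL_LOOPS accumulate into [[RES]] and an epilogue divides by the window size. A 1-D Python sketch of that schedule (a deliberate simplification of the 4-D loops above, with no padding or strides):

def average_pool_1d(x, kernel):
    out = []
    for i in range(len(x) - kernel + 1):
        acc = 0.0                 # the 0.0 identity
        for k in range(kernel):
            acc += x[i + k]       # [[SUM]] = addf
        out.append(acc / kernel)  # [[AVERAGE]] = divf
    return out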

// -----

func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32> {
%0 = "onnx.MaxPoolSingleOut"(%arg0) {auto_pad = "NOTSET", kernel_shape = [2, 2]} : (tensor<1x3x32x32xf32>) -> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_maxpool_pooling_operation
// CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32>

// CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4
// CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) {

// CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {

// CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
// CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }

// CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
// CHECK: }
}
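
Max pooling shares the loop structure but folds with cmpf "ogt" plus select and, as the CHECK-NOTs assert, needs no epilogue. The matching 1-D sketch (same simplifying assumptions as the average-pool one above):

def max_pool_1d(x, kernel):
    out = []
    for i in range(len(x) - kernel + 1):
        acc = float('-inf')                            # the 0xFF800000 identity
        for k in range(kernel):
            acc = acc if acc > x[i + k] else x[i + k]  # cmpf "ogt" + select
        out.append(acc)                                # no divide, per the CHECK-NOTs
    return out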
@ -1,93 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMax"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reducemax
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceMin"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reducemin
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceProd"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reduceprod
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}

// -----

func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
%0 ="onnx.ReduceSum"(%arg0) {axes=[1], keepdims = 0 : i64} : (tensor<3x2x2xf32>)-> tensor<*xf32>
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reducesum
// CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
// CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
// CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
// CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
// CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>

// CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
// CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
// CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
// CHECK: }
// CHECK: return [[RES]] : memref<3x2xf32>
}
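
All four reductions share one schedule: initialize the output with the operation's identity, then fold every element of the reduced axis into it. The identities checked above are -inf (max, 0xFF800000), +inf (min, 0x7F800000), 1.0 (prod) and 0.0 (sum). A compact Python sketch over the tests' 3x2x2 shape, reducing axis 1 (hand-written, not derived from the lowering itself):

def reduce_axis1(x, identity, combine):
    # x: 3x2x2 nested list; result: 3x2 (keepdims = 0)
    out = [[identity] * len(x[0][0]) for _ in x]
    for i in range(len(x)):
        for j in range(len(x[0])):
            for k in range(len(x[0][0])):
                out[i][k] = combine(out[i][k], x[i][j][k])
    return out

# e.g. ReduceSum: reduce_axis1(x, 0.0, lambda a, b: a + b)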
@ -1,85 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 0} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

// CHECK: [[INDEX_MAP1:#.+]] = affine_map<(d0) -> (d0 + 8)>

// CHECK-LABEL: @test_split_equal

// CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP1]](%arg1)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
}
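
[[INDEX_MAP1]], affine_map<(d0) -> (d0 + 8)>, is the read offset of the second output: each output starts at the running sum of the preceding split sizes along the split axis. The same rule produces the d0 + 2 maps in the split = [2, 30] tests that follow. A small Python sketch of the offset computation:

def split_offsets(sizes):
    offsets, acc = [], 0
    for s in sizes:
        offsets.append(acc)  # offset = sum of earlier split sizes
        acc += s
    return offsets

assert split_offsets([8, 8]) == [0, 8]    # test_split_equal: d0 + 8
assert split_offsets([2, 30]) == [0, 2]   # test_split_variable: d0 + 2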

// -----

func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

// CHECK: [[INDEX_MAP2:#.+]] = affine_map<(d0) -> (d0 + 2)>

// CHECK-LABEL: @test_split_variable

// CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
// CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP2]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
}

// -----

func @test_split_unknown_dimension(%arg0 : tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>) {
%0, %1 = "onnx.Split"(%arg0) { axis = 1, split = [2, 30]} : (tensor<?x?x64xf32>) -> (tensor<*xf32>, tensor<*xf32>)
"std.return"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> ()

// CHECK: [[INDEX_MAP3:#.+]] = affine_map<(d0) -> (d0 + 2)>

// CHECK-LABEL: @test_split_unknown_dimension

// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x?x64xf32>
// CHECK: [[RES_0:%.+]] = alloc([[DIM_0]]) : memref<?x2x64xf32>
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x?x64xf32>
// CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
// CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
// CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
// CHECK: }
// CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
// CHECK: [[C0_3:%.+]] = constant 0 : index
// CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
// CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
// CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP3]](%arg2)
// CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x?x64xf32>
// CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
// CHECK: }
// CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
}
@ -1,29 +0,0 @@
// RUN: onnx-mlir-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s

func @test_squeeze(%arg0 : tensor<16x1x32x1x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1, -2]} : (tensor<16x1x32x1x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_squeeze
// CHECK: [[RES:%.+]] = alloc() : memref<16x32x64xf32>
// CHECK: [[TENSOR_SIZE:%.+]] = constant 131072 : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE]]) : (memref<16x32x64xf32>, memref<16x1x32x1x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<16x32x64xf32>
}

// -----

func @test_squeeze_unknown_dimensions(%arg0 : tensor<?x1x32x?x64xf32>) -> tensor<*xf32> {
%0 = "onnx.Squeeze"(%arg0) { axes = [1,-2]} : (tensor<?x1x32x?x64xf32>) -> (tensor<*xf32>)
"std.return"(%0) : (tensor<*xf32>) -> ()

// CHECK-LABEL: @test_squeeze_unknown_dimensions
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x1x32x?x64xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x32x64xf32>
// CHECK: [[TENSOR_SIZE_0:%.+]] = constant 8192 : i64
// CHECK: [[DIM_0_i64:%.+]] = index_cast [[DIM_0]] : index to i64
// CHECK: [[TENSOR_SIZE_1:%.+]] = muli [[TENSOR_SIZE_0]], [[DIM_0_i64]] : i64
// CHECK: "krnl.memcpy"([[RES]], %arg0, [[TENSOR_SIZE_1]]) : (memref<?x32x64xf32>, memref<?x1x32x?x64xf32>, i64) -> ()
// CHECK: return [[RES]] : memref<?x32x64xf32>
}
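
Squeeze only removes size-1 axes, so the data is bitwise unchanged and the lowering is a single krnl.memcpy whose length is the element count times sizeof(f32). With the dynamic leading dimension, the static factor 32 * 64 * 4 = 8192 is scaled by the dim at run time (the index_cast and muli above). The arithmetic in Python:

from functools import reduce

def byte_size(shape, elem_bytes=4):  # f32
    return reduce(lambda a, b: a * b, shape) * elem_bytes

assert byte_size([16, 32, 64]) == 131072  # [[TENSOR_SIZE]] in test_squeeze
assert byte_size([32, 64]) == 8192        # static factor for the dynamic case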
@ -239,15 +239,10 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_exp_exp
+/// First Exp
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Exp
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -257,6 +252,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Exp
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
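
The same edit repeats in every hunk below: the second result buffer ([[RET_RES]]) is no longer allocated up front next to the first one; after the revert, each lowered op allocates its own output, sized from its input, immediately before its loop nest. A runnable Python paraphrase of the reverted ordering (plain lists standing in for memrefs; this is my illustration, not code from the repository):

import math

def exp_exp(arg0):
    res = [0.0] * len(arg0)       # first alloc, sized from dim(%arg0, 0)
    for i, v in enumerate(arg0):
        res[i] = math.exp(v)      # first Exp
    ret_res = [0.0] * len(res)    # [[RET_RES]] alloc now sits here, just before use
    for i, v in enumerate(res):
        ret_res[i] = math.exp(v)  # second Exp
    return ret_res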

@ -280,14 +278,10 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_tanh_tanh
+/// First Tanh
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Tanh
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -303,6 +297,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Tanh
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -332,14 +329,10 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_sinh_sinh
+/// First Sinh
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_0:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Sinh
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -355,6 +348,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Sinh
+// CHECK: [[C0_0:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -384,14 +380,10 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_cosh_cosh
+/// First Cosh
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Cosh
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -407,6 +399,9 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Cosh
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -435,14 +430,10 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_sigmoid_sigmoid
+/// First Sigmoid
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Sigmoid
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -457,6 +448,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Sigmoid
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -485,14 +479,10 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_relu_relu
+/// First Relu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Relu
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -504,6 +494,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Relu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -632,14 +625,10 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_elu_elu
+/// First Elu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Elu
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -656,6 +645,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Elu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -686,14 +678,10 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_leakyrelu_leakyrelu
+/// First LeakyRelu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First LeakyRelu
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -707,6 +695,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second LeakyRelu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -734,14 +725,10 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_selu_selu
+/// First Selu
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Selu
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -759,6 +746,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Selu
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -790,14 +780,10 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_hardsigmoid_hardsigmoid
+/// First HardSigmoid
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First HardSigmoid
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -816,6 +802,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second HardSigmoid
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>

@ -848,14 +837,10 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
"std.return"(%1) : (tensor<*xf32>) -> ()

// CHECK-LABEL: test_reciprocal_reciprocal
+/// First Reciprocal
// CHECK: [[C0:%.+]] = constant 0 : index
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-// CHECK: [[C0_1:%.+]] = constant 0 : index
-// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
-// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
-
-/// First Reciprocal
-
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>

@ -866,6 +851,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: affine.store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

/// Second Reciprocal
+// CHECK: [[C0_1:%.+]] = constant 0 : index
+// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
+// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt %s | FileCheck %s
+// RUN: onnx-mlir-opt %s -split-input-file | FileCheck %s

//===----------------------------------------------------------------------===//
// CHECK-LABEL: @check_map1(%arg0: tuple<i64, f32>) -> tensor<*xf32> {