Remove optimize_loops/return_loops op. (#200)

* Remove optimize_loops/return_loops op in elementwise ops lowering and fix tests in onnx_lowering.mlir.

* Fix all tests.

* Remove all occurrences of def_loops/return_loops.

* Fix test.

* Fix comments for the defineLoops & emitKrnlLoopsAndIterationForOperand functions.

* Remove emitOptimizedLoops.

* Allow not specifying optimizedLoops when creating KrnlIterateOperandPack.

* Fix style.

* Make the BuildKrnlLoop helper not emit optimize_loops/return_loops operations & retire emitKrnlLoopsAndIterationForOperand by replacing it with BuildKrnlLoop.

* DefineLoops -> DefineLoopsEx, remove redundant emitKrnlLoopsAndIterationForOperand function.

* BuildKrnlLoop API name update.

* Tweak comments.

* Remove unused withEmptyOptimization flag.

* Better comment for BuildKrnlLoop.

* Fully remove krnl.return_loops/optimize_loops op.

* Trigger Windows build.

* Bump Windows CI Python version.
Tian Jin 2020-07-08 12:49:15 +08:00 committed by GitHub
parent 07757a28ce
commit 01a4977c74
28 changed files with 281 additions and 1028 deletions
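
For context, the surviving API reduces loop emission to a define/iterate pair. Below is a minimal sketch (illustrative, not taken from the commit) of the post-change workflow, assuming a conversion pattern where `rewriter`, `loc`, and a MemRef-typed `alloc` are in scope:

    BuildKrnlLoop loops(rewriter, loc, /*loopNum=*/2);
    loops.createDefineOp();        // emits krnl.define_loops; no optimize_loops anymore
    loops.pushBounds(0, alloc, 0); // 0 .. dim(alloc, 0)
    loops.pushBounds(0, alloc, 1); // 0 .. dim(alloc, 1)
    loops.createIterateOp();       // emits krnl.iterate over the original loops
    rewriter.setInsertionPointToStart(loops.getIterateBlock());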

View File

@ -13,7 +13,7 @@ jobs:
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.7'
versionSpec: '3.7.8'
architecture: 'x64'
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"

View File

@ -525,26 +525,16 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
SmallVector<Value, 4> loopIVs;
if (!hasAllScalarValues(operands)) {
std::vector<Value> originalLoops;
KrnlOptimizeLoopsOp optimizedLoopsOp;
KrnlIterateOp iterateOp;
emitKrnlLoopsAndIterationForOperand(
rewriter, loc, X, originalLoops, optimizedLoopsOp, iterateOp);
Block &optimizationBlock = optimizedLoopsOp.region().front();
Block &iterationBlock = iterateOp.bodyRegion().front();
// Create iterateOp & get block within iterate op.
BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
loops.createDefineAndIterateOp(X);
Block *iterationBlock = loops.getIterateBlock();
// 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
rewriter.setInsertionPointToEnd(&optimizationBlock);
// Return from KrnlOptimizeLoopsOp body.
// When no optimizations are present we just return the loops
// unchaged.
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// 2. Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(&iterationBlock);
// Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(iterationBlock);
// Handle the operation:
for (auto arg : iterationBlock.getArguments())
for (auto arg : iterationBlock->getArguments())
loopIVs.push_back(arg);
}
@ -555,7 +545,6 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
rewriter.create<AffineStoreOp>(loc, loweredOpResult, alloc, loopIVs);
rewriter.replaceOp(op, alloc);
return success();
}
};
@ -598,25 +587,16 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
broadcastedDimInfo =
getBroadcastedDimInfo(loc, rewriter, memRefType, operands);
std::vector<Value> originalLoops;
KrnlOptimizeLoopsOp optimizedLoopsOp;
KrnlIterateOp iterateOp;
emitKrnlLoopsAndIterationForOperand(
rewriter, loc, alloc, originalLoops, optimizedLoopsOp, iterateOp);
Block &optimizationBlock = optimizedLoopsOp.region().front();
Block &iterationBlock = iterateOp.bodyRegion().front();
// Create iterateOp & get block within iterate op.
BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
loops.createDefineAndIterateOp(alloc);
Block *iterationBlock = loops.getIterateBlock();
// 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
rewriter.setInsertionPointToEnd(&optimizationBlock);
// Return from KrnlOptimizeLoopsOp body.
// When no optimizations are present we just return the loops unchaged.
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// 2. Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(&iterationBlock);
// Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(iterationBlock);
// Handle the operation:
for (auto arg : iterationBlock.getArguments())
for (auto arg : iterationBlock->getArguments())
loopIVs.push_back(arg);
}
// Fold over operands for each of their scalar values.
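
Condensed, the unary and variadic elementwise patterns now share the same skeleton. A sketch of that shared shape, where `shapeOperand` is an illustrative stand-in for X (unary) or alloc (variadic):

    BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
    loops.createDefineAndIterateOp(shapeOperand); // define + full-range iterate
    Block *iterationBlock = loops.getIterateBlock();
    rewriter.setInsertionPointToStart(iterationBlock);
    SmallVector<Value, 4> loopIVs;
    for (auto arg : iterationBlock->getArguments())
      loopIVs.push_back(arg); // one induction variable per dimension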

View File

@ -72,9 +72,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
// Define loops.
std::vector<Value> originalLoops;
std::vector<Value> optimizedLoops;
Block *optimizationBlock =
defineLoops(rewriter, loc, originalLoops, optimizedLoops, numLoops);
defineLoops(rewriter, loc, originalLoops, numLoops);
// We have two Krnl loops:
// - Outer loop iterates over the output matrix dimensions, and
@ -84,23 +82,18 @@ struct ONNXGemmOpLowering : public ConversionPattern {
std::vector<Value> outerLoops, optimizedOuterLoops;
outerLoops.reserve(2);
optimizedOuterLoops.reserve(2);
for (int i = 0; i < 2; ++i) {
for (int i = 0; i < 2; ++i)
outerLoops.push_back(originalLoops[i]);
optimizedOuterLoops.push_back(optimizedLoops[i]);
}
KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops);
KrnlIterateOperandPack outerPack(rewriter, outerLoops);
// Induction variables for the outer loops
for (int i = 0; i < 2; ++i)
addDimensionToPack(rewriter, loc, outerPack, alloc, i);
// Reduction loop
std::vector<Value> reductionLoops, optimizedReductionLoops;
std::vector<Value> reductionLoops;
reductionLoops.reserve(1);
optimizedReductionLoops.reserve(1);
reductionLoops.push_back(originalLoops[2]);
optimizedReductionLoops.push_back(optimizedLoops[2]);
KrnlIterateOperandPack reductionPack(
rewriter, reductionLoops, optimizedReductionLoops);
KrnlIterateOperandPack reductionPack(rewriter, reductionLoops);
// Induction variable for the reduction dimension
// Try to find and use a static value from A or B first.
// If it failed then use a dynamic value.
@ -140,10 +133,6 @@ struct ONNXGemmOpLowering : public ConversionPattern {
// Now perform the insertions into the body of the
// just generated instructions:
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// Insert instructions inside the outer loop.
Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&outerIterationBlock);
@ -154,14 +143,15 @@ struct ONNXGemmOpLowering : public ConversionPattern {
loopMNIVs.emplace_back(arg);
}
// Initialize the output of A*B
// Initialize the output of A * B
auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0);
rewriter.create<AffineStoreOp>(loc, zero, alloc, loopMNIVs);
// Compute A*B
// Compute A * B
auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack);
// Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
// Compute beta * C, and add up to alpha * A * B (unidirectional
// broadcasting)
auto loadedAB = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs);
auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB);
if (hasBias) {
@ -175,7 +165,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
rewriter.create<AffineStoreOp>(loc, alphaAB, alloc, loopMNIVs);
}
// Insert instructions to do matrix multiplication: A*B
// Insert instructions to do matrix multiplication: A * B
Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&matmulIterationBlock);
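
With the optimized-loop bookkeeping gone, the pack construction above boils down to slicing the defined loops. A sketch for the Gemm case (three loops: output dimensions M and N plus the reduction dimension K):

    std::vector<Value> originalLoops;
    defineLoops(rewriter, loc, originalLoops, /*numLoops=*/3);

    // Outer pack: iterate the M and N dimensions of the result `alloc`.
    std::vector<Value> outerLoops(originalLoops.begin(), originalLoops.begin() + 2);
    KrnlIterateOperandPack outerPack(rewriter, outerLoops);
    for (int i = 0; i < 2; ++i)
      addDimensionToPack(rewriter, loc, outerPack, alloc, i);

    // Reduction pack: the single K loop.
    std::vector<Value> reductionLoops = {originalLoops[2]};
    KrnlIterateOperandPack reductionPack(rewriter, reductionLoops);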

View File

@ -117,9 +117,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
// Define loops for batch dimensions.
std::vector<Value> originalLoops;
std::vector<Value> optimizedLoops;
Block *optimizationBlock = defineLoops(
rewriter, loc, originalLoops, optimizedLoops, memRefShape.size());
defineLoops(rewriter, loc, originalLoops, memRefShape.size());
// Outer KrnlIterateOp
SmallVector<Value, 4> loopBatchIVs;
@ -131,24 +129,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
for (int i = 0; i < memRefShape.size() - matmulResultDims; ++i)
batchAxes.emplace_back(i);
std::vector<Value> outerLoops, optimizedOuterLoops;
std::vector<Value> outerLoops;
outerLoops.reserve(batchAxes.size());
optimizedOuterLoops.reserve(batchAxes.size());
for (int i = 0; i < batchAxes.size(); ++i) {
for (int i = 0; i < batchAxes.size(); ++i)
outerLoops.push_back(originalLoops[i]);
optimizedOuterLoops.push_back(optimizedLoops[i]);
}
KrnlIterateOperandPack outerPack(
rewriter, outerLoops, optimizedOuterLoops);
KrnlIterateOperandPack outerPack(rewriter, outerLoops);
for (int i = 0; i < batchAxes.size(); ++i) {
addDimensionToPack(rewriter, loc, outerPack, alloc, i);
}
auto outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack);
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// Insert instructions into the outer KrnlIterateOp.
Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&outerIterationBlock);
@ -165,18 +156,14 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
// Create a KrnlIterateOp for matrix multiplication.
KrnlIterateOp matmulIterateOp;
std::vector<Value> matmulLoops, optimizedMatmulLoops;
std::vector<Value> matmulLoops;
if (AShape.size() >= 2 && BShape.size() >= 2) {
// 2-D x 2-D. Result has two dimensions.
matmulLoops.reserve(2);
optimizedMatmulLoops.reserve(2);
for (int i = 2; i > 0; --i) {
matmulLoops.emplace_back(originalLoops[memRefShape.size() - i]);
optimizedMatmulLoops.emplace_back(
optimizedLoops[memRefShape.size() - i]);
}
KrnlIterateOperandPack matmulPack(
rewriter, matmulLoops, optimizedMatmulLoops);
KrnlIterateOperandPack matmulPack(rewriter, matmulLoops);
for (int i = 2; i > 0; --i) {
addDimensionToPack(
rewriter, loc, matmulPack, alloc, memRefShape.size() - i);
@ -185,23 +172,13 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
} else {
// 1-D x 2-D, and vice versa. Result has one dimension.
matmulLoops.reserve(1);
optimizedMatmulLoops.reserve(1);
matmulLoops.emplace_back(originalLoops[memRefShape.size() - 1]);
optimizedMatmulLoops.emplace_back(
optimizedLoops[memRefShape.size() - 1]);
KrnlIterateOperandPack matmulPack(
rewriter, matmulLoops, optimizedMatmulLoops);
KrnlIterateOperandPack matmulPack(rewriter, matmulLoops);
addDimensionToPack(
rewriter, loc, matmulPack, alloc, memRefShape.size() - 1);
matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, matmulPack);
}
if (!hasBatchLoop) {
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
}
// Insert instructions into the matmul KrnlIterateOp.
Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&matmulIterationBlock);
@ -226,18 +203,11 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
// Iterate along the reduction dimension.
// Use a value from A.
std::vector<Value> reduceLoops;
std::vector<Value> optimizedReduceLoops;
Block *optimizationReduceBlock =
defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1);
KrnlIterateOperandPack reducePack(
rewriter, reduceLoops, optimizedReduceLoops);
defineLoops(rewriter, loc, reduceLoops, 1);
KrnlIterateOperandPack reducePack(rewriter, reduceLoops);
addDimensionToPack(rewriter, loc, reducePack, A, AShape.size() - 1);
auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack);
// No optimization
rewriter.setInsertionPointToEnd(optimizationReduceBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops);
// Insert instructions into the reduction KrnlIterateOp.
Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&reduceIterationBlock);
@ -288,18 +258,12 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
// Iterate along the reduction dimension.
// Use a value from A.
std::vector<Value> reduceLoops;
std::vector<Value> optimizedReduceLoops;
Block *optimizationReduceBlock =
defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1);
KrnlIterateOperandPack reducePack(
rewriter, reduceLoops, optimizedReduceLoops);
defineLoops(rewriter, loc, reduceLoops, 1);
KrnlIterateOperandPack reducePack(rewriter, reduceLoops);
addDimensionToPack(rewriter, loc, reducePack, A, 0);
auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack);
// No optimization
rewriter.setInsertionPointToEnd(optimizationReduceBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops);
// Insert instructions into the reduction KrnlIterateOp.
Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&reduceIterationBlock);

View File

@ -183,13 +183,10 @@ struct ONNXReductionOpLowering : public ConversionPattern {
// Define loops to initialize the result.
std::vector<Value> originalLoopsInit;
std::vector<Value> optimizedLoopsInit;
Block *optimizationBlockInit = defineLoops(
rewriter, loc, originalLoopsInit, optimizedLoopsInit, outRank);
defineLoops(rewriter, loc, originalLoopsInit, outRank);
// Iteration information
KrnlIterateOperandPack packInit(
rewriter, originalLoopsInit, optimizedLoopsInit);
KrnlIterateOperandPack packInit(rewriter, originalLoopsInit);
for (decltype(outRank) i = 0; i < outRank; ++i) {
addDimensionToPack(rewriter, loc, packInit, alloc, i);
}
@ -197,9 +194,6 @@ struct ONNXReductionOpLowering : public ConversionPattern {
Block &iterationBlockInit = iterateOpInit.bodyRegion().front();
// Perform the insertions into the body of the initialization loop.
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlockInit);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoopsInit);
// Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(&iterationBlockInit);
@ -216,11 +210,10 @@ struct ONNXReductionOpLowering : public ConversionPattern {
// Define an Krnl loop to do reduction.
rewriter.setInsertionPointAfter(iterateOpInit);
std::vector<Value> originalLoops, optimizedLoops;
Block *optimizationBlock =
defineLoops(rewriter, loc, originalLoops, optimizedLoops, inRank);
std::vector<Value> originalLoops;
defineLoops(rewriter, loc, originalLoops, inRank);
// Iteration information
KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
KrnlIterateOperandPack pack(rewriter, originalLoops);
for (decltype(inRank) i = 0; i < inRank; ++i) {
addDimensionToPack(rewriter, loc, pack, operands[0], i);
}
@ -228,10 +221,6 @@ struct ONNXReductionOpLowering : public ConversionPattern {
Block &iterationBlock = iterateOp.bodyRegion().front();
// Perform the insertions into the body of the reduction loop.
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(&iterationBlock);

View File

@ -54,9 +54,7 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
// Define loops.
std::vector<Value> originalLoops;
std::vector<Value> optimizedLoops;
Block *optimizationBlock =
defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
defineLoops(rewriter, loc, originalLoops, rank);
// Coerce the input into a 2-D tensor. `axis` will be the coercing point.
// This coercing follows the softmax definition in ONNX:
@ -65,26 +63,22 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
// dimensions. The outer loop is only created once `axis` is not zero.
// Define an outer loop with respect to axis.
std::vector<Value> outerLoops, optimizedOuterLoops;
std::vector<Value> outerLoops;
outerLoops.reserve(axis);
optimizedOuterLoops.reserve(axis);
for (int i = 0; i < axis; ++i) {
outerLoops.push_back(originalLoops[i]);
optimizedOuterLoops.push_back(optimizedLoops[i]);
}
KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops);
KrnlIterateOperandPack outerPack(rewriter, outerLoops);
for (int i = 0; i < axis; ++i)
addDimensionToPack(rewriter, loc, outerPack, input, i);
// Define an inner loop with respect to axis.
std::vector<Value> innerLoops, optimizedInnerLoops;
std::vector<Value> innerLoops;
innerLoops.reserve(rank - axis);
optimizedInnerLoops.reserve(rank - axis);
for (int i = axis; i < rank; ++i) {
innerLoops.push_back(originalLoops[i]);
optimizedInnerLoops.push_back(optimizedLoops[i]);
}
KrnlIterateOperandPack innerPack(rewriter, innerLoops, optimizedInnerLoops);
KrnlIterateOperandPack innerPack(rewriter, innerLoops);
for (int i = axis; i < rank; ++i)
addDimensionToPack(rewriter, loc, innerPack, input, i);
@ -93,10 +87,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
if (axis != 0) {
outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack);
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// Insert instructions inside the outer loop.
Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&outerIterationBlock);
@ -126,10 +116,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
sumIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
// Create an inner loop to compute softmax.
softmaxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
}
// Insert instructions inside the max loop.

View File

@ -107,7 +107,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
// 1. Define outer loops and emit empty optimization block:
int64_t nOuterLoops = (group > 1) ? 3 : 2;
BuildKrnlLoop outerLoops(rewriter, loc, nOuterLoops);
outerLoops.createDefineAndOptimizeOp();
outerLoops.createDefineOp();
// for n = 0 .. N:
int nIndex = outerLoops.pushBounds(0, inputOperand, 0);
// for g = 0 .. N:
@ -142,7 +142,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
// 2.2 Define spatial loops
int64_t nSpatialLoops = resultShape.size() - 2;
BuildKrnlLoop spatialLoops(rewriter, loc, nSpatialLoops);
spatialLoops.createDefineAndOptimizeOp();
spatialLoops.createDefineOp();
for (int i = 2; i < resultShape.size(); ++i)
spatialLoops.pushBounds(0, alloc, i);
@ -168,7 +168,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
// 3.2 Define inner loops.
int64_t nInnerLoops = 1 + (kernelShape.size() - 2);
BuildKrnlLoop innerLoops(rewriter, loc, nInnerLoops);
innerLoops.createDefineAndOptimizeOp();
innerLoops.createDefineOp();
// for c = 0 .. C/group
int cIndex = innerLoops.pushBounds(0, kernelShape[1]);
// for Kx = 0 .. KX
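
As this file shows, createDefineOp splits cleanly from bound pushing, and pushBounds has overloads for constant and MemRef-derived extents. A small sketch of the two overloads used here (`kernelOperand` is an illustrative name, not from the diff):

    BuildKrnlLoop innerLoops(rewriter, loc, /*loopNum=*/2);
    innerLoops.createDefineOp();
    // Constant bounds: 0 .. kernelShape[1] (an int64_t).
    int cIndex = innerLoops.pushBounds(0, kernelShape[1]);
    // MemRef-derived bounds: 0 .. dim(kernelOperand, 2).
    int kIndex = innerLoops.pushBounds(0, kernelOperand, 2);
    innerLoops.createIterateOp();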

View File

@ -57,9 +57,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
int64_t rank = memRefType.getRank();
std::vector<Value> originalLoops;
std::vector<Value> optimizedLoops;
Block *optimizationBlock =
defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
defineLoops(rewriter, loc, originalLoops, rank);
// Create a KrnlIterateOp along C dimension.
// This will be the outer-most loop in order to re-use scale, bias,
@ -67,8 +65,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
SmallVector<Value, 1> loopCIVs;
if (rank > 1) {
KrnlIterateOperandPack cPack(
rewriter, originalLoops[1], optimizedLoops[1]);
KrnlIterateOperandPack cPack(rewriter, originalLoops[1]);
addDimensionToPack(rewriter, loc, cPack, operand, 1);
auto cIterateOp = rewriter.create<KrnlIterateOp>(loc, cPack);
Block &cIterationBlock = cIterateOp.bodyRegion().front();
@ -89,21 +86,16 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
axes.emplace_back(0);
for (int64_t i = 2; i < rank; ++i)
axes.emplace_back(i);
std::vector<Value> packLoops, packOptimizedLoops;
std::vector<Value> packLoops;
for (int i = 0; i < axes.size(); ++i) {
packLoops.emplace_back(originalLoops[axes[i]]);
packOptimizedLoops.emplace_back(optimizedLoops[axes[i]]);
}
KrnlIterateOperandPack pack(rewriter, packLoops, packOptimizedLoops);
KrnlIterateOperandPack pack(rewriter, packLoops);
for (int i = 0; i < axes.size(); ++i) {
addDimensionToPack(rewriter, loc, pack, operand, axes[i]);
}
auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
// No optimization
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
Block &iterationBlock = iterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&iterationBlock);

View File

@ -332,7 +332,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
// for ho in range(HO):
// for wo in range(WO):
BuildKrnlLoop outputLoops(rewriter, loc, outputShape.size());
outputLoops.createDefineOptimizeAndIterateOp(alloc);
outputLoops.createDefineAndIterateOp(alloc);
auto ipMainRegion = rewriter.saveInsertionPoint();
rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());
@ -475,7 +475,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
// output[n][c][ho][wo] =
// emitScalarOpFor(output[n][c][ho][wo], input[n, c, hi, wi]);
BuildKrnlLoop poolingLoops(rewriter, loc, kernelShape.size());
poolingLoops.createDefineAndOptimizeOp();
poolingLoops.createDefineOp();
for (int i = 0; i < kernelShape.size(); ++i)
poolingLoops.pushBounds(
0, poolDimMap, llvm::makeArrayRef(IVsAndConstants[i]));

View File

@ -190,59 +190,13 @@ void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc,
}
}
// Function that defines the KRNL dialect loops and their respective
// optimized version.
KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter,
Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
int64_t numLoops) {
// Define loops.
// Function that emits the definition of loops references.
void defineLoops(ConversionPatternRewriter &rewriter, Location loc,
std::vector<Value> &loops, int64_t numLoops) {
auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, numLoops);
loops.reserve(numLoops);
for (auto result : loopsOp.getResults())
loops.push_back(result);
// Define optimized version of the loops.
auto optimizedLoopsOp = rewriter.create<KrnlOptimizeLoopsOp>(loc, numLoops);
optimizedLoops.reserve(numLoops);
for (auto result : optimizedLoopsOp.getResults())
optimizedLoops.push_back(result);
return optimizedLoopsOp;
}
// Function that emits the loops and their optimized version.
// The function returns a reference to the inner optimization block.
Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
int64_t numLoops) {
KrnlOptimizeLoopsOp optimizedLoopsOp =
emitOptimizedLoops(rewriter, loc, loops, optimizedLoops, numLoops);
return &optimizedLoopsOp.region().front();
}
// Function which emits a basic set of loops and optimized loops
// for a given operation argument. A reference to the loop optimization
// block is returned in the last argument of the function.
void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter,
Location loc, Value operand, std::vector<Value> &originalLoops,
KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp) {
// Operand shape.
auto shape = operand.getType().cast<MemRefType>().getShape();
// Number of loops.
int64_t rank = shape.size();
// Define loops and optimized loops.
std::vector<Value> optimizedLoops;
optimizedLoopsOp =
emitOptimizedLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
// Iterate over the loop nest.
for (int i = 0; i < rank; ++i)
addDimensionToPack(rewriter, loc, pack, operand, i);
iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
}
unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
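
What the retired emitKrnlLoopsAndIterationForOperand did can now be written inline with the two surviving helpers; a sketch assuming `operand` has MemRef type:

    int64_t rank = operand.getType().cast<MemRefType>().getShape().size();
    std::vector<Value> loops;
    defineLoops(rewriter, loc, loops, rank);
    KrnlIterateOperandPack pack(rewriter, loops);
    for (int i = 0; i < rank; ++i)
      addDimensionToPack(rewriter, loc, pack, operand, i);
    auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);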

View File

@ -63,24 +63,10 @@ std::map<int64_t, int64_t> getReductionMapping(
void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc,
KrnlIterateOperandPack &pack, Value operand, int index);
// Function that defines the KRNL dialect loops and their respective
// optimized version.
KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter,
Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
int64_t numLoops);
// Function that emits the loops and their optimized version.
// The function returns a reference to the inner optimization block.
Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
int64_t numLoops);
// Function which emits a basic set of loops and optimized loops
// for a given operation argument. A reference to the loop optimization
// block is returned in the last argument of the function.
void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter,
Location loc, Value operand, std::vector<Value> &originalLoops,
KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp);
// Function that emits the define_loop operation to define `numLoops`
// number of krnl loops, and fill `loop` with the newly defined loops.
void defineLoops(ConversionPatternRewriter &rewriter, Location loc,
std::vector<Value> &loops, int64_t numLoops);
unsigned getMemRefEltSizeInBytes(MemRefType memRefType);

View File

@ -212,7 +212,7 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
operandAdaptor.X().getType().cast<ShapedType>().getElementType(), 0);
int nLoops = 3;
BuildKrnlLoop initializationLoops(rewriter, loc, nLoops);
initializationLoops.createDefineOptimizeAndIterateOp(state.ht);
initializationLoops.createDefineAndIterateOp(state.ht);
auto ipInitializationLoops = rewriter.saveInsertionPoint();
rewriter.setInsertionPointToStart(initializationLoops.getIterateBlock());
{
@ -292,7 +292,7 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
// compute it, ft, ct, Ct, ot, Ht
BuildKrnlLoop stateLoops(rewriter, loc, 2);
stateLoops.createDefineAndOptimizeOp();
stateLoops.createDefineOp();
stateLoops.pushBounds(0, batchDimSize);
stateLoops.pushBounds(0, hiddenDimSize);
stateLoops.createIterateOp();
@ -372,7 +372,7 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
{ // Emit instructions for matrix multiplications.
// input_size is the reduction dimension.
BuildKrnlLoop reductionLoops(rewriter, loc, 1);
reductionLoops.createDefineAndOptimizeOp();
reductionLoops.createDefineOp();
reductionLoops.pushBounds(0, inputDimSize);
reductionLoops.createIterateOp();

View File

@ -93,7 +93,7 @@ struct ONNXRNNOpLowering : public ConversionPattern {
if (direction == FORWARD || direction == BIDIRECTIONAL) {
BuildKrnlLoop sequenceLoops(rewriter, loc, 1);
sequenceLoops.createDefineAndOptimizeOp();
sequenceLoops.createDefineOp();
sequenceLoops.pushBounds(0, sequenceDimSize);
sequenceLoops.createIterateOp();
@ -112,7 +112,7 @@ struct ONNXRNNOpLowering : public ConversionPattern {
if (direction == REVERSE || direction == BIDIRECTIONAL) {
BuildKrnlLoop sequenceLoops(rewriter, loc, 1);
sequenceLoops.createDefineAndOptimizeOp();
sequenceLoops.createDefineOp();
sequenceLoops.pushBounds(0, sequenceDimSize);
sequenceLoops.createIterateOp();

View File

@ -46,7 +46,7 @@ struct ONNXConcatOpLowering : public ConversionPattern {
auto currShape = operands[i].getType().cast<MemRefType>().getShape();
// Create loop.
BuildKrnlLoop inputLoops(rewriter, loc, rank);
inputLoops.createDefineAndOptimizeOp();
inputLoops.createDefineOp();
for (int r = 0; r < rank; ++r)
inputLoops.pushBounds(0, operands[i], r);
inputLoops.createIterateOp();

View File

@ -63,14 +63,14 @@ struct ONNXPadOpLowering : public ConversionPattern {
// Iterate over the loop nest using the output shape.
BuildKrnlLoop padLoops(rewriter, loc, rank);
padLoops.createDefineAndOptimizeOp();
padLoops.createDefineOp();
for (int i = 0; i < rank; ++i)
padLoops.pushBounds(0, alloc, i);
padLoops.createIterateOp();
// Iterate over the loop nest using the input shape.
BuildKrnlLoop valueLoops(rewriter, loc, rank);
valueLoops.createDefineAndOptimizeOp();
valueLoops.createDefineOp();
for (int i = 0; i < rank; ++i)
valueLoops.pushBounds(0, operandAdaptor.data(), i);
valueLoops.createIterateOp();

View File

@ -46,14 +46,14 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
// Iterate over the loop nest using the output shape.
BuildKrnlLoop padLoops(rewriter, loc, rank);
padLoops.createDefineAndOptimizeOp();
padLoops.createDefineOp();
for (int i = 0; i < rank; ++i)
padLoops.pushBounds(0, alloc, i);
padLoops.createIterateOp();
// Iterate over the loop nest using the input shape.
BuildKrnlLoop valueLoops(rewriter, loc, rank);
valueLoops.createDefineAndOptimizeOp();
valueLoops.createDefineOp();
for (int i = 0; i < rank; ++i)
valueLoops.pushBounds(0, operandAdaptor.data(), i);
valueLoops.createIterateOp();

View File

@ -70,7 +70,7 @@ struct ONNXSplitOpLowering : public ConversionPattern {
OpBuilder::InsertionGuard insertGuard(rewriter);
// Create loop.
BuildKrnlLoop outputLoops(rewriter, loc, rank);
outputLoops.createDefineOptimizeAndIterateOp(allocs[i]);
outputLoops.createDefineAndIterateOp(allocs[i]);
outputLoops.createIterateOp();
rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());
// Indices for the read and write.

View File

@ -38,11 +38,9 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
// Define loops.
std::vector<Value> originalLoops;
std::vector<Value> optimizedLoops;
Block *optimizationBlock =
defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
defineLoops(rewriter, loc, originalLoops, rank);
KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
KrnlIterateOperandPack pack(rewriter, originalLoops);
// Iterate over the loop nest using the input shape.
for (int i = 0; i < rank; ++i)
addDimensionToPack(rewriter, loc, pack, data, i);
@ -53,14 +51,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
// Now perform the insertions into the body of the
// just generated instructions:
// 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
rewriter.setInsertionPointToEnd(optimizationBlock);
// Return from KrnlOptimizeLoopsOp body.
// When no optimizations are present we just return the loops
// unchaged.
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
// 2. Insert instructions inside the KernelIterateOp body.
// Insert instructions inside the KernelIterateOp body.
rewriter.setInsertionPointToStart(&iterationBlock);
// Handle the operation.

View File

@ -161,8 +161,7 @@ void KrnlIterateOperandPack::pushAffineMapBound(
BuildKrnlLoop::BuildKrnlLoop(
ConversionPatternRewriter &rewriter, Location loc, int loopNum)
: rewriter(rewriter), loc(loc), originalLoopNum(loopNum), pack(NULL),
pushCount(0), createdDefineOp(false), createdOptimizeOp(false),
createdIterateOp(false) {
pushCount(0), createdDefineOp(false), createdIterateOp(false) {
if (originalLoopNum <= 0)
emitError(loc, "Expected positive number of original loops.");
}
@ -177,7 +176,7 @@ BuildKrnlLoop::~BuildKrnlLoop() {
free(pack);
}
void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
void BuildKrnlLoop::createDefineOp() {
// Insert define loop operation.
auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, originalLoopNum);
originalLoops.reserve(originalLoopNum);
@ -185,25 +184,8 @@ void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
originalLoops.push_back(result);
createdDefineOp = true;
// Insert optimize loop operation.
auto optimizedLoopsOp =
rewriter.create<KrnlOptimizeLoopsOp>(loc, originalLoopNum);
optLoops.reserve(originalLoopNum);
// Emit empty optimizations if flag is set.
if (withEmptyOptimization) {
for (auto result : optimizedLoopsOp.getResults())
optLoops.push_back(result);
optBlock = &optimizedLoopsOp.region().front();
auto ip = rewriter.saveInsertionPoint();
rewriter.setInsertionPointToEnd(optBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
rewriter.restoreInsertionPoint(ip);
}
createdOptimizeOp = true;
// prepare data structure to push bounds
pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops);
pack = new KrnlIterateOperandPack(rewriter, originalLoops);
}
int BuildKrnlLoop::pushBounds(int64_t lowerBound, int64_t upperBound) {
@ -254,9 +236,6 @@ void BuildKrnlLoop::createIterateOp() {
// Loop definition operation is mandatory.
assert(createdDefineOp && "Must create define op before iterate op.");
// Loop optimization operation is mandatory (for now).
assert(createdOptimizeOp && "Must create optimize op before iterate op.");
// Check if all bounds have been defined.
assert(pushCount == originalLoopNum &&
"Must push bounds for all original loops.");
@ -267,15 +246,14 @@ void BuildKrnlLoop::createIterateOp() {
createdIterateOp = true;
}
void BuildKrnlLoop::createDefineOptimizeAndIterateOp(
Value memRefOperand, bool withEmptyOptimization) {
void BuildKrnlLoop::createDefineAndIterateOp(Value memRefOperand) {
// Rank of the MemRef operand. We will emit a loop for each dimension.
int loopNum = memRefOperand.getType().cast<MemRefType>().getShape().size();
assert(originalLoopNum == loopNum &&
"Mismatch in loop numbers from constructor and define.");
// Emit the definition and the optimization operations for the loop nest.
createDefineAndOptimizeOp(withEmptyOptimization);
createDefineOp();
// Push a lower-upper bound pair for each dimension of the MemRef operand.
// The lower bound in this case is always zero.

View File

@ -83,6 +83,13 @@ struct KrnlIterateOperandPack {
_operands.end(), optimizedLoops.begin(), optimizedLoops.end());
}
// Create a pack with optimizedLoops = inputLoops (ie., no optimization).
KrnlIterateOperandPack(
mlir::Builder &builder, llvm::ArrayRef<mlir::Value> inputLoops)
: builder(builder), inputLoops(inputLoops), optimizedLoops(inputLoops) {
_operands.insert(_operands.end(), inputLoops.begin(), inputLoops.end());
}
void pushConstantBound(int64_t bound);
void pushOperandBound(mlir::Value operand);
@ -112,19 +119,15 @@ private:
};
// Helper function to write kernel loops. This class will let us build a single
// define/optimize/iterate operation combo. We can then insert optimizations in
// the body of the optimization operation, and operations in the body of the
// iterate operation.
// define/iterate operation combo. We can then insert operations in the body of
// the iterate operation.
//
// The sequence is as follow:
//
// 1) Create an object giving the rewriter, location, and number of loop in
// the original (non optimized) loop.
//
// 2) Create define & optimize ops (currently paired). Optimizations can then
// be added to the inner block of the optimize operation. Make sure to set
// the insertion point to that block for optimizations to go in the right
// place.
// 2) Create define_loops ops to define new loop variables.
//
// 3) Push the bounds for each of the original loops. Bounds are pushed in
// pairs (lower & upper bounds). There are a few methods to do it depending
@ -153,7 +156,7 @@ public:
// Create define and optimize loop with loopNum original loops. If
// withEmptyOptimization is true, the optimization is simply the identity
// function (no optimizations).
void createDefineAndOptimizeOp(bool withEmptyOptimization = true);
void createDefineOp();
// Push bounds (lower and upper) for each of the loops (order matters).
// The function returns the order number associated with the loop iteration.
@ -172,13 +175,12 @@ public:
// operations associated with this loop nest have been emitted already.
void createIterateOp();
// Create the loop nest definition, optimization and iteration operations
// Create the loop nest definition and iteration operations
// for a given operand of MemRef type. The loop nest has a depth equal to the
// rank of the MemRef operand. The lower bound of each loop is zero. The
// upper bound of each loop is given by the corresponding dimension of the
// MemRef operand.
void createDefineOptimizeAndIterateOp(
Value memRefOperand, bool withEmptyOptimization = true);
void createDefineAndIterateOp(Value memRefOperand);
// Get the (original loop) induction variable associated with the given
// index. Use the index returned when pushing the bounds.
@ -220,7 +222,6 @@ private:
// Flags that keep track of emitted operations.
bool createdDefineOp;
bool createdOptimizeOp;
bool createdIterateOp;
// Saved insertion point in the code region of the KrnlOptimizeLoopsOp.
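
Per its comment, the new single-argument constructor is the two-argument form with optimizedLoops set equal to the input loops; a sketch of the equivalence:

    std::vector<Value> loops;
    defineLoops(rewriter, loc, loops, /*numLoops=*/2);
    KrnlIterateOperandPack packNew(rewriter, loops);        // new one-argument form
    KrnlIterateOperandPack packOld(rewriter, loops, loops); // optimized == input

Both packs build the same operand list for the eventual krnl.iterate.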

View File

@ -78,47 +78,6 @@ ParseResult parseKrnlDefineLoopsOp(
return success();
}
//===----------------------------------------------------------------------===//
// KrnlOptimizeLoopsOp
//===----------------------------------------------------------------------===//
void KrnlOptimizeLoopsOp::build(
OpBuilder &builder, OperationState &result, int num_optimized_loops) {
result.types.append(num_optimized_loops, LoopType::get(builder.getContext()));
// Create a region and a block for the body.
// Schedule intrinsics will be placed into this region.
Region *region = result.addRegion();
auto *body = new Block();
region->push_back(body);
}
void print(OpAsmPrinter &p, KrnlOptimizeLoopsOp &op) {
p << "krnl.optimize_loops ";
p.printRegion(op.region(), /*printEntryBlockArgs=*/false,
/*printBlockTerminators=*/true);
p << " : ";
p.printFunctionalType(op);
}
ParseResult parseKrnlOptimizeLoopsOp(
OpAsmParser &parser, OperationState &result) {
// Parse the schedule body region.
Region *region = result.addRegion();
if (parser.parseRegion(*region, llvm::None, llvm::None))
return failure();
// Parse the function type for the schedule operation.
// Then following the hint of this parsed function type, parse the
// returned timestamp space dimension handlers.
FunctionType schedule_func_type;
if (parser.parseColonType(schedule_func_type) ||
parser.addTypesToList(schedule_func_type.getResults(), result.types)) {
failure();
}
return success();
}
//===----------------------------------------------------------------------===//
// KrnlIterateOp
//===----------------------------------------------------------------------===//
@ -340,26 +299,9 @@ static LogicalResult verify(KrnlIterateOp op) {
}
//===----------------------------------------------------------------------===//
// KrnlReturnLoopsOp
// KrnlEntryPointOp
//===----------------------------------------------------------------------===//
void print(OpAsmPrinter &p, KrnlReturnLoopsOp &op) {
p << "krnl.return_loops ";
p.printOperands(op.operand_begin(), op.operand_end());
}
ParseResult parseKrnlReturnLoopsOp(
OpAsmParser &parser, OperationState &result) {
// Parse the loops to return.
SmallVector<OpAsmParser::OperandType, 4> timestamp_dim_handlers;
if (parser.parseOperandList(timestamp_dim_handlers) ||
parser.resolveOperands(timestamp_dim_handlers,
LoopType::get(result.getContext()), result.operands))
return failure();
return success();
}
void KrnlEntryPointOp::build(mlir::OpBuilder &builder, OperationState &state,
SymbolRefAttr funcAttr, IntegerAttr numInputs, IntegerAttr numOutputs) {
state.addAttribute(KrnlEntryPointOp::getEntryPointFuncAttrName(), funcAttr);

View File

@ -48,32 +48,6 @@ def KrnlDefineLoopsOp : Op<Krnl_Dialect, "define_loops"> {
}];
}
def KrnlOptimizeLoopsOp : Op<Krnl_Dialect, "optimize_loops"> {
let summary = "optimize_loops operation";
let description = [{
The "krnl.optimize_loops" operation is essentially a cosmetic operation
which exists to encapsulate a region where loops are being scheduled /
optimized.
The optimized loops are returned at the end of the region associated with
the krnl.optimize_loops operation.
For example : TBD once we have actual schedule intrinsics.
}];
let arguments = (ins Variadic<AnyType>);
let results = (outs Variadic<AnyType>);
let regions = (region SizedRegion<1>:$region);
let skipDefaultBuilders = 1;
let builders = [ OpBuilder<"OpBuilder &builder, OperationState &result, "
"int timestamp_space_rank"> ];
let printer = [{ return ::print(p, *this); }];
let parser = [{ return ::parse$cppClass(parser, result); }];
}
def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineScope]> {
let summary = "iterate operation";
let description = [{
@ -129,19 +103,6 @@ def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineS
let verifier = [{ return ::verify(*this); }];
}
def KrnlReturnLoopsOp : Op<Krnl_Dialect, "return_loops", [Terminator]> {
let summary = "Krnl return handler operation";
let description = [{
Krnl return_loops operation is a terminator operation for returning
scheduled dimension handlers in the krnl.optimize_loops region.
}];
let arguments = (ins Variadic<AnyType>);
let printer = [{ return ::print(p, *this); }];
let parser = [{ return ::parse$cppClass(parser, result); }];
}
def KrnlTerminatorOp : Op<Krnl_Dialect, "terminate", [Terminator]> {
let summary = "Krnl terminator operation";
let description = [{

View File

@ -125,21 +125,6 @@ public:
// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
//===----------------------------------------------------------------------===//
class KrnlOptimizeLoopsLowering : public OpRewritePattern<KrnlOptimizeLoopsOp> {
public:
using OpRewritePattern<KrnlOptimizeLoopsOp>::OpRewritePattern;
LogicalResult matchAndRewrite(
KrnlOptimizeLoopsOp op, PatternRewriter &rewriter) const override {
rewriter.eraseOp(op);
return success();
}
};
//===----------------------------------------------------------------------===//
// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
//===----------------------------------------------------------------------===//
class KrnlBlockOpLowering : public OpRewritePattern<KrnlBlockOp> {
public:
using OpRewritePattern<KrnlBlockOp>::OpRewritePattern;
@ -151,21 +136,6 @@ public:
}
};
//===----------------------------------------------------------------------===//
// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
//===----------------------------------------------------------------------===//
class KrnlReturnLoopOpLowering : public OpRewritePattern<KrnlReturnLoopsOp> {
public:
using OpRewritePattern<KrnlReturnLoopsOp>::OpRewritePattern;
LogicalResult matchAndRewrite(
KrnlReturnLoopsOp op, PatternRewriter &rewriter) const override {
rewriter.eraseOp(op);
return success();
}
};
//===----------------------------------------------------------------------===//
// KrnlToAffineLoweringPass
//===----------------------------------------------------------------------===//
@ -230,14 +200,11 @@ void KrnlToAffineLoweringPass::runOnFunction() {
OwningRewritePatternList patterns;
patterns.insert<KrnlTerminatorLowering, KrnlDefineLoopsLowering,
KrnlOptimizeLoopsLowering, KrnlBlockOpLowering, KrnlReturnLoopOpLowering>(
&getContext());
KrnlBlockOpLowering>(&getContext());
// Do not lower operations that pertain to schedules just yet.
target.addLegalOp<KrnlBlockOp>();
target.addLegalOp<KrnlDefineLoopsOp>();
target.addLegalOp<KrnlOptimizeLoopsOp>();
target.addLegalOp<KrnlReturnLoopsOp>();
if (failed(applyPartialConversion(function, target, patterns)))
return signalPassFailure();
@ -312,8 +279,6 @@ void KrnlToAffineLoweringPass::runOnFunction() {
// Remove/lower schedule related operations.
target.addIllegalOp<KrnlDefineLoopsOp>();
target.addIllegalOp<KrnlBlockOp>();
target.addIllegalOp<KrnlOptimizeLoopsOp>();
target.addIllegalOp<KrnlReturnLoopsOp>();
if (failed(applyPartialConversion(function, target, patterns)))
return signalPassFailure();
}

View File

@ -12,9 +12,6 @@
func @simple_iterate(%N : index) {
%ii, %ij, %ik = krnl.define_loops 3
%oi, %oj, %ok = krnl.optimize_loops {
krnl.return_loops %ii, %ij, %ik
} : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
@ -22,18 +19,18 @@ func @simple_iterate(%N : index) {
// GENERIC-NEXT: bounds = [#{{.*}}, #{{.*}}, #{{.*}}, #{{.*}}]
// CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 1 to 11) {
krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) {
krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) {
}
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
// CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) {
krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) {
krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) {
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index):
// CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10) {
krnl.iterate(%ok) with (%ik -> %k = 0 to 10) {
krnl.iterate(%ik) with (%ik -> %k = 0 to 10) {
}
}
@ -41,7 +38,7 @@ func @simple_iterate(%N : index) {
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
// CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to %{{.*}}, %{{.*}} -> %{{.*}} = 0 to 10) {
krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) {
krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) {
}
@ -52,25 +49,22 @@ func @simple_iterate(%N : index) {
func @affine_map_bound(%N : index) {
%ii, %ij, %ik = krnl.define_loops 3
%oi, %oj, %ok = krnl.optimize_loops {
krnl.return_loops %ii, %ij, %ik
} : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
// CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) {
krnl.iterate(%oi, %oj) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) {
krnl.iterate(%ii, %ij) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) {
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index):
// CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = #{{.*}}(%{{.*}}, %{{.*}}) to #{{.*}}(%{{.*}}, %{{.*}})) {
krnl.iterate(%ok) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) {
krnl.iterate(%ik) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) {
}
// GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
// GENERIC-NEXT: ^bb0(%{{.*}}: index):
// CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = max #map{{.*}}(%{{.*}}, %{{.*}}) to min #map{{.*}}(%{{.*}}, %{{.*}})[%{{.*}}]) {
krnl.iterate(%ok) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) {
krnl.iterate(%ik) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) {
}
}

View File

@ -12,14 +12,12 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
// CHECK: [[MEMPOOL:%.+]] = alloc() : memref<400xi8>
// CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops
// CHECK: krnl.iterate
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops
// CHECK: krnl.iterate
// CHECK: dealloc [[MEMPOOL]] : memref<400xi8>
// CHECK: return [[RES]] : memref<10x10xf32>
@ -41,14 +39,12 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
// CHECK: [[MEMPOOL1:%.+]] = alloc() : memref<400xi8>
// CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops
// CHECK: krnl.iterate
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops
// CHECK: krnl.iterate
// CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32>
// CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32>
@ -57,7 +53,6 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
// CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
// CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
// CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops
// CHECK: krnl.iterate
// CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
// CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32>

View File

@ -2,7 +2,7 @@
// CHECK-LABEL: func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32>
func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32> {
%0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32>
%0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 1.0, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32>
return %0 : memref<1x70xf32>
// CHECK: {{.*}} = "krnl.global"() {name = "constant_0", shape = [1, 70]} : () -> memref<1x70xf32>

File diff suppressed because it is too large.

View File

@ -12,10 +12,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@ -23,10 +20,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Add
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@ -51,10 +45,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
@ -62,10 +53,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Mul
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
@ -90,10 +78,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
@ -101,10 +86,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Div
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
@ -129,10 +111,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
@ -140,10 +119,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Sub
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
@@ -168,10 +144,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
@@ -179,10 +152,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
/// Second And
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
@@ -207,10 +177,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
@@ -218,10 +185,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
/// Second Or
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
@@ -246,10 +210,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
@@ -257,10 +218,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
/// Second Xor
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
@@ -286,12 +244,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -301,12 +256,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: affine.store [[EXP]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -331,12 +283,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@@ -352,12 +301,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@@ -388,12 +334,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -409,12 +352,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -445,12 +385,9 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -466,12 +403,8 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -502,12 +435,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -522,12 +452,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -557,12 +484,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
@@ -574,12 +498,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
@@ -605,10 +526,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -616,10 +534,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Sum
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -644,10 +559,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
@@ -656,10 +568,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Max
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
@@ -685,10 +594,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
@@ -697,10 +603,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
/// Second Min
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
@@ -727,12 +630,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -749,12 +649,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -786,12 +683,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -805,12 +699,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -839,12 +730,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -862,12 +750,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -900,12 +785,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -924,12 +806,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -963,12 +842,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32
@@ -979,12 +855,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32