Remove optimize_loops/return_loops op. (#200)

* Remove optimize_loops/return_loops op in elementwise ops lowering and fix tests in onnx_lowering.mlir.
* Fix all tests.
* Remove all occurrences of def_loops/return_loops.
* Fix test.
* Fix comments for defineLoops & emitKrnlLoopsAndIterationForOperand function.
* Remove emitOptimizedLoops.
* Allow not specifying optimizedLoops when creating KrnlIterateOperandPack.
* Fix style.
* Make BuildKrnlLoop helper not emit optimize/return_loop operations & retire emitKrnlLoopsAndIterationForOperand by replacing it with BuildKrnlLoop.
* DefineLoops -> DefineLoopsEx, remove redundant emitKrnlLoopsAndIterationForOperand function.
* BuildKrnlLoop API name update.
* Tweak comments.
* Remove unused withEmptyOptimization flag.
* Better comment for BuildKrnlLoop.
* Fully remove krnl.return_loops/optimize_loops op.
* Trigger Windows build.
* Bump Windows CI Python version.
This commit is contained in:
parent 07757a28ce
commit 01a4977c74
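The net effect on the IR: loop nests no longer pass through a `krnl.optimize_loops`/`krnl.return_loops` region before being iterated. A minimal before/after sketch, adapted from the krnl_ops.mlir test changes further down (illustrative, not a verbatim excerpt):

```mlir
// Before this commit: defined loops were funneled through an optimization
// region, and krnl.iterate consumed the "optimized" loop handles.
%ii, %ij = krnl.define_loops 2
%oi, %oj = krnl.optimize_loops {
  krnl.return_loops %ii, %ij
} : () -> (!krnl.loop, !krnl.loop)
krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) {
}

// After this commit: krnl.iterate consumes the defined loops directly.
%ii, %ij = krnl.define_loops 2
krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) {
}
```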
@@ -13,7 +13,7 @@ jobs:
   steps:
   - task: UsePythonVersion@0
     inputs:
-      versionSpec: '3.7.7'
+      versionSpec: '3.7.8'
       architecture: 'x64'

   - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
@@ -525,26 +525,16 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
     SmallVector<Value, 4> loopIVs;
     if (!hasAllScalarValues(operands)) {
-      std::vector<Value> originalLoops;
-      KrnlOptimizeLoopsOp optimizedLoopsOp;
-      KrnlIterateOp iterateOp;
-      emitKrnlLoopsAndIterationForOperand(
-          rewriter, loc, X, originalLoops, optimizedLoopsOp, iterateOp);
-      Block &optimizationBlock = optimizedLoopsOp.region().front();
-      Block &iterationBlock = iterateOp.bodyRegion().front();
+      // Create iterateOp & get block within iterate op.
+      BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
+      loops.createDefineAndIterateOp(X);
+      Block *iterationBlock = loops.getIterateBlock();

-      // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
-      rewriter.setInsertionPointToEnd(&optimizationBlock);
-      // Return from KrnlOptimizeLoopsOp body.
-      // When no optimizations are present we just return the loops
-      // unchaged.
-      rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
-      // 2. Insert instructions inside the KernelIterateOp body.
-      rewriter.setInsertionPointToStart(&iterationBlock);
+      // Insert instructions inside the KernelIterateOp body.
+      rewriter.setInsertionPointToStart(iterationBlock);

       // Handle the operation:
-      for (auto arg : iterationBlock.getArguments())
+      for (auto arg : iterationBlock->getArguments())
         loopIVs.push_back(arg);
     }

@@ -555,7 +545,6 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
     rewriter.create<AffineStoreOp>(loc, loweredOpResult, alloc, loopIVs);
-
     rewriter.replaceOp(op, alloc);

     return success();
   }
 };

@@ -598,25 +587,16 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
       broadcastedDimInfo =
           getBroadcastedDimInfo(loc, rewriter, memRefType, operands);

-      std::vector<Value> originalLoops;
-      KrnlOptimizeLoopsOp optimizedLoopsOp;
-      KrnlIterateOp iterateOp;
-      emitKrnlLoopsAndIterationForOperand(
-          rewriter, loc, alloc, originalLoops, optimizedLoopsOp, iterateOp);
-      Block &optimizationBlock = optimizedLoopsOp.region().front();
-      Block &iterationBlock = iterateOp.bodyRegion().front();
+      // Create iterateOp & get block within iterate op.
+      BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
+      loops.createDefineAndIterateOp(alloc);
+      Block *iterationBlock = loops.getIterateBlock();

-      // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
-      rewriter.setInsertionPointToEnd(&optimizationBlock);
-      // Return from KrnlOptimizeLoopsOp body.
-      // When no optimizations are present we just return the loops unchaged.
-      rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
-      // 2. Insert instructions inside the KernelIterateOp body.
-      rewriter.setInsertionPointToStart(&iterationBlock);
+      // Insert instructions inside the KernelIterateOp body.
+      rewriter.setInsertionPointToStart(iterationBlock);

       // Handle the operation:
-      for (auto arg : iterationBlock.getArguments())
+      for (auto arg : iterationBlock->getArguments())
         loopIVs.push_back(arg);
     }
     // Fold over operands for each of their scalar values.
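For reference, here is the new elementwise idiom in one place — a sketch consolidating the hunk above, assuming the surrounding pattern's `rewriter`, `loc`, `memRefType`, and operand `X`:

```cpp
// Build one krnl loop per rank dimension of X and enter the iterate body.
BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
loops.createDefineAndIterateOp(X); // emits krnl.define_loops + krnl.iterate
Block *iterationBlock = loops.getIterateBlock();

// All further instructions go inside the iterate body; the block's
// arguments are the loop induction variables.
rewriter.setInsertionPointToStart(iterationBlock);
SmallVector<Value, 4> loopIVs;
for (auto arg : iterationBlock->getArguments())
  loopIVs.push_back(arg);
```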
@@ -72,9 +72,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     // Define loops.
     std::vector<Value> originalLoops;
-    std::vector<Value> optimizedLoops;
-    Block *optimizationBlock =
-        defineLoops(rewriter, loc, originalLoops, optimizedLoops, numLoops);
+    defineLoops(rewriter, loc, originalLoops, numLoops);

     // We have two Krnl loops:
     // - Outer loop iterates over the output matrix dimensions, and

@@ -84,23 +82,18 @@ struct ONNXGemmOpLowering : public ConversionPattern {
-    std::vector<Value> outerLoops, optimizedOuterLoops;
+    std::vector<Value> outerLoops;
     outerLoops.reserve(2);
-    optimizedOuterLoops.reserve(2);
-    for (int i = 0; i < 2; ++i) {
+    for (int i = 0; i < 2; ++i)
       outerLoops.push_back(originalLoops[i]);
-      optimizedOuterLoops.push_back(optimizedLoops[i]);
-    }
-    KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops);
+    KrnlIterateOperandPack outerPack(rewriter, outerLoops);
     // Induction variables for the outer loops
     for (int i = 0; i < 2; ++i)
       addDimensionToPack(rewriter, loc, outerPack, alloc, i);

     // Reduction loop
-    std::vector<Value> reductionLoops, optimizedReductionLoops;
+    std::vector<Value> reductionLoops;
     reductionLoops.reserve(1);
-    optimizedReductionLoops.reserve(1);
     reductionLoops.push_back(originalLoops[2]);
-    optimizedReductionLoops.push_back(optimizedLoops[2]);
-    KrnlIterateOperandPack reductionPack(
-        rewriter, reductionLoops, optimizedReductionLoops);
+    KrnlIterateOperandPack reductionPack(rewriter, reductionLoops);
     // Induction variable for the reduction dimension
     // Try to find and use a static value from A or B first.
     // If it failed then use a dynamic value.

@@ -140,10 +133,6 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     // Now perform the insertions into the body of the
     // just generated instructions:

-    // No optimization
-    rewriter.setInsertionPointToEnd(optimizationBlock);
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
     // Insert instructions inside the outer loop.
     Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
     rewriter.setInsertionPointToStart(&outerIterationBlock);

@@ -154,14 +143,15 @@ struct ONNXGemmOpLowering : public ConversionPattern {
       loopMNIVs.emplace_back(arg);
     }

-    // Initialize the output of A*B
+    // Initialize the output of A * B
     auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0);
     rewriter.create<AffineStoreOp>(loc, zero, alloc, loopMNIVs);

-    // Compute A*B
+    // Compute A * B
     auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack);

-    // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
+    // Compute beta * C, and add up to alpha * A * B (unidirectional
+    // broadcasting)
     auto loadedAB = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs);
     auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB);
     if (hasBias) {

@@ -175,7 +165,7 @@ struct ONNXGemmOpLowering : public ConversionPattern {
       rewriter.create<AffineStoreOp>(loc, alphaAB, alloc, loopMNIVs);
     }

-    // Insert instructions to do matrix multiplication: A*B
+    // Insert instructions to do matrix multiplication: A * B
     Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front();
     rewriter.setInsertionPointToStart(&matmulIterationBlock);
@@ -117,9 +117,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     // Define loops for batch dimensions.
     std::vector<Value> originalLoops;
-    std::vector<Value> optimizedLoops;
-    Block *optimizationBlock = defineLoops(
-        rewriter, loc, originalLoops, optimizedLoops, memRefShape.size());
+    defineLoops(rewriter, loc, originalLoops, memRefShape.size());

     // Outer KrnlIterateOp
     SmallVector<Value, 4> loopBatchIVs;

@@ -131,24 +129,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     for (int i = 0; i < memRefShape.size() - matmulResultDims; ++i)
       batchAxes.emplace_back(i);

-    std::vector<Value> outerLoops, optimizedOuterLoops;
+    std::vector<Value> outerLoops;
     outerLoops.reserve(batchAxes.size());
-    optimizedOuterLoops.reserve(batchAxes.size());
-    for (int i = 0; i < batchAxes.size(); ++i) {
+    for (int i = 0; i < batchAxes.size(); ++i)
       outerLoops.push_back(originalLoops[i]);
-      optimizedOuterLoops.push_back(optimizedLoops[i]);
-    }
-    KrnlIterateOperandPack outerPack(
-        rewriter, outerLoops, optimizedOuterLoops);
-
+    KrnlIterateOperandPack outerPack(rewriter, outerLoops);
     for (int i = 0; i < batchAxes.size(); ++i) {
       addDimensionToPack(rewriter, loc, outerPack, alloc, i);
     }
     auto outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack);

-    // No optimization
-    rewriter.setInsertionPointToEnd(optimizationBlock);
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
     // Insert instructions into the outer KrnlIterateOp.
     Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
     rewriter.setInsertionPointToStart(&outerIterationBlock);

@@ -165,18 +156,14 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     // Create a KrnlIterateOp for matrix multiplication.
     KrnlIterateOp matmulIterateOp;
-    std::vector<Value> matmulLoops, optimizedMatmulLoops;
+    std::vector<Value> matmulLoops;
     if (AShape.size() >= 2 && BShape.size() >= 2) {
       // 2-D x 2-D. Result has two dimensions.
       matmulLoops.reserve(2);
-      optimizedMatmulLoops.reserve(2);
       for (int i = 2; i > 0; --i) {
         matmulLoops.emplace_back(originalLoops[memRefShape.size() - i]);
-        optimizedMatmulLoops.emplace_back(
-            optimizedLoops[memRefShape.size() - i]);
       }
-      KrnlIterateOperandPack matmulPack(
-          rewriter, matmulLoops, optimizedMatmulLoops);
+      KrnlIterateOperandPack matmulPack(rewriter, matmulLoops);
       for (int i = 2; i > 0; --i) {
         addDimensionToPack(
             rewriter, loc, matmulPack, alloc, memRefShape.size() - i);

@@ -185,23 +172,13 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     } else {
       // 1-D x 2-D, and vice versa. Result has one dimension.
       matmulLoops.reserve(1);
-      optimizedMatmulLoops.reserve(1);
       matmulLoops.emplace_back(originalLoops[memRefShape.size() - 1]);
-      optimizedMatmulLoops.emplace_back(
-          optimizedLoops[memRefShape.size() - 1]);
-      KrnlIterateOperandPack matmulPack(
-          rewriter, matmulLoops, optimizedMatmulLoops);
+      KrnlIterateOperandPack matmulPack(rewriter, matmulLoops);
       addDimensionToPack(
           rewriter, loc, matmulPack, alloc, memRefShape.size() - 1);
       matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, matmulPack);
     }

-    if (!hasBatchLoop) {
-      // No optimization
-      rewriter.setInsertionPointToEnd(optimizationBlock);
-      rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-    }
-
     // Insert instructions into the matmul KrnlIterateOp.
     Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front();
     rewriter.setInsertionPointToStart(&matmulIterationBlock);

@@ -226,18 +203,11 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
       // Iterate along the reduction dimension.
       // Use a value from A.
       std::vector<Value> reduceLoops;
-      std::vector<Value> optimizedReduceLoops;
-      Block *optimizationReduceBlock =
-          defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1);
-      KrnlIterateOperandPack reducePack(
-          rewriter, reduceLoops, optimizedReduceLoops);
+      defineLoops(rewriter, loc, reduceLoops, 1);
+      KrnlIterateOperandPack reducePack(rewriter, reduceLoops);
       addDimensionToPack(rewriter, loc, reducePack, A, AShape.size() - 1);
       auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack);

-      // No optimization
-      rewriter.setInsertionPointToEnd(optimizationReduceBlock);
-      rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops);
-
       // Insert instructions into the reduction KrnlIterateOp.
       Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front();
       rewriter.setInsertionPointToStart(&reduceIterationBlock);

@@ -288,18 +258,12 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
       // Iterate along the reduction dimension.
       // Use a value from A.
       std::vector<Value> reduceLoops;
-      std::vector<Value> optimizedReduceLoops;
-      Block *optimizationReduceBlock =
-          defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1);
-      KrnlIterateOperandPack reducePack(
-          rewriter, reduceLoops, optimizedReduceLoops);
-
+      defineLoops(rewriter, loc, reduceLoops, 1);
+      KrnlIterateOperandPack reducePack(rewriter, reduceLoops);
       addDimensionToPack(rewriter, loc, reducePack, A, 0);
       auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack);

-      // No optimization
-      rewriter.setInsertionPointToEnd(optimizationReduceBlock);
-      rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops);
-
       // Insert instructions into the reduction KrnlIterateOp.
       Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front();
       rewriter.setInsertionPointToStart(&reduceIterationBlock);
@@ -183,13 +183,10 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     // Define loops to initialize the result.
     std::vector<Value> originalLoopsInit;
-    std::vector<Value> optimizedLoopsInit;
-    Block *optimizationBlockInit = defineLoops(
-        rewriter, loc, originalLoopsInit, optimizedLoopsInit, outRank);
+    defineLoops(rewriter, loc, originalLoopsInit, outRank);

     // Iteration information
-    KrnlIterateOperandPack packInit(
-        rewriter, originalLoopsInit, optimizedLoopsInit);
+    KrnlIterateOperandPack packInit(rewriter, originalLoopsInit);
     for (decltype(outRank) i = 0; i < outRank; ++i) {
       addDimensionToPack(rewriter, loc, packInit, alloc, i);
     }

@@ -197,9 +194,6 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     Block &iterationBlockInit = iterateOpInit.bodyRegion().front();

     // Perform the insertions into the body of the initialization loop.
-    // No optimization
-    rewriter.setInsertionPointToEnd(optimizationBlockInit);
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoopsInit);

     // Insert instructions inside the KernelIterateOp body.
     rewriter.setInsertionPointToStart(&iterationBlockInit);

@@ -216,11 +210,10 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     // Define an Krnl loop to do reduction.
     rewriter.setInsertionPointAfter(iterateOpInit);
-    std::vector<Value> originalLoops, optimizedLoops;
-    Block *optimizationBlock =
-        defineLoops(rewriter, loc, originalLoops, optimizedLoops, inRank);
+    std::vector<Value> originalLoops;
+    defineLoops(rewriter, loc, originalLoops, inRank);
     // Iteration information
-    KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
+    KrnlIterateOperandPack pack(rewriter, originalLoops);
     for (decltype(inRank) i = 0; i < inRank; ++i) {
       addDimensionToPack(rewriter, loc, pack, operands[0], i);
     }

@@ -228,10 +221,6 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     Block &iterationBlock = iterateOp.bodyRegion().front();

     // Perform the insertions into the body of the reduction loop.
-    // No optimization
-    rewriter.setInsertionPointToEnd(optimizationBlock);
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
     // Insert instructions inside the KernelIterateOp body.
     rewriter.setInsertionPointToStart(&iterationBlock);
@@ -54,9 +54,7 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     // Define loops.
     std::vector<Value> originalLoops;
-    std::vector<Value> optimizedLoops;
-    Block *optimizationBlock =
-        defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
+    defineLoops(rewriter, loc, originalLoops, rank);

     // Coerce the input into a 2-D tensor. `axis` will be the coercing point.
     // This coercing follows the softmax definition in ONNX:

@@ -65,26 +63,22 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     // dimensions. The outer loop is only created once `axis` is not zero.

     // Define an outer loop with respect to axis.
-    std::vector<Value> outerLoops, optimizedOuterLoops;
+    std::vector<Value> outerLoops;
     outerLoops.reserve(axis);
-    optimizedOuterLoops.reserve(axis);
     for (int i = 0; i < axis; ++i) {
       outerLoops.push_back(originalLoops[i]);
-      optimizedOuterLoops.push_back(optimizedLoops[i]);
     }
-    KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops);
+    KrnlIterateOperandPack outerPack(rewriter, outerLoops);
     for (int i = 0; i < axis; ++i)
       addDimensionToPack(rewriter, loc, outerPack, input, i);

     // Define an inner loop with respect to axis.
-    std::vector<Value> innerLoops, optimizedInnerLoops;
+    std::vector<Value> innerLoops;
     innerLoops.reserve(rank - axis);
-    optimizedInnerLoops.reserve(rank - axis);
     for (int i = axis; i < rank; ++i) {
       innerLoops.push_back(originalLoops[i]);
-      optimizedInnerLoops.push_back(optimizedLoops[i]);
     }
-    KrnlIterateOperandPack innerPack(rewriter, innerLoops, optimizedInnerLoops);
+    KrnlIterateOperandPack innerPack(rewriter, innerLoops);
     for (int i = axis; i < rank; ++i)
       addDimensionToPack(rewriter, loc, innerPack, input, i);

@@ -93,10 +87,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     if (axis != 0) {
       outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack);

-      // No optimization
-      rewriter.setInsertionPointToEnd(optimizationBlock);
-      rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
       // Insert instructions inside the outer loop.
       Block &outerIterationBlock = outerIterateOp.bodyRegion().front();
       rewriter.setInsertionPointToStart(&outerIterationBlock);

@@ -126,10 +116,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
       sumIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
       // Create an inner loop to compute softmax.
       softmaxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
-
-      // No optimization
-      rewriter.setInsertionPointToEnd(optimizationBlock);
-      rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
     }

     // Insert instructions inside the max loop.
@@ -107,7 +107,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
     // 1. Define outer loops and emit empty optimization block:
     int64_t nOuterLoops = (group > 1) ? 3 : 2;
     BuildKrnlLoop outerLoops(rewriter, loc, nOuterLoops);
-    outerLoops.createDefineAndOptimizeOp();
+    outerLoops.createDefineOp();
     //   for n = 0 .. N:
     int nIndex = outerLoops.pushBounds(0, inputOperand, 0);
     //   for g = 0 .. N:

@@ -142,7 +142,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
       // 2.2 Define spatial loops
       int64_t nSpatialLoops = resultShape.size() - 2;
       BuildKrnlLoop spatialLoops(rewriter, loc, nSpatialLoops);
-      spatialLoops.createDefineAndOptimizeOp();
+      spatialLoops.createDefineOp();
       for (int i = 2; i < resultShape.size(); ++i)
         spatialLoops.pushBounds(0, alloc, i);

@@ -168,7 +168,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
         // 3.2 Define inner loops.
         int64_t nInnerLoops = 1 + (kernelShape.size() - 2);
         BuildKrnlLoop innerLoops(rewriter, loc, nInnerLoops);
-        innerLoops.createDefineAndOptimizeOp();
+        innerLoops.createDefineOp();
         //   for c = 0 .. C/group
         int cIndex = innerLoops.pushBounds(0, kernelShape[1]);
         //   for Kx = 0 .. KX
@@ -57,9 +57,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     int64_t rank = memRefType.getRank();

     std::vector<Value> originalLoops;
-    std::vector<Value> optimizedLoops;
-    Block *optimizationBlock =
-        defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
+    defineLoops(rewriter, loc, originalLoops, rank);

     // Create a KrnlIterateOp along C dimension.
     // This will be the outer-most loop in order to re-use scale, bias,

@@ -67,8 +65,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     SmallVector<Value, 1> loopCIVs;
     if (rank > 1) {
-      KrnlIterateOperandPack cPack(
-          rewriter, originalLoops[1], optimizedLoops[1]);
+      KrnlIterateOperandPack cPack(rewriter, originalLoops[1]);
       addDimensionToPack(rewriter, loc, cPack, operand, 1);
       auto cIterateOp = rewriter.create<KrnlIterateOp>(loc, cPack);
       Block &cIterationBlock = cIterateOp.bodyRegion().front();

@@ -89,21 +86,16 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     axes.emplace_back(0);
     for (int64_t i = 2; i < rank; ++i)
       axes.emplace_back(i);
-    std::vector<Value> packLoops, packOptimizedLoops;
+    std::vector<Value> packLoops;
     for (int i = 0; i < axes.size(); ++i) {
       packLoops.emplace_back(originalLoops[axes[i]]);
-      packOptimizedLoops.emplace_back(optimizedLoops[axes[i]]);
     }
-    KrnlIterateOperandPack pack(rewriter, packLoops, packOptimizedLoops);
+    KrnlIterateOperandPack pack(rewriter, packLoops);
     for (int i = 0; i < axes.size(); ++i) {
       addDimensionToPack(rewriter, loc, pack, operand, axes[i]);
     }
     auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);

-    // No optimization
-    rewriter.setInsertionPointToEnd(optimizationBlock);
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
     Block &iterationBlock = iterateOp.bodyRegion().front();
     rewriter.setInsertionPointToStart(&iterationBlock);
@@ -332,7 +332,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
     //   for ho in range(HO):
     //     for wo in range(WO):
     BuildKrnlLoop outputLoops(rewriter, loc, outputShape.size());
-    outputLoops.createDefineOptimizeAndIterateOp(alloc);
+    outputLoops.createDefineAndIterateOp(alloc);

     auto ipMainRegion = rewriter.saveInsertionPoint();
     rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());

@@ -475,7 +475,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
       //   output[n][c][ho][wo] =
      //     emitScalarOpFor(output[n][c][ho][wo], input[n, c, hi, wi]);
       BuildKrnlLoop poolingLoops(rewriter, loc, kernelShape.size());
-      poolingLoops.createDefineAndOptimizeOp();
+      poolingLoops.createDefineOp();
       for (int i = 0; i < kernelShape.size(); ++i)
         poolingLoops.pushBounds(
             0, poolDimMap, llvm::makeArrayRef(IVsAndConstants[i]));
@@ -190,59 +190,13 @@ void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc,
   }
 }

-// Function that defines the KRNL dialect loops and their respective
-// optimized version.
-KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter,
-    Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops) {
-  // Define loops.
+// Function that emits the definition of loops references.
+void defineLoops(ConversionPatternRewriter &rewriter, Location loc,
+    std::vector<Value> &loops, int64_t numLoops) {
   auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, numLoops);
   loops.reserve(numLoops);
   for (auto result : loopsOp.getResults())
     loops.push_back(result);
-
-  // Define optimized version of the loops.
-  auto optimizedLoopsOp = rewriter.create<KrnlOptimizeLoopsOp>(loc, numLoops);
-  optimizedLoops.reserve(numLoops);
-  for (auto result : optimizedLoopsOp.getResults())
-    optimizedLoops.push_back(result);
-
-  return optimizedLoopsOp;
 }
-
-// Function that emits the loops and their optimized version.
-// The function returns a reference to the inner optimization block.
-Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
-    std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops) {
-  KrnlOptimizeLoopsOp optimizedLoopsOp =
-      emitOptimizedLoops(rewriter, loc, loops, optimizedLoops, numLoops);
-  return &optimizedLoopsOp.region().front();
-}
-
-// Function which emits a basic set of loops and optimized loops
-// for a given operation argument. A reference to the loop optimization
-// block is returned in the last argument of the function.
-void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter,
-    Location loc, Value operand, std::vector<Value> &originalLoops,
-    KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp) {
-  // Operand shape.
-  auto shape = operand.getType().cast<MemRefType>().getShape();
-
-  // Number of loops.
-  int64_t rank = shape.size();
-
-  // Define loops and optimized loops.
-  std::vector<Value> optimizedLoops;
-  optimizedLoopsOp =
-      emitOptimizedLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
-
-  KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
-  // Iterate over the loop nest.
-  for (int i = 0; i < rank; ++i)
-    addDimensionToPack(rewriter, loc, pack, operand, i);
-
-  iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
-}

 unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
@@ -63,24 +63,10 @@ std::map<int64_t, int64_t> getReductionMapping(
 void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc,
     KrnlIterateOperandPack &pack, Value operand, int index);

-// Function that defines the KRNL dialect loops and their respective
-// optimized version.
-KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter,
-    Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops);
-
-// Function that emits the loops and their optimized version.
-// The function returns a reference to the inner optimization block.
-Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
-    std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops);
-
-// Function which emits a basic set of loops and optimized loops
-// for a given operation argument. A reference to the loop optimization
-// block is returned in the last argument of the function.
-void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter,
-    Location loc, Value operand, std::vector<Value> &originalLoops,
-    KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp);
+// Function that emits the define_loops operation to define `numLoops`
+// number of krnl loops, and fill `loops` with the newly defined loops.
+void defineLoops(ConversionPatternRewriter &rewriter, Location loc,
+    std::vector<Value> &loops, int64_t numLoops);

 unsigned getMemRefEltSizeInBytes(MemRefType memRefType);
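A sketch of how the trimmed `defineLoops` helper is now used together with the new single-argument `KrnlIterateOperandPack` constructor (the same manual idiom seen in the Gemm/MatMul/Softmax hunks; `rank` and `alloc` are illustrative placeholders for the loop count and the MemRef whose dimensions bound the nest):

```cpp
// Define `rank` krnl loops, bound each by a dimension of `alloc`, then
// materialize the nest with a single krnl.iterate.
std::vector<Value> originalLoops;
defineLoops(rewriter, loc, originalLoops, rank);

KrnlIterateOperandPack pack(rewriter, originalLoops); // no optimized loops
for (int i = 0; i < rank; ++i)
  addDimensionToPack(rewriter, loc, pack, alloc, i);

auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
Block &iterationBlock = iterateOp.bodyRegion().front();
rewriter.setInsertionPointToStart(&iterationBlock);
```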
@@ -212,7 +212,7 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
       operandAdaptor.X().getType().cast<ShapedType>().getElementType(), 0);
   int nLoops = 3;
   BuildKrnlLoop initializationLoops(rewriter, loc, nLoops);
-  initializationLoops.createDefineOptimizeAndIterateOp(state.ht);
+  initializationLoops.createDefineAndIterateOp(state.ht);
   auto ipInitializationLoops = rewriter.saveInsertionPoint();
   rewriter.setInsertionPointToStart(initializationLoops.getIterateBlock());
   {

@@ -292,7 +292,7 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
   // compute it, ft, ct, Ct, ot, Ht

   BuildKrnlLoop stateLoops(rewriter, loc, 2);
-  stateLoops.createDefineAndOptimizeOp();
+  stateLoops.createDefineOp();
   stateLoops.pushBounds(0, batchDimSize);
   stateLoops.pushBounds(0, hiddenDimSize);
   stateLoops.createIterateOp();

@@ -372,7 +372,7 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
     { // Emit instructions for matrix multiplications.
      // input_size is the reduction dimension.
      BuildKrnlLoop reductionLoops(rewriter, loc, 1);
-      reductionLoops.createDefineAndOptimizeOp();
+      reductionLoops.createDefineOp();
      reductionLoops.pushBounds(0, inputDimSize);
      reductionLoops.createIterateOp();
@@ -93,7 +93,7 @@ struct ONNXRNNOpLowering : public ConversionPattern {
     if (direction == FORWARD || direction == BIDIRECTIONAL) {
       BuildKrnlLoop sequenceLoops(rewriter, loc, 1);
-      sequenceLoops.createDefineAndOptimizeOp();
+      sequenceLoops.createDefineOp();
       sequenceLoops.pushBounds(0, sequenceDimSize);
       sequenceLoops.createIterateOp();

@@ -112,7 +112,7 @@ struct ONNXRNNOpLowering : public ConversionPattern {
     if (direction == REVERSE || direction == BIDIRECTIONAL) {
       BuildKrnlLoop sequenceLoops(rewriter, loc, 1);
-      sequenceLoops.createDefineAndOptimizeOp();
+      sequenceLoops.createDefineOp();
       sequenceLoops.pushBounds(0, sequenceDimSize);
       sequenceLoops.createIterateOp();
@@ -46,7 +46,7 @@ struct ONNXConcatOpLowering : public ConversionPattern {
       auto currShape = operands[i].getType().cast<MemRefType>().getShape();
       // Create loop.
       BuildKrnlLoop inputLoops(rewriter, loc, rank);
-      inputLoops.createDefineAndOptimizeOp();
+      inputLoops.createDefineOp();
       for (int r = 0; r < rank; ++r)
         inputLoops.pushBounds(0, operands[i], r);
       inputLoops.createIterateOp();
@@ -63,14 +63,14 @@ struct ONNXPadOpLowering : public ConversionPattern {
     // Iterate over the loop nest using the output shape.
     BuildKrnlLoop padLoops(rewriter, loc, rank);
-    padLoops.createDefineAndOptimizeOp();
+    padLoops.createDefineOp();
     for (int i = 0; i < rank; ++i)
       padLoops.pushBounds(0, alloc, i);
     padLoops.createIterateOp();

     // Iterate over the loop nest using the input shape.
     BuildKrnlLoop valueLoops(rewriter, loc, rank);
-    valueLoops.createDefineAndOptimizeOp();
+    valueLoops.createDefineOp();
     for (int i = 0; i < rank; ++i)
       valueLoops.pushBounds(0, operandAdaptor.data(), i);
     valueLoops.createIterateOp();
@@ -46,14 +46,14 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
     // Iterate over the loop nest using the output shape.
     BuildKrnlLoop padLoops(rewriter, loc, rank);
-    padLoops.createDefineAndOptimizeOp();
+    padLoops.createDefineOp();
     for (int i = 0; i < rank; ++i)
       padLoops.pushBounds(0, alloc, i);
     padLoops.createIterateOp();

     // Iterate over the loop nest using the input shape.
     BuildKrnlLoop valueLoops(rewriter, loc, rank);
-    valueLoops.createDefineAndOptimizeOp();
+    valueLoops.createDefineOp();
     for (int i = 0; i < rank; ++i)
       valueLoops.pushBounds(0, operandAdaptor.data(), i);
     valueLoops.createIterateOp();
@@ -70,7 +70,7 @@ struct ONNXSplitOpLowering : public ConversionPattern {
       OpBuilder::InsertionGuard insertGuard(rewriter);
       // Create loop.
       BuildKrnlLoop outputLoops(rewriter, loc, rank);
-      outputLoops.createDefineOptimizeAndIterateOp(allocs[i]);
+      outputLoops.createDefineAndIterateOp(allocs[i]);
       outputLoops.createIterateOp();
       rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());
       // Indices for the read and write.
@@ -38,11 +38,9 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
     // Define loops.
     std::vector<Value> originalLoops;
-    std::vector<Value> optimizedLoops;
-    Block *optimizationBlock =
-        defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
+    defineLoops(rewriter, loc, originalLoops, rank);

-    KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
+    KrnlIterateOperandPack pack(rewriter, originalLoops);
     // Iterate over the loop nest using the input shape.
     for (int i = 0; i < rank; ++i)
       addDimensionToPack(rewriter, loc, pack, data, i);

@@ -53,14 +51,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
     // Now perform the insertions into the body of the
     // just generated instructions:

-    // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
-    rewriter.setInsertionPointToEnd(optimizationBlock);
-    // Return from KrnlOptimizeLoopsOp body.
-    // When no optimizations are present we just return the loops
-    // unchaged.
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-
-    // 2. Insert instructions inside the KernelIterateOp body.
+    // Insert instructions inside the KernelIterateOp body.
     rewriter.setInsertionPointToStart(&iterationBlock);

     // Handle the operation.
@@ -161,8 +161,7 @@ void KrnlIterateOperandPack::pushAffineMapBound(
 BuildKrnlLoop::BuildKrnlLoop(
     ConversionPatternRewriter &rewriter, Location loc, int loopNum)
     : rewriter(rewriter), loc(loc), originalLoopNum(loopNum), pack(NULL),
-      pushCount(0), createdDefineOp(false), createdOptimizeOp(false),
-      createdIterateOp(false) {
+      pushCount(0), createdDefineOp(false), createdIterateOp(false) {
   if (originalLoopNum <= 0)
     emitError(loc, "Expected positive number of original loops.");
 }

@@ -177,7 +176,7 @@ BuildKrnlLoop::~BuildKrnlLoop() {
     free(pack);
 }

-void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) {
+void BuildKrnlLoop::createDefineOp() {
   // Insert define loop operation.
   auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, originalLoopNum);
   originalLoops.reserve(originalLoopNum);

@@ -185,25 +184,8 @@ void BuildKrnlLoop::createDefineOp() {
     originalLoops.push_back(result);
   createdDefineOp = true;

-  // Insert optimize loop operation.
-  auto optimizedLoopsOp =
-      rewriter.create<KrnlOptimizeLoopsOp>(loc, originalLoopNum);
-  optLoops.reserve(originalLoopNum);
-
-  // Emit empty optimizations if flag is set.
-  if (withEmptyOptimization) {
-    for (auto result : optimizedLoopsOp.getResults())
-      optLoops.push_back(result);
-    optBlock = &optimizedLoopsOp.region().front();
-    auto ip = rewriter.saveInsertionPoint();
-    rewriter.setInsertionPointToEnd(optBlock);
-    rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
-    rewriter.restoreInsertionPoint(ip);
-  }
-  createdOptimizeOp = true;
-
   // prepare data structure to push bounds
-  pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops);
+  pack = new KrnlIterateOperandPack(rewriter, originalLoops);
 }

 int BuildKrnlLoop::pushBounds(int64_t lowerBound, int64_t upperBound) {

@@ -254,9 +236,6 @@ void BuildKrnlLoop::createIterateOp() {
   // Loop definition operation is mandatory.
   assert(createdDefineOp && "Must create define op before iterate op.");

-  // Loop optimization operation is mandatory (for now).
-  assert(createdOptimizeOp && "Must create optimize op before iterate op.");
-
   // Check if all bounds have been defined.
   assert(pushCount == originalLoopNum &&
          "Must push bounds for all original loops.");

@@ -267,15 +246,14 @@ void BuildKrnlLoop::createIterateOp() {
   createdIterateOp = true;
 }

-void BuildKrnlLoop::createDefineOptimizeAndIterateOp(
-    Value memRefOperand, bool withEmptyOptimization) {
+void BuildKrnlLoop::createDefineAndIterateOp(Value memRefOperand) {
   // Rank of the MemRef operand. We will emit a loop for each dimension.
   int loopNum = memRefOperand.getType().cast<MemRefType>().getShape().size();
   assert(originalLoopNum == loopNum &&
          "Mismatch in loop numbers from constructor and define.");

   // Emit the definition and the optimization operations for the loop nest.
-  createDefineAndOptimizeOp(withEmptyOptimization);
+  createDefineOp();

   // Push a lower-upper bound pair for each dimension of the MemRef operand.
   // The lower bound in this case is always zero.
@@ -83,6 +83,13 @@ struct KrnlIterateOperandPack {
         _operands.end(), optimizedLoops.begin(), optimizedLoops.end());
   }

+  // Create a pack with optimizedLoops = inputLoops (ie., no optimization).
+  KrnlIterateOperandPack(
+      mlir::Builder &builder, llvm::ArrayRef<mlir::Value> inputLoops)
+      : builder(builder), inputLoops(inputLoops), optimizedLoops(inputLoops) {
+    _operands.insert(_operands.end(), inputLoops.begin(), inputLoops.end());
+  }
+
   void pushConstantBound(int64_t bound);

   void pushOperandBound(mlir::Value operand);

@@ -112,19 +119,15 @@ private:
 };

 // Helper function to write kernel loops. This class will let us build a single
-// define/optimize/iterate operation combo. We can then insert optimizations in
-// the body of the optimization operation, and operations in the body of the
-// iterate operation.
+// define/iterate operation combo. We can then insert operations in the body of
+// the iterate operation.
 //
 // The sequence is as follow:
 //
 //   1) Create an object giving the rewriter, location, and number of loop in
 //      the original (non optimized) loop.
 //
-//   2) Create define & optimize ops (currently paired). Optimizations can then
-//      be added to the inner block of the optimize operation. Make sure to set
-//      the insertion point to that block for optimizations to go in the right
-//      place.
+//   2) Create define_loops ops to define new loop variables.
 //
 //   3) Push the bounds for each of the original loops. Bounds are pushed in
 //      pairs (lower & upper bounds). There are a few methods to do it depending

@@ -153,7 +156,7 @@ public:
   // Create define and optimize loop with loopNum original loops. If
   // withEmptyOptimization is true, the optimization is simply the identity
   // function (no optimizations).
-  void createDefineAndOptimizeOp(bool withEmptyOptimization = true);
+  void createDefineOp();

   // Push bounds (lower and upper) for each of the loops (order matters).
   // The function returns the order number associated with the loop iteration.

@@ -172,13 +175,12 @@ public:
   // operations associated with this loop nest have been emitted already.
   void createIterateOp();

-  // Create the loop nest definition, optimization and iteration operations
+  // Create the loop nest definition and iteration operations
   // for a given operand of MemRef type. The loop nest has a depth equal to the
   // rank of the MemRef operand. The lower bound of each loop is zero. The
   // upper bound of each loop is given by the corresponding dimension of the
   // MemRef operand.
-  void createDefineOptimizeAndIterateOp(
-      Value memRefOperand, bool withEmptyOptimization = true);
+  void createDefineAndIterateOp(Value memRefOperand);

   // Get the (original loop) induction variable associated with the given
   // index. Use the index returned when pushing the bounds.

@@ -220,7 +222,6 @@ private:
   // Flags that keep track of emitted operations.
   bool createdDefineOp;
-  bool createdOptimizeOp;
   bool createdIterateOp;

   // Saved insertion point in the code region of the KrnlOptimizeLoopsOp.
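Putting the documented sequence together, a sketch of the updated BuildKrnlLoop flow (mirroring the LSTM/RNN call sites in this commit; the two-loop nest and its bounds are illustrative):

```cpp
// 1) One BuildKrnlLoop object per loop nest (here: 2 loops).
BuildKrnlLoop stateLoops(rewriter, loc, 2);
// 2) Emit krnl.define_loops only; no optimize/return_loops ops anymore.
stateLoops.createDefineOp();
// 3) Push a lower/upper bound pair for each loop, in order.
stateLoops.pushBounds(0, batchDimSize);
stateLoops.pushBounds(0, hiddenDimSize);
// 4) Emit krnl.iterate and move the insertion point into its body.
stateLoops.createIterateOp();
rewriter.setInsertionPointToStart(stateLoops.getIterateBlock());
```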
@@ -78,47 +78,6 @@ ParseResult parseKrnlDefineLoopsOp(
   return success();
 }

-//===----------------------------------------------------------------------===//
-// KrnlOptimizeLoopsOp
-//===----------------------------------------------------------------------===//
-
-void KrnlOptimizeLoopsOp::build(
-    OpBuilder &builder, OperationState &result, int num_optimized_loops) {
-  result.types.append(num_optimized_loops, LoopType::get(builder.getContext()));
-  // Create a region and a block for the body.
-  // Schedule intrinsics will be placed into this region.
-  Region *region = result.addRegion();
-  auto *body = new Block();
-  region->push_back(body);
-}
-
-void print(OpAsmPrinter &p, KrnlOptimizeLoopsOp &op) {
-  p << "krnl.optimize_loops ";
-  p.printRegion(op.region(), /*printEntryBlockArgs=*/false,
-      /*printBlockTerminators=*/true);
-  p << " : ";
-  p.printFunctionalType(op);
-}
-
-ParseResult parseKrnlOptimizeLoopsOp(
-    OpAsmParser &parser, OperationState &result) {
-  // Parse the schedule body region.
-  Region *region = result.addRegion();
-  if (parser.parseRegion(*region, llvm::None, llvm::None))
-    return failure();
-
-  // Parse the function type for the schedule operation.
-  // Then following the hint of this parsed function type, parse the
-  // returned timestamp space dimension handlers.
-  FunctionType schedule_func_type;
-  if (parser.parseColonType(schedule_func_type) ||
-      parser.addTypesToList(schedule_func_type.getResults(), result.types)) {
-    failure();
-  }
-
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // KrnlIterateOp
 //===----------------------------------------------------------------------===//

@@ -340,26 +299,9 @@ static LogicalResult verify(KrnlIterateOp op) {
 }

 //===----------------------------------------------------------------------===//
-// KrnlReturnLoopsOp
+// KrnlEntryPointOp
 //===----------------------------------------------------------------------===//

-void print(OpAsmPrinter &p, KrnlReturnLoopsOp &op) {
-  p << "krnl.return_loops ";
-  p.printOperands(op.operand_begin(), op.operand_end());
-}
-
-ParseResult parseKrnlReturnLoopsOp(
-    OpAsmParser &parser, OperationState &result) {
-  // Parse the loops to return.
-  SmallVector<OpAsmParser::OperandType, 4> timestamp_dim_handlers;
-  if (parser.parseOperandList(timestamp_dim_handlers) ||
-      parser.resolveOperands(timestamp_dim_handlers,
-          LoopType::get(result.getContext()), result.operands))
-    return failure();
-
-  return success();
-}
-
 void KrnlEntryPointOp::build(mlir::OpBuilder &builder, OperationState &state,
     SymbolRefAttr funcAttr, IntegerAttr numInputs, IntegerAttr numOutputs) {
   state.addAttribute(KrnlEntryPointOp::getEntryPointFuncAttrName(), funcAttr);
@@ -48,32 +48,6 @@ def KrnlDefineLoopsOp : Op<Krnl_Dialect, "define_loops"> {
   }];
 }

-def KrnlOptimizeLoopsOp : Op<Krnl_Dialect, "optimize_loops"> {
-  let summary = "optimize_loops operation";
-  let description = [{
-    The "krnl.optimize_loops" operation is essentially a cosmetic operation
-    which exists to encapsulate a region where loops are being scheduled /
-    optimized.
-
-    The optimized loops are returned at the end of the region associated with
-    the krnl.optimize_loops operation.
-
-    For example : TBD once we have actual schedule intrinsics.
-  }];
-
-  let arguments = (ins Variadic<AnyType>);
-  let results = (outs Variadic<AnyType>);
-  let regions = (region SizedRegion<1>:$region);
-
-  let skipDefaultBuilders = 1;
-
-  let builders = [ OpBuilder<"OpBuilder &builder, OperationState &result, "
-             "int timestamp_space_rank"> ];
-
-  let printer = [{ return ::print(p, *this); }];
-  let parser = [{ return ::parse$cppClass(parser, result); }];
-}
-
 def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineScope]> {
   let summary = "iterate operation";
   let description = [{

@@ -129,19 +103,6 @@ def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineScope]> {
   let verifier = [{ return ::verify(*this); }];
 }

-def KrnlReturnLoopsOp : Op<Krnl_Dialect, "return_loops", [Terminator]> {
-  let summary = "Krnl return handler operation";
-  let description = [{
-    Krnl return_loops operation is a terminator operation for returning
-    scheduled dimension handlers in the krnl.optimize_loops region.
-  }];
-
-  let arguments = (ins Variadic<AnyType>);
-
-  let printer = [{ return ::print(p, *this); }];
-  let parser = [{ return ::parse$cppClass(parser, result); }];
-}
-
 def KrnlTerminatorOp : Op<Krnl_Dialect, "terminate", [Terminator]> {
   let summary = "Krnl terminator operation";
   let description = [{
@@ -125,21 +125,6 @@ public:
 // Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
 //===----------------------------------------------------------------------===//

-class KrnlOptimizeLoopsLowering : public OpRewritePattern<KrnlOptimizeLoopsOp> {
-public:
-  using OpRewritePattern<KrnlOptimizeLoopsOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(
-      KrnlOptimizeLoopsOp op, PatternRewriter &rewriter) const override {
-    rewriter.eraseOp(op);
-    return success();
-  }
-};
-
-//===----------------------------------------------------------------------===//
-// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
-//===----------------------------------------------------------------------===//
-
 class KrnlBlockOpLowering : public OpRewritePattern<KrnlBlockOp> {
 public:
   using OpRewritePattern<KrnlBlockOp>::OpRewritePattern;

@@ -151,21 +136,6 @@ public:
   }
 };

-//===----------------------------------------------------------------------===//
-// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
-//===----------------------------------------------------------------------===//
-
-class KrnlReturnLoopOpLowering : public OpRewritePattern<KrnlReturnLoopsOp> {
-public:
-  using OpRewritePattern<KrnlReturnLoopsOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(
-      KrnlReturnLoopsOp op, PatternRewriter &rewriter) const override {
-    rewriter.eraseOp(op);
-    return success();
-  }
-};
-
 //===----------------------------------------------------------------------===//
 // KrnlToAffineLoweringPass
 //===----------------------------------------------------------------------===//

@@ -230,14 +200,11 @@ void KrnlToAffineLoweringPass::runOnFunction() {
   OwningRewritePatternList patterns;
   patterns.insert<KrnlTerminatorLowering, KrnlDefineLoopsLowering,
-      KrnlOptimizeLoopsLowering, KrnlBlockOpLowering, KrnlReturnLoopOpLowering>(
-      &getContext());
+      KrnlBlockOpLowering>(&getContext());

   // Do not lower operations that pertain to schedules just yet.
   target.addLegalOp<KrnlBlockOp>();
   target.addLegalOp<KrnlDefineLoopsOp>();
-  target.addLegalOp<KrnlOptimizeLoopsOp>();
-  target.addLegalOp<KrnlReturnLoopsOp>();
   if (failed(applyPartialConversion(function, target, patterns)))
     return signalPassFailure();

@@ -312,8 +279,6 @@ void KrnlToAffineLoweringPass::runOnFunction() {
   // Remove/lower schedule related operations.
   target.addIllegalOp<KrnlDefineLoopsOp>();
   target.addIllegalOp<KrnlBlockOp>();
-  target.addIllegalOp<KrnlOptimizeLoopsOp>();
-  target.addIllegalOp<KrnlReturnLoopsOp>();
   if (failed(applyPartialConversion(function, target, patterns)))
     return signalPassFailure();
 }
@@ -12,9 +12,6 @@
 func @simple_iterate(%N : index) {
   %ii, %ij, %ik = krnl.define_loops 3
-  %oi, %oj, %ok = krnl.optimize_loops {
-    krnl.return_loops %ii, %ij, %ik
-  } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)

   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):

@@ -22,18 +19,18 @@ func @simple_iterate(%N : index) {
   // GENERIC-NEXT: bounds = [#{{.*}}, #{{.*}}, #{{.*}}, #{{.*}}]

   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 1 to 11) {
-  krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) {
+  krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) {

   }

   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) {
-  krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) {
+  krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) {
     // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}) ( {
     // GENERIC-NEXT: ^bb0(%{{.*}}: index):
     // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10) {
-    krnl.iterate(%ok) with (%ik -> %k = 0 to 10) {
+    krnl.iterate(%ik) with (%ik -> %k = 0 to 10) {

     }
   }

@@ -41,7 +38,7 @@ func @simple_iterate(%N : index) {
   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to %{{.*}}, %{{.*}} -> %{{.*}} = 0 to 10) {
-  krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) {
+  krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) {

  }

@@ -52,25 +49,22 @@ func @simple_iterate(%N : index) {
 func @affine_map_bound(%N : index) {
   %ii, %ij, %ik = krnl.define_loops 3
-  %oi, %oj, %ok = krnl.optimize_loops {
-    krnl.return_loops %ii, %ij, %ik
-  } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)

   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index):
   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) {
-  krnl.iterate(%oi, %oj) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) {
+  krnl.iterate(%ii, %ij) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) {
    // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
    // GENERIC-NEXT: ^bb0(%{{.*}}: index):
    // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = #{{.*}}(%{{.*}}, %{{.*}}) to #{{.*}}(%{{.*}}, %{{.*}})) {
-    krnl.iterate(%ok) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) {
+    krnl.iterate(%ik) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) {

    }

    // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( {
    // GENERIC-NEXT: ^bb0(%{{.*}}: index):
    // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = max #map{{.*}}(%{{.*}}, %{{.*}}) to min #map{{.*}}(%{{.*}}, %{{.*}})[%{{.*}}]) {
-    krnl.iterate(%ok) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) {
+    krnl.iterate(%ik) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) {

    }
 }
@@ -12,14 +12,12 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
 // CHECK: [[MEMPOOL:%.+]] = alloc() : memref<400xi8>
 // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
 // CHECK: krnl.define_loops
-// CHECK: krnl.optimize_loops
 // CHECK: krnl.iterate
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
 // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
 // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32>
 // CHECK: krnl.define_loops
-// CHECK: krnl.optimize_loops
 // CHECK: krnl.iterate
 // CHECK: dealloc [[MEMPOOL]] : memref<400xi8>
 // CHECK: return [[RES]] : memref<10x10xf32>

@@ -41,14 +39,12 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
 // CHECK: [[MEMPOOL1:%.+]] = alloc() : memref<400xi8>
 // CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32>
 // CHECK: krnl.define_loops
-// CHECK: krnl.optimize_loops
 // CHECK: krnl.iterate
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
 // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: krnl.define_loops
-// CHECK: krnl.optimize_loops
 // CHECK: krnl.iterate
 // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32>
 // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32>

@@ -57,7 +53,6 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
 // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
 // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
 // CHECK: krnl.define_loops
-// CHECK: krnl.optimize_loops
 // CHECK: krnl.iterate
 // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
 // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32>
@@ -2,7 +2,7 @@

// CHECK-LABEL: func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32>
func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32> {
%0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32>
%0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 1.0, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32>
return %0 : memref<1x70xf32>

// CHECK: {{.*}} = "krnl.global"() {name = "constant_0", shape = [1, 70]} : () -> memref<1x70xf32>
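The constant-elision test above reduces to one attribute change: after elision the "krnl.global" op keeps its name and shape but drops the dense value payload. Schematically (a trimmed sketch of the two forms checked above, with the 1x70 payload abbreviated):

// before elision: the value attribute carries the full tensor payload
%0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<...> : tensor<1x70xf32>} : () -> memref<1x70xf32>
// after elision: only name and shape remain
%0 = "krnl.global"() {name = "constant_0", shape = [1, 70]} : () -> memref<1x70xf32>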
File diff suppressed because it is too large
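Every hunk below follows one before/after template: the old lowering wrapped each loop nest in a krnl.optimize_loops region that merely returned the loops unchanged, and krnl.iterate consumed the returned handles; the new lowering feeds the defined loops straight into krnl.iterate. Distilled from the CHECK lines that follow (a sketch, not a verbatim test):

// before: identity optimize/return wrapper
%def:2 = krnl.define_loops 2
%opt:2 = krnl.optimize_loops {
  krnl.return_loops %def#0, %def#1
} : () -> (!krnl.loop, !krnl.loop)
krnl.iterate(%opt#0, %opt#1) with (%def#0 -> %i = 0 to 10, %def#1 -> %j = 0 to 10) {
}

// after: iterate directly over the defined loops
%def:2 = krnl.define_loops 2
krnl.iterate(%def#0, %def#1) with (%def#0 -> %i = 0 to 10, %def#1 -> %j = 0 to 10) {
}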
@@ -12,10 +12,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32

@@ -23,10 +20,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Add
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -51,10 +45,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32

@@ -62,10 +53,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Mul
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
@@ -90,10 +78,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32

@@ -101,10 +86,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Div
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
@@ -129,10 +111,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32

@@ -140,10 +119,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Sub
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
@@ -168,10 +144,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1

@@ -179,10 +152,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor

/// Second And
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
@@ -207,10 +177,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1

@@ -218,10 +185,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*

/// Second Or
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
@@ -246,10 +210,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1

@@ -257,10 +218,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor

/// Second Xor
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
@@ -286,12 +244,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>

@@ -301,12 +256,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: affine.store [[EXP]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -331,12 +283,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32

@@ -352,12 +301,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@@ -388,12 +334,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32

@@ -409,12 +352,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -445,12 +385,9 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32

@@ -466,12 +403,8 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -502,12 +435,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32

@@ -522,12 +452,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -557,12 +484,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32

@@ -574,12 +498,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
@@ -605,10 +526,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32

@@ -616,10 +534,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Sum
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -644,10 +559,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32

@@ -656,10 +568,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Max
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
@@ -685,10 +594,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32

@@ -697,10 +603,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens

/// Second Min
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
@@ -727,12 +630,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32

@@ -749,12 +649,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -786,12 +683,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32

@@ -805,12 +699,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -839,12 +730,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32

@@ -862,12 +750,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -900,12 +785,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32

@@ -924,12 +806,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -963,12 +842,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_0:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32

@@ -979,12 +855,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[C0_2:%.+]] = constant 0 : index
// CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32