diff --git a/.azure-pipelines/Windows-CI.yml b/.azure-pipelines/Windows-CI.yml index b5c3509..a272c3a 100644 --- a/.azure-pipelines/Windows-CI.yml +++ b/.azure-pipelines/Windows-CI.yml @@ -13,7 +13,7 @@ jobs: steps: - task: UsePythonVersion@0 inputs: - versionSpec: '3.7.7' + versionSpec: '3.7.8' architecture: 'x64' - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" diff --git a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp index b01a197..3bc2222 100644 --- a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp @@ -525,26 +525,16 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern { SmallVector loopIVs; if (!hasAllScalarValues(operands)) { - std::vector originalLoops; - KrnlOptimizeLoopsOp optimizedLoopsOp; - KrnlIterateOp iterateOp; - emitKrnlLoopsAndIterationForOperand( - rewriter, loc, X, originalLoops, optimizedLoopsOp, iterateOp); - Block &optimizationBlock = optimizedLoopsOp.region().front(); - Block &iterationBlock = iterateOp.bodyRegion().front(); + // Create iterateOp & get block within iterate op. + BuildKrnlLoop loops(rewriter, loc, memRefType.getRank()); + loops.createDefineAndIterateOp(X); + Block *iterationBlock = loops.getIterateBlock(); - // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body. - rewriter.setInsertionPointToEnd(&optimizationBlock); - // Return from KrnlOptimizeLoopsOp body. - // When no optimizations are present we just return the loops - // unchaged. - rewriter.create(loc, originalLoops); - - // 2. Insert instructions inside the KernelIterateOp body. - rewriter.setInsertionPointToStart(&iterationBlock); + // Insert instructions inside the KernelIterateOp body. + rewriter.setInsertionPointToStart(iterationBlock); // Handle the operation: - for (auto arg : iterationBlock.getArguments()) + for (auto arg : iterationBlock->getArguments()) loopIVs.push_back(arg); } @@ -555,7 +545,6 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern { rewriter.create(loc, loweredOpResult, alloc, loopIVs); rewriter.replaceOp(op, alloc); - return success(); } }; @@ -598,25 +587,16 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern { broadcastedDimInfo = getBroadcastedDimInfo(loc, rewriter, memRefType, operands); - std::vector originalLoops; - KrnlOptimizeLoopsOp optimizedLoopsOp; - KrnlIterateOp iterateOp; - emitKrnlLoopsAndIterationForOperand( - rewriter, loc, alloc, originalLoops, optimizedLoopsOp, iterateOp); - Block &optimizationBlock = optimizedLoopsOp.region().front(); - Block &iterationBlock = iterateOp.bodyRegion().front(); + // Create iterateOp & get block within iterate op. + BuildKrnlLoop loops(rewriter, loc, memRefType.getRank()); + loops.createDefineAndIterateOp(alloc); + Block *iterationBlock = loops.getIterateBlock(); - // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body. - rewriter.setInsertionPointToEnd(&optimizationBlock); - // Return from KrnlOptimizeLoopsOp body. - // When no optimizations are present we just return the loops unchaged. - rewriter.create(loc, originalLoops); - - // 2. Insert instructions inside the KernelIterateOp body. - rewriter.setInsertionPointToStart(&iterationBlock); + // Insert instructions inside the KernelIterateOp body. 
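Aside for reviewers new to the helper: with this change the elementwise patterns reduce to a single define/iterate pair. A minimal sketch, assuming the `rewriter`, `loc`, `operands`, `X`, and `memRefType` values already set up in ONNXElementwiseUnaryOpLowering, with the IR it emits for rank 2 noted in comments:

    SmallVector<Value, 4> loopIVs;
    if (!hasAllScalarValues(operands)) {
      // Emits, for rank 2 (loop names illustrative):
      //   %l0, %l1 = krnl.define_loops 2
      //   krnl.iterate(%l0, %l1) with (%l0 -> %i = 0 to ..., %l1 -> %j = 0 to ...)
      BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
      loops.createDefineAndIterateOp(X);
      rewriter.setInsertionPointToStart(loops.getIterateBlock());
      // The iterate block arguments are the loop induction variables.
      for (auto arg : loops.getIterateBlock()->getArguments())
        loopIVs.push_back(arg);
    }

No krnl.optimize_loops region or krnl.return_loops terminator is created; future scheduling directives are expected to attach to the loop handles directly (e.g. via ops like krnl.block).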
+ rewriter.setInsertionPointToStart(iterationBlock); // Handle the operation: - for (auto arg : iterationBlock.getArguments()) + for (auto arg : iterationBlock->getArguments()) loopIVs.push_back(arg); } // Fold over operands for each of their scalar values. diff --git a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp index 0ba8396..cce0529 100644 --- a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp @@ -72,9 +72,7 @@ struct ONNXGemmOpLowering : public ConversionPattern { // Define loops. std::vector originalLoops; - std::vector optimizedLoops; - Block *optimizationBlock = - defineLoops(rewriter, loc, originalLoops, optimizedLoops, numLoops); + defineLoops(rewriter, loc, originalLoops, numLoops); // We have two Krnl loops: // - Outer loop iterates over the output matrix dimensions, and @@ -84,23 +82,18 @@ struct ONNXGemmOpLowering : public ConversionPattern { std::vector outerLoops, optimizedOuterLoops; outerLoops.reserve(2); optimizedOuterLoops.reserve(2); - for (int i = 0; i < 2; ++i) { + for (int i = 0; i < 2; ++i) outerLoops.push_back(originalLoops[i]); - optimizedOuterLoops.push_back(optimizedLoops[i]); - } - KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops); + KrnlIterateOperandPack outerPack(rewriter, outerLoops); // Induction variables for the outer loops for (int i = 0; i < 2; ++i) addDimensionToPack(rewriter, loc, outerPack, alloc, i); // Reduction loop - std::vector reductionLoops, optimizedReductionLoops; + std::vector reductionLoops; reductionLoops.reserve(1); - optimizedReductionLoops.reserve(1); reductionLoops.push_back(originalLoops[2]); - optimizedReductionLoops.push_back(optimizedLoops[2]); - KrnlIterateOperandPack reductionPack( - rewriter, reductionLoops, optimizedReductionLoops); + KrnlIterateOperandPack reductionPack(rewriter, reductionLoops); // Induction variable for the reduction dimension // Try to find and use a static value from A or B first. // If it failed then use a dynamic value. @@ -140,10 +133,6 @@ struct ONNXGemmOpLowering : public ConversionPattern { // Now perform the insertions into the body of the // just generated instructions: - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); - // Insert instructions inside the outer loop. 
Block &outerIterationBlock = outerIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&outerIterationBlock); @@ -154,14 +143,15 @@ struct ONNXGemmOpLowering : public ConversionPattern { loopMNIVs.emplace_back(arg); } - // Initialize the output of A*B + // Initialize the output of A * B auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0); rewriter.create(loc, zero, alloc, loopMNIVs); - // Compute A*B + // Compute A * B auto matmulIterateOp = rewriter.create(loc, reductionPack); - // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting) + // Compute beta * C, and add up to alpha * A * B (unidirectional + // broadcasting) auto loadedAB = rewriter.create(loc, alloc, loopMNIVs); auto alphaAB = rewriter.create(loc, alpha, loadedAB); if (hasBias) { @@ -175,7 +165,7 @@ struct ONNXGemmOpLowering : public ConversionPattern { rewriter.create(loc, alphaAB, alloc, loopMNIVs); } - // Insert instructions to do matrix multiplication: A*B + // Insert instructions to do matrix multiplication: A * B Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&matmulIterationBlock); diff --git a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp index def7f4f..69f0006 100644 --- a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp +++ b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp @@ -117,9 +117,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern { // Define loops for batch dimensions. std::vector originalLoops; - std::vector optimizedLoops; - Block *optimizationBlock = defineLoops( - rewriter, loc, originalLoops, optimizedLoops, memRefShape.size()); + defineLoops(rewriter, loc, originalLoops, memRefShape.size()); // Outer KrnlIterateOp SmallVector loopBatchIVs; @@ -131,24 +129,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern { for (int i = 0; i < memRefShape.size() - matmulResultDims; ++i) batchAxes.emplace_back(i); - std::vector outerLoops, optimizedOuterLoops; + std::vector outerLoops; outerLoops.reserve(batchAxes.size()); - optimizedOuterLoops.reserve(batchAxes.size()); - for (int i = 0; i < batchAxes.size(); ++i) { + for (int i = 0; i < batchAxes.size(); ++i) outerLoops.push_back(originalLoops[i]); - optimizedOuterLoops.push_back(optimizedLoops[i]); - } - KrnlIterateOperandPack outerPack( - rewriter, outerLoops, optimizedOuterLoops); + + KrnlIterateOperandPack outerPack(rewriter, outerLoops); for (int i = 0; i < batchAxes.size(); ++i) { addDimensionToPack(rewriter, loc, outerPack, alloc, i); } auto outerIterateOp = rewriter.create(loc, outerPack); - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); - // Insert instructions into the outer KrnlIterateOp. Block &outerIterationBlock = outerIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&outerIterationBlock); @@ -165,18 +156,14 @@ struct ONNXMatMulOpLowering : public ConversionPattern { // Create a KrnlIterateOp for matrix multiplication. KrnlIterateOp matmulIterateOp; - std::vector matmulLoops, optimizedMatmulLoops; + std::vector matmulLoops; if (AShape.size() >= 2 && BShape.size() >= 2) { // 2-D x 2-D. Result has two dimensions. 
matmulLoops.reserve(2); - optimizedMatmulLoops.reserve(2); for (int i = 2; i > 0; --i) { matmulLoops.emplace_back(originalLoops[memRefShape.size() - i]); - optimizedMatmulLoops.emplace_back( - optimizedLoops[memRefShape.size() - i]); } - KrnlIterateOperandPack matmulPack( - rewriter, matmulLoops, optimizedMatmulLoops); + KrnlIterateOperandPack matmulPack(rewriter, matmulLoops); for (int i = 2; i > 0; --i) { addDimensionToPack( rewriter, loc, matmulPack, alloc, memRefShape.size() - i); @@ -185,23 +172,13 @@ struct ONNXMatMulOpLowering : public ConversionPattern { } else { // 1-D x 2-D, and vice versa. Result has one dimension. matmulLoops.reserve(1); - optimizedMatmulLoops.reserve(1); matmulLoops.emplace_back(originalLoops[memRefShape.size() - 1]); - optimizedMatmulLoops.emplace_back( - optimizedLoops[memRefShape.size() - 1]); - KrnlIterateOperandPack matmulPack( - rewriter, matmulLoops, optimizedMatmulLoops); + KrnlIterateOperandPack matmulPack(rewriter, matmulLoops); addDimensionToPack( rewriter, loc, matmulPack, alloc, memRefShape.size() - 1); matmulIterateOp = rewriter.create(loc, matmulPack); } - if (!hasBatchLoop) { - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); - } - // Insert instructions into the matmul KrnlIterateOp. Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&matmulIterationBlock); @@ -226,18 +203,11 @@ struct ONNXMatMulOpLowering : public ConversionPattern { // Iterate along the reduction dimension. // Use a value from A. std::vector reduceLoops; - std::vector optimizedReduceLoops; - Block *optimizationReduceBlock = - defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1); - KrnlIterateOperandPack reducePack( - rewriter, reduceLoops, optimizedReduceLoops); + defineLoops(rewriter, loc, reduceLoops, 1); + KrnlIterateOperandPack reducePack(rewriter, reduceLoops); addDimensionToPack(rewriter, loc, reducePack, A, AShape.size() - 1); auto reduceIterateOp = rewriter.create(loc, reducePack); - // No optimization - rewriter.setInsertionPointToEnd(optimizationReduceBlock); - rewriter.create(loc, reduceLoops); - // Insert instructions into the reduction KrnlIterateOp. Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&reduceIterationBlock); @@ -288,18 +258,12 @@ struct ONNXMatMulOpLowering : public ConversionPattern { // Iterate along the reduction dimension. // Use a value from A. std::vector reduceLoops; - std::vector optimizedReduceLoops; - Block *optimizationReduceBlock = - defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1); - KrnlIterateOperandPack reducePack( - rewriter, reduceLoops, optimizedReduceLoops); + + defineLoops(rewriter, loc, reduceLoops, 1); + KrnlIterateOperandPack reducePack(rewriter, reduceLoops); addDimensionToPack(rewriter, loc, reducePack, A, 0); auto reduceIterateOp = rewriter.create(loc, reducePack); - // No optimization - rewriter.setInsertionPointToEnd(optimizationReduceBlock); - rewriter.create(loc, reduceLoops); - // Insert instructions into the reduction KrnlIterateOp. 
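Both Gemm and MatMul now follow the same recipe: define every loop once with defineLoops, then distribute the handles across nested krnl.iterate ops. A condensed sketch of the Gemm case, with identifiers as in the pattern above (illustrative, not the verbatim implementation):

    std::vector<Value> originalLoops;
    defineLoops(rewriter, loc, originalLoops, /*numLoops=*/3);

    // The first two loops iterate over the output dims of alloc.
    std::vector<Value> outerLoops(originalLoops.begin(), originalLoops.begin() + 2);
    KrnlIterateOperandPack outerPack(rewriter, outerLoops);
    for (int i = 0; i < 2; ++i)
      addDimensionToPack(rewriter, loc, outerPack, alloc, i);

    // The third loop is the reduction; its iterate op is created later,
    // inside the outer iterate body, so the reduction nests under the
    // output loops. (Its bound comes from the shared dim of A or B; elided.)
    std::vector<Value> reductionLoops = {originalLoops[2]};
    KrnlIterateOperandPack reductionPack(rewriter, reductionLoops);

    auto outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack);

Since optimized handles no longer exist, the packs take a single loop list, and the parallel optimizedOuterLoops/optimizedReductionLoops bookkeeping disappears wholesale.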
Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&reduceIterationBlock); diff --git a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp index d5bdfa9..2a66c39 100644 --- a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp @@ -183,13 +183,10 @@ struct ONNXReductionOpLowering : public ConversionPattern { // Define loops to initialize the result. std::vector originalLoopsInit; - std::vector optimizedLoopsInit; - Block *optimizationBlockInit = defineLoops( - rewriter, loc, originalLoopsInit, optimizedLoopsInit, outRank); + defineLoops(rewriter, loc, originalLoopsInit, outRank); // Iteration information - KrnlIterateOperandPack packInit( - rewriter, originalLoopsInit, optimizedLoopsInit); + KrnlIterateOperandPack packInit(rewriter, originalLoopsInit); for (decltype(outRank) i = 0; i < outRank; ++i) { addDimensionToPack(rewriter, loc, packInit, alloc, i); } @@ -197,9 +194,6 @@ struct ONNXReductionOpLowering : public ConversionPattern { Block &iterationBlockInit = iterateOpInit.bodyRegion().front(); // Perform the insertions into the body of the initialization loop. - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlockInit); - rewriter.create(loc, originalLoopsInit); // Insert instructions inside the KernelIterateOp body. rewriter.setInsertionPointToStart(&iterationBlockInit); @@ -216,11 +210,10 @@ struct ONNXReductionOpLowering : public ConversionPattern { // Define an Krnl loop to do reduction. rewriter.setInsertionPointAfter(iterateOpInit); - std::vector originalLoops, optimizedLoops; - Block *optimizationBlock = - defineLoops(rewriter, loc, originalLoops, optimizedLoops, inRank); + std::vector originalLoops; + defineLoops(rewriter, loc, originalLoops, inRank); // Iteration information - KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops); + KrnlIterateOperandPack pack(rewriter, originalLoops); for (decltype(inRank) i = 0; i < inRank; ++i) { addDimensionToPack(rewriter, loc, pack, operands[0], i); } @@ -228,10 +221,6 @@ struct ONNXReductionOpLowering : public ConversionPattern { Block &iterationBlock = iterateOp.bodyRegion().front(); // Perform the insertions into the body of the reduction loop. - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); - // Insert instructions inside the KernelIterateOp body. rewriter.setInsertionPointToStart(&iterationBlock); diff --git a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp index 1f9ec3f..44826e2 100644 --- a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp @@ -54,9 +54,7 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { // Define loops. std::vector originalLoops; - std::vector optimizedLoops; - Block *optimizationBlock = - defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank); + defineLoops(rewriter, loc, originalLoops, rank); // Coerce the input into a 2-D tensor. `axis` will be the coercing point. // This coercing follows the softmax definition in ONNX: @@ -65,26 +63,22 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { // dimensions. The outer loop is only created once `axis` is not zero. // Define an outer loop with respect to axis. 
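The Reduction hunk above keeps two sibling loop nests, both built with the trimmed helper. A sketch under the same assumptions (alloc, operands, outRank, inRank from ONNXReductionOpLowering):

    // Nest 1: initialize the output buffer over its outRank dims.
    std::vector<Value> loopsInit;
    defineLoops(rewriter, loc, loopsInit, outRank);
    KrnlIterateOperandPack packInit(rewriter, loopsInit);
    for (decltype(outRank) i = 0; i < outRank; ++i)
      addDimensionToPack(rewriter, loc, packInit, alloc, i);
    auto iterateOpInit = rewriter.create<KrnlIterateOp>(loc, packInit);
    // ... store the reduction identity inside iterateOpInit's body ...

    // Nest 2: the reduction proper over the input's inRank dims, emitted
    // as a sibling of (not nested inside) the initialization nest.
    rewriter.setInsertionPointAfter(iterateOpInit);
    std::vector<Value> loops;
    defineLoops(rewriter, loc, loops, inRank);
    KrnlIterateOperandPack pack(rewriter, loops);
    for (decltype(inRank) i = 0; i < inRank; ++i)
      addDimensionToPack(rewriter, loc, pack, operands[0], i);
    auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);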
- std::vector outerLoops, optimizedOuterLoops; + std::vector outerLoops; outerLoops.reserve(axis); - optimizedOuterLoops.reserve(axis); for (int i = 0; i < axis; ++i) { outerLoops.push_back(originalLoops[i]); - optimizedOuterLoops.push_back(optimizedLoops[i]); } - KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops); + KrnlIterateOperandPack outerPack(rewriter, outerLoops); for (int i = 0; i < axis; ++i) addDimensionToPack(rewriter, loc, outerPack, input, i); // Define an inner loop with respect to axis. - std::vector innerLoops, optimizedInnerLoops; + std::vector innerLoops; innerLoops.reserve(rank - axis); - optimizedInnerLoops.reserve(rank - axis); for (int i = axis; i < rank; ++i) { innerLoops.push_back(originalLoops[i]); - optimizedInnerLoops.push_back(optimizedLoops[i]); } - KrnlIterateOperandPack innerPack(rewriter, innerLoops, optimizedInnerLoops); + KrnlIterateOperandPack innerPack(rewriter, innerLoops); for (int i = axis; i < rank; ++i) addDimensionToPack(rewriter, loc, innerPack, input, i); @@ -93,10 +87,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { if (axis != 0) { outerIterateOp = rewriter.create(loc, outerPack); - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); - // Insert instructions inside the outer loop. Block &outerIterationBlock = outerIterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&outerIterationBlock); @@ -126,10 +116,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { sumIterateOp = rewriter.create(loc, innerPack); // Create an inner loop to compute softmax. softmaxIterateOp = rewriter.create(loc, innerPack); - - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); } // Insert instructions inside the max loop. diff --git a/src/Conversion/ONNXToKrnl/NN/Conv.cpp b/src/Conversion/ONNXToKrnl/NN/Conv.cpp index c11b93c..607f43b 100644 --- a/src/Conversion/ONNXToKrnl/NN/Conv.cpp +++ b/src/Conversion/ONNXToKrnl/NN/Conv.cpp @@ -107,7 +107,7 @@ struct ONNXConvOpLowering : public ConversionPattern { // 1. Define outer loops and emit empty optimization block: int64_t nOuterLoops = (group > 1) ? 3 : 2; BuildKrnlLoop outerLoops(rewriter, loc, nOuterLoops); - outerLoops.createDefineAndOptimizeOp(); + outerLoops.createDefineOp(); // for n = 0 .. N: int nIndex = outerLoops.pushBounds(0, inputOperand, 0); // for g = 0 .. N: @@ -142,7 +142,7 @@ struct ONNXConvOpLowering : public ConversionPattern { // 2.2 Define spatial loops int64_t nSpatialLoops = resultShape.size() - 2; BuildKrnlLoop spatialLoops(rewriter, loc, nSpatialLoops); - spatialLoops.createDefineAndOptimizeOp(); + spatialLoops.createDefineOp(); for (int i = 2; i < resultShape.size(); ++i) spatialLoops.pushBounds(0, alloc, i); @@ -168,7 +168,7 @@ struct ONNXConvOpLowering : public ConversionPattern { // 3.2 Define inner loops. int64_t nInnerLoops = 1 + (kernelShape.size() - 2); BuildKrnlLoop innerLoops(rewriter, loc, nInnerLoops); - innerLoops.createDefineAndOptimizeOp(); + innerLoops.createDefineOp(); // for c = 0 .. C/group int cIndex = innerLoops.pushBounds(0, kernelShape[1]); // for Kx = 0 .. 
KX diff --git a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp index 144bfc9..e160bea 100644 --- a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp +++ b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp @@ -57,9 +57,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { int64_t rank = memRefType.getRank(); std::vector originalLoops; - std::vector optimizedLoops; - Block *optimizationBlock = - defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank); + defineLoops(rewriter, loc, originalLoops, rank); // Create a KrnlIterateOp along C dimension. // This will be the outer-most loop in order to re-use scale, bias, @@ -67,8 +65,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { SmallVector loopCIVs; if (rank > 1) { - KrnlIterateOperandPack cPack( - rewriter, originalLoops[1], optimizedLoops[1]); + KrnlIterateOperandPack cPack(rewriter, originalLoops[1]); addDimensionToPack(rewriter, loc, cPack, operand, 1); auto cIterateOp = rewriter.create(loc, cPack); Block &cIterationBlock = cIterateOp.bodyRegion().front(); @@ -89,21 +86,16 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { axes.emplace_back(0); for (int64_t i = 2; i < rank; ++i) axes.emplace_back(i); - std::vector packLoops, packOptimizedLoops; + std::vector packLoops; for (int i = 0; i < axes.size(); ++i) { packLoops.emplace_back(originalLoops[axes[i]]); - packOptimizedLoops.emplace_back(optimizedLoops[axes[i]]); } - KrnlIterateOperandPack pack(rewriter, packLoops, packOptimizedLoops); + KrnlIterateOperandPack pack(rewriter, packLoops); for (int i = 0; i < axes.size(); ++i) { addDimensionToPack(rewriter, loc, pack, operand, axes[i]); } auto iterateOp = rewriter.create(loc, pack); - // No optimization - rewriter.setInsertionPointToEnd(optimizationBlock); - rewriter.create(loc, originalLoops); - Block &iterationBlock = iterateOp.bodyRegion().front(); rewriter.setInsertionPointToStart(&iterationBlock); diff --git a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp index 14738a0..d40de6f 100644 --- a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp +++ b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp @@ -332,7 +332,7 @@ struct ONNXPoolOpLowering : public ConversionPattern { // for ho in range(HO): // for wo in range(WO): BuildKrnlLoop outputLoops(rewriter, loc, outputShape.size()); - outputLoops.createDefineOptimizeAndIterateOp(alloc); + outputLoops.createDefineAndIterateOp(alloc); auto ipMainRegion = rewriter.saveInsertionPoint(); rewriter.setInsertionPointToStart(outputLoops.getIterateBlock()); @@ -475,7 +475,7 @@ struct ONNXPoolOpLowering : public ConversionPattern { // output[n][c][ho][wo] = // emitScalarOpFor(output[n][c][ho][wo], input[n, c, hi, wi]); BuildKrnlLoop poolingLoops(rewriter, loc, kernelShape.size()); - poolingLoops.createDefineAndOptimizeOp(); + poolingLoops.createDefineOp(); for (int i = 0; i < kernelShape.size(); ++i) poolingLoops.pushBounds( 0, poolDimMap, llvm::makeArrayRef(IVsAndConstants[i])); diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp index e278f66..8882622 100644 --- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp +++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp @@ -190,59 +190,13 @@ void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc, } } -// Function that defines the KRNL dialect loops and their respective -// optimized version. 
-KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter,
-    Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops) {
-  // Define loops.
+// Function that emits the definition of `numLoops` krnl loop references.
+void defineLoops(ConversionPatternRewriter &rewriter, Location loc,
+    std::vector<Value> &loops, int64_t numLoops) {
   auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, numLoops);
   loops.reserve(numLoops);
   for (auto result : loopsOp.getResults())
     loops.push_back(result);
-
-  // Define optimized version of the loops.
-  auto optimizedLoopsOp = rewriter.create<KrnlOptimizeLoopsOp>(loc, numLoops);
-  optimizedLoops.reserve(numLoops);
-  for (auto result : optimizedLoopsOp.getResults())
-    optimizedLoops.push_back(result);
-
-  return optimizedLoopsOp;
-}
-
-// Function that emits the loops and their optimized version.
-// The function returns a reference to the inner optimization block.
-Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
-    std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops) {
-  KrnlOptimizeLoopsOp optimizedLoopsOp =
-      emitOptimizedLoops(rewriter, loc, loops, optimizedLoops, numLoops);
-  return &optimizedLoopsOp.region().front();
-}
-
-// Function which emits a basic set of loops and optimized loops
-// for a given operation argument. A reference to the loop optimization
-// block is returned in the last argument of the function.
-void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter,
-    Location loc, Value operand, std::vector<Value> &originalLoops,
-    KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp) {
-  // Operand shape.
-  auto shape = operand.getType().cast<MemRefType>().getShape();
-
-  // Number of loops.
-  int64_t rank = shape.size();
-
-  // Define loops and optimized loops.
-  std::vector<Value> optimizedLoops;
-  optimizedLoopsOp =
-      emitOptimizedLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
-
-  KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
-  // Iterate over the loop nest.
-  for (int i = 0; i < rank; ++i)
-    addDimensionToPack(rewriter, loc, pack, operand, i);
-
-  iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
 }

 unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
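After this change the shared helper is a thin wrapper around a single op creation. A usage sketch under the patch's own API (loop names in the IR comment are illustrative):

    std::vector<Value> loops;
    defineLoops(rewriter, loc, loops, /*numLoops=*/3);
    // Emits exactly one operation:
    //   %i, %j, %k = krnl.define_loops 3
    // and fills `loops` with the three !krnl.loop handles. No companion
    // krnl.optimize_loops region (and hence no krnl.return_loops
    // terminator) is created anymore.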
diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
index 5d6597b..f1c6b14 100644
--- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
+++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp
@@ -63,24 +63,10 @@ std::map<int64_t, int64_t> getReductionMapping(
 void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc,
     KrnlIterateOperandPack &pack, Value operand, int index);

-// Function that defines the KRNL dialect loops and their respective
-// optimized version.
-KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter,
-    Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops);
-
-// Function that emits the loops and their optimized version.
-// The function returns a reference to the inner optimization block.
-Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc,
-    std::vector<Value> &loops, std::vector<Value> &optimizedLoops,
-    int64_t numLoops);
-
-// Function which emits a basic set of loops and optimized loops
-// for a given operation argument. A reference to the loop optimization
-// block is returned in the last argument of the function.
-void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter,
-    Location loc, Value operand, std::vector<Value> &originalLoops,
-    KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp);
+// Function that emits the define_loops operation to define `numLoops`
+// krnl loops, and fills `loops` with the newly defined loop references.
+void defineLoops(ConversionPatternRewriter &rewriter, Location loc,
+    std::vector<Value> &loops, int64_t numLoops);

 unsigned getMemRefEltSizeInBytes(MemRefType memRefType);

diff --git a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp
index 47752b0..808de78 100644
--- a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp
+++ b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp
@@ -212,7 +212,7 @@ LstmState allocAndInitializeStates(
       operandAdaptor.X().getType().cast<MemRefType>().getElementType(), 0);
   int nLoops = 3;
   BuildKrnlLoop initializationLoops(rewriter, loc, nLoops);
-  initializationLoops.createDefineOptimizeAndIterateOp(state.ht);
+  initializationLoops.createDefineAndIterateOp(state.ht);
   auto ipInitializationLoops = rewriter.saveInsertionPoint();
   rewriter.setInsertionPointToStart(initializationLoops.getIterateBlock());
   {
@@ -292,7 +292,7 @@ void calculateState(
   // compute it, ft, ct, Ct, ot, Ht
   BuildKrnlLoop stateLoops(rewriter, loc, 2);
-  stateLoops.createDefineAndOptimizeOp();
+  stateLoops.createDefineOp();
   stateLoops.pushBounds(0, batchDimSize);
   stateLoops.pushBounds(0, hiddenDimSize);
   stateLoops.createIterateOp();
@@ -372,7 +372,7 @@ void calculateState(
   { // Emit instructions for matrix multiplications.
     // input_size is the reduction dimension.
     BuildKrnlLoop reductionLoops(rewriter, loc, 1);
-    reductionLoops.createDefineAndOptimizeOp();
+    reductionLoops.createDefineOp();
     reductionLoops.pushBounds(0, inputDimSize);
     reductionLoops.createIterateOp();

diff --git a/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp b/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp
index eeebcd9..efcc396 100644
--- a/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp
+++ b/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp
@@ -93,7 +93,7 @@ struct ONNXRNNOpLowering : public ConversionPattern {
     if (direction == FORWARD || direction == BIDIRECTIONAL) {
       BuildKrnlLoop sequenceLoops(rewriter, loc, 1);
-      sequenceLoops.createDefineAndOptimizeOp();
+      sequenceLoops.createDefineOp();
       sequenceLoops.pushBounds(0, sequenceDimSize);
       sequenceLoops.createIterateOp();

@@ -112,7 +112,7 @@ struct ONNXRNNOpLowering : public ConversionPattern {
     if (direction == REVERSE || direction == BIDIRECTIONAL) {
       BuildKrnlLoop sequenceLoops(rewriter, loc, 1);
-      sequenceLoops.createDefineAndOptimizeOp();
+      sequenceLoops.createDefineOp();
       sequenceLoops.pushBounds(0, sequenceDimSize);
       sequenceLoops.createIterateOp();

diff --git a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
index e13eb47..c335d82 100644
--- a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
+++ b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
@@ -46,7 +46,7 @@ struct ONNXConcatOpLowering : public ConversionPattern {
     auto currShape = operands[i].getType().cast<MemRefType>().getShape();
     // Create loop.
BuildKrnlLoop inputLoops(rewriter, loc, rank); - inputLoops.createDefineAndOptimizeOp(); + inputLoops.createDefineOp(); for (int r = 0; r < rank; ++r) inputLoops.pushBounds(0, operands[i], r); inputLoops.createIterateOp(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp index 2ac65d2..2f34b87 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp @@ -63,14 +63,14 @@ struct ONNXPadOpLowering : public ConversionPattern { // Iterate over the loop nest using the output shape. BuildKrnlLoop padLoops(rewriter, loc, rank); - padLoops.createDefineAndOptimizeOp(); + padLoops.createDefineOp(); for (int i = 0; i < rank; ++i) padLoops.pushBounds(0, alloc, i); padLoops.createIterateOp(); // Iterate over the loop nest using the input shape. BuildKrnlLoop valueLoops(rewriter, loc, rank); - valueLoops.createDefineAndOptimizeOp(); + valueLoops.createDefineOp(); for (int i = 0; i < rank; ++i) valueLoops.pushBounds(0, operandAdaptor.data(), i); valueLoops.createIterateOp(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp b/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp index c932cae..ebc9195 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp @@ -46,14 +46,14 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern { // Iterate over the loop nest using the output shape. BuildKrnlLoop padLoops(rewriter, loc, rank); - padLoops.createDefineAndOptimizeOp(); + padLoops.createDefineOp(); for (int i = 0; i < rank; ++i) padLoops.pushBounds(0, alloc, i); padLoops.createIterateOp(); // Iterate over the loop nest using the input shape. BuildKrnlLoop valueLoops(rewriter, loc, rank); - valueLoops.createDefineAndOptimizeOp(); + valueLoops.createDefineOp(); for (int i = 0; i < rank; ++i) valueLoops.pushBounds(0, operandAdaptor.data(), i); valueLoops.createIterateOp(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp index eaf84af..68e1ba3 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp @@ -70,7 +70,7 @@ struct ONNXSplitOpLowering : public ConversionPattern { OpBuilder::InsertionGuard insertGuard(rewriter); // Create loop. BuildKrnlLoop outputLoops(rewriter, loc, rank); - outputLoops.createDefineOptimizeAndIterateOp(allocs[i]); + outputLoops.createDefineAndIterateOp(allocs[i]); outputLoops.createIterateOp(); rewriter.setInsertionPointToStart(outputLoops.getIterateBlock()); // Indices for the read and write. diff --git a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp index 7ecf946..d912f9e 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp @@ -38,11 +38,9 @@ struct ONNXTransposeOpLowering : public ConversionPattern { // Define loops. std::vector originalLoops; - std::vector optimizedLoops; - Block *optimizationBlock = - defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank); + defineLoops(rewriter, loc, originalLoops, rank); - KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops); + KrnlIterateOperandPack pack(rewriter, originalLoops); // Iterate over the loop nest using the input shape. 
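Split above (like Concat and Pad) builds one independent loop nest per output buffer, relying on an insertion guard to keep the nests side by side. A sketch assuming the pattern's `allocs` vector and `rank`:

    for (int i = 0; i < (int)allocs.size(); ++i) {
      // Restores the insertion point when the guard leaves scope, so the
      // next output's nest is emitted beside, not inside, this one.
      OpBuilder::InsertionGuard insertGuard(rewriter);
      BuildKrnlLoop outputLoops(rewriter, loc, rank);
      outputLoops.createDefineAndIterateOp(allocs[i]);
      rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());
      // ... emit the read/write for output i here ...
    }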
for (int i = 0; i < rank; ++i) addDimensionToPack(rewriter, loc, pack, data, i); @@ -53,14 +51,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern { // Now perform the insertions into the body of the // just generated instructions: - // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body. - rewriter.setInsertionPointToEnd(optimizationBlock); - // Return from KrnlOptimizeLoopsOp body. - // When no optimizations are present we just return the loops - // unchaged. - rewriter.create(loc, originalLoops); - - // 2. Insert instructions inside the KernelIterateOp body. + // Insert instructions inside the KernelIterateOp body. rewriter.setInsertionPointToStart(&iterationBlock); // Handle the operation. diff --git a/src/Dialect/Krnl/KrnlHelper.cpp b/src/Dialect/Krnl/KrnlHelper.cpp index 4a36618..b683346 100644 --- a/src/Dialect/Krnl/KrnlHelper.cpp +++ b/src/Dialect/Krnl/KrnlHelper.cpp @@ -161,8 +161,7 @@ void KrnlIterateOperandPack::pushAffineMapBound( BuildKrnlLoop::BuildKrnlLoop( ConversionPatternRewriter &rewriter, Location loc, int loopNum) : rewriter(rewriter), loc(loc), originalLoopNum(loopNum), pack(NULL), - pushCount(0), createdDefineOp(false), createdOptimizeOp(false), - createdIterateOp(false) { + pushCount(0), createdDefineOp(false), createdIterateOp(false) { if (originalLoopNum <= 0) emitError(loc, "Expected positive number of original loops."); } @@ -177,7 +176,7 @@ BuildKrnlLoop::~BuildKrnlLoop() { free(pack); } -void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) { +void BuildKrnlLoop::createDefineOp() { // Insert define loop operation. auto loopsOp = rewriter.create(loc, originalLoopNum); originalLoops.reserve(originalLoopNum); @@ -185,25 +184,8 @@ void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) { originalLoops.push_back(result); createdDefineOp = true; - // Insert optimize loop operation. - auto optimizedLoopsOp = - rewriter.create(loc, originalLoopNum); - optLoops.reserve(originalLoopNum); - - // Emit empty optimizations if flag is set. - if (withEmptyOptimization) { - for (auto result : optimizedLoopsOp.getResults()) - optLoops.push_back(result); - optBlock = &optimizedLoopsOp.region().front(); - auto ip = rewriter.saveInsertionPoint(); - rewriter.setInsertionPointToEnd(optBlock); - rewriter.create(loc, originalLoops); - rewriter.restoreInsertionPoint(ip); - } - createdOptimizeOp = true; - // prepare data structure to push bounds - pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops); + pack = new KrnlIterateOperandPack(rewriter, originalLoops); } int BuildKrnlLoop::pushBounds(int64_t lowerBound, int64_t upperBound) { @@ -254,9 +236,6 @@ void BuildKrnlLoop::createIterateOp() { // Loop definition operation is mandatory. assert(createdDefineOp && "Must create define op before iterate op."); - // Loop optimization operation is mandatory (for now). - assert(createdOptimizeOp && "Must create optimize op before iterate op."); - // Check if all bounds have been defined. assert(pushCount == originalLoopNum && "Must push bounds for all original loops."); @@ -267,15 +246,14 @@ void BuildKrnlLoop::createIterateOp() { createdIterateOp = true; } -void BuildKrnlLoop::createDefineOptimizeAndIterateOp( - Value memRefOperand, bool withEmptyOptimization) { +void BuildKrnlLoop::createDefineAndIterateOp(Value memRefOperand) { // Rank of the MemRef operand. We will emit a loop for each dimension. 
  int loopNum = memRefOperand.getType().cast<MemRefType>().getShape().size();
   assert(originalLoopNum == loopNum &&
          "Mismatch in loop numbers from constructor and define.");

-  // Emit the definition and the optimization operations for the loop nest.
-  createDefineAndOptimizeOp(withEmptyOptimization);
+  // Emit the definition operation for the loop nest.
+  createDefineOp();

   // Push a lower-upper bound pair for each dimension of the MemRef operand.
   // The lower bound in this case is always zero.

diff --git a/src/Dialect/Krnl/KrnlHelper.hpp b/src/Dialect/Krnl/KrnlHelper.hpp
index d4d155a..a16ccd9 100644
--- a/src/Dialect/Krnl/KrnlHelper.hpp
+++ b/src/Dialect/Krnl/KrnlHelper.hpp
@@ -83,6 +83,13 @@ struct KrnlIterateOperandPack {
         _operands.end(), optimizedLoops.begin(), optimizedLoops.end());
   }

+  // Create a pack with optimizedLoops = inputLoops (i.e., no optimization).
+  KrnlIterateOperandPack(
+      mlir::Builder &builder, llvm::ArrayRef<mlir::Value> inputLoops)
+      : builder(builder), inputLoops(inputLoops), optimizedLoops(inputLoops) {
+    _operands.insert(_operands.end(), inputLoops.begin(), inputLoops.end());
+  }
+
   void pushConstantBound(int64_t bound);

   void pushOperandBound(mlir::Value operand);

@@ -112,19 +119,15 @@ private:
 };

-// Helper function to write kernel loops. This class will let us build a single
-// define/optimize/iterate operation combo. We can then insert optimizations in
-// the body of the optimization operation, and operations in the body of the
-// iterate operation.
+// Helper class to write kernel loops. It lets us build a single
+// define/iterate operation combo. We can then insert operations in the body of
+// the iterate operation.
 //
-// The sequence is as follow:
+// The sequence is as follows:
 //
-// 1) Create an object giving the rewriter, location, and number of loop in
-//    the original (non optimized) loop.
+// 1) Create an object giving the rewriter, location, and number of loops in
+//    the loop nest.
 //
-// 2) Create define & optimize ops (currently paired). Optimizations can then
-//    be added to the inner block of the optimize operation. Make sure to set
-//    the insertion point to that block for optimizations to go in the right
-//    place.
+// 2) Create a define_loops op to define the new loop variables.
 //
 // 3) Push the bounds for each of the original loops. Bounds are pushed in
 //    pairs (lower & upper bounds). There are a few methods to do it depending

@@ -153,7 +156,7 @@ public:
-  // Create define and optimize loop with loopNum original loops. If
-  // withEmptyOptimization is true, the optimization is simply the identity
-  // function (no optimizations).
-  void createDefineAndOptimizeOp(bool withEmptyOptimization = true);
+  // Create the define_loops operation for the loopNum original loops.
+  void createDefineOp();

   // Push bounds (lower and upper) for each of the loops (order matters).
   // The function returns the order number associated with the loop iteration.

@@ -172,13 +175,12 @@ public:
   // operations associated with this loop nest have been emitted already.
   void createIterateOp();

-  // Create the loop nest definition, optimization and iteration operations
+  // Create the loop nest definition and iteration operations
   // for a given operand of MemRef type. The loop nest has a depth equal to the
   // rank of the MemRef operand. The lower bound of each loop is zero. The
   // upper bound of each loop is given by the corresponding dimension of the
   // MemRef operand.
-  void createDefineOptimizeAndIterateOp(
-      Value memRefOperand, bool withEmptyOptimization = true);
+  void createDefineAndIterateOp(Value memRefOperand);

   // Get the (original loop) induction variable associated with the given
   // index. Use the index returned when pushing the bounds.

@@ -220,7 +222,6 @@ private:
   // Flags that keep track of emitted operations.
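Putting the header's three steps together, a minimal sketch; the bounds are illustrative, the two pushBounds overloads shown are the ones used elsewhere in this patch, and getInductionVar is assumed to be the induction-variable accessor documented above:

    BuildKrnlLoop loops(rewriter, loc, /*loopNum=*/2);        // step 1
    loops.createDefineOp();                                   // step 2
    int nIndex = loops.pushBounds(0, inputOperand, 0);        // step 3: 0 .. dim(input, 0)
    int cIndex = loops.pushBounds(0, /*upperBound=*/64);      // step 3: 0 .. constant
    loops.createIterateOp();
    rewriter.setInsertionPointToStart(loops.getIterateBlock());
    // Induction variables come back by the index pushBounds returned
    // (accessor name assumed from the comment above).
    Value n = loops.getInductionVar(nIndex);
    Value c = loops.getInductionVar(cIndex);
    // createDefineAndIterateOp(memRef) bundles these steps, pushing one
    // (0, dim(memRef, d)) bound pair per dimension.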
bool createdDefineOp; - bool createdOptimizeOp; bool createdIterateOp; // Saved insertion point in the code region of the KrnlOptimizeLoopsOp. diff --git a/src/Dialect/Krnl/KrnlOps.cpp b/src/Dialect/Krnl/KrnlOps.cpp index ffa68c2..f15c21e 100644 --- a/src/Dialect/Krnl/KrnlOps.cpp +++ b/src/Dialect/Krnl/KrnlOps.cpp @@ -78,47 +78,6 @@ ParseResult parseKrnlDefineLoopsOp( return success(); } -//===----------------------------------------------------------------------===// -// KrnlOptimizeLoopsOp -//===----------------------------------------------------------------------===// - -void KrnlOptimizeLoopsOp::build( - OpBuilder &builder, OperationState &result, int num_optimized_loops) { - result.types.append(num_optimized_loops, LoopType::get(builder.getContext())); - // Create a region and a block for the body. - // Schedule intrinsics will be placed into this region. - Region *region = result.addRegion(); - auto *body = new Block(); - region->push_back(body); -} - -void print(OpAsmPrinter &p, KrnlOptimizeLoopsOp &op) { - p << "krnl.optimize_loops "; - p.printRegion(op.region(), /*printEntryBlockArgs=*/false, - /*printBlockTerminators=*/true); - p << " : "; - p.printFunctionalType(op); -} - -ParseResult parseKrnlOptimizeLoopsOp( - OpAsmParser &parser, OperationState &result) { - // Parse the schedule body region. - Region *region = result.addRegion(); - if (parser.parseRegion(*region, llvm::None, llvm::None)) - return failure(); - - // Parse the function type for the schedule operation. - // Then following the hint of this parsed function type, parse the - // returned timestamp space dimension handlers. - FunctionType schedule_func_type; - if (parser.parseColonType(schedule_func_type) || - parser.addTypesToList(schedule_func_type.getResults(), result.types)) { - failure(); - } - - return success(); -} - //===----------------------------------------------------------------------===// // KrnlIterateOp //===----------------------------------------------------------------------===// @@ -340,26 +299,9 @@ static LogicalResult verify(KrnlIterateOp op) { } //===----------------------------------------------------------------------===// -// KrnlReturnLoopsOp +// KrnlEntryPointOp //===----------------------------------------------------------------------===// -void print(OpAsmPrinter &p, KrnlReturnLoopsOp &op) { - p << "krnl.return_loops "; - p.printOperands(op.operand_begin(), op.operand_end()); -} - -ParseResult parseKrnlReturnLoopsOp( - OpAsmParser &parser, OperationState &result) { - // Parse the loops to return. - SmallVector timestamp_dim_handlers; - if (parser.parseOperandList(timestamp_dim_handlers) || - parser.resolveOperands(timestamp_dim_handlers, - LoopType::get(result.getContext()), result.operands)) - return failure(); - - return success(); -} - void KrnlEntryPointOp::build(mlir::OpBuilder &builder, OperationState &state, SymbolRefAttr funcAttr, IntegerAttr numInputs, IntegerAttr numOutputs) { state.addAttribute(KrnlEntryPointOp::getEntryPointFuncAttrName(), funcAttr); diff --git a/src/Dialect/Krnl/KrnlOps.td b/src/Dialect/Krnl/KrnlOps.td index 32b2c13..cfc0e77 100644 --- a/src/Dialect/Krnl/KrnlOps.td +++ b/src/Dialect/Krnl/KrnlOps.td @@ -48,32 +48,6 @@ def KrnlDefineLoopsOp : Op { }]; } -def KrnlOptimizeLoopsOp : Op { - let summary = "optimize_loops operation"; - let description = [{ - The "krnl.optimize_loops" operation is essentially a cosmetic operation - which exists to encapsulate a region where loops are being scheduled / - optimized. 
-
-    The optimized loops are returned at the end of the region associated with
-    the krnl.optimize_loops operation.
-
-    For example : TBD once we have actual schedule intrinsics.
-  }];
-
-  let arguments = (ins Variadic);
-  let results = (outs Variadic);
-  let regions = (region SizedRegion<1>:$region);
-
-  let skipDefaultBuilders = 1;
-
-  let builders = [ OpBuilder<"OpBuilder &builder, OperationState &result, "
-                             "int timestamp_space_rank"> ];
-
-  let printer = [{ return ::print(p, *this); }];
-  let parser = [{ return ::parse$cppClass(parser, result); }];
-}
-
 def KrnlIterateOp : Op {
   let summary = "iterate operation";
   let description = [{

@@ -129,19 +103,6 @@ def KrnlIterateOp : Op {
-  let summary = "Krnl return handler operation";
-  let description = [{
-    Krnl return_loops operation is a terminator operation for returning
-    scheduled dimension handlers in the krnl.optimize_loops region.
-  }];
-
-  let arguments = (ins Variadic);
-
-  let printer = [{ return ::print(p, *this); }];
-  let parser = [{ return ::parse$cppClass(parser, result); }];
-}
-
 def KrnlTerminatorOp : Op {
   let summary = "Krnl terminator operation";
   let description = [{

diff --git a/src/Transform/LowerKrnl.cpp b/src/Transform/LowerKrnl.cpp
index c165812..6ce2e1d 100644
--- a/src/Transform/LowerKrnl.cpp
+++ b/src/Transform/LowerKrnl.cpp
@@ -125,21 +125,6 @@ public:
 //===----------------------------------------------------------------------===//
-// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
+// Krnl to Affine Rewrite Patterns: KrnlBlock operation.
 //===----------------------------------------------------------------------===//

-class KrnlOptimizeLoopsLowering : public OpRewritePattern<KrnlOptimizeLoopsOp> {
-public:
-  using OpRewritePattern<KrnlOptimizeLoopsOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(
-      KrnlOptimizeLoopsOp op, PatternRewriter &rewriter) const override {
-    rewriter.eraseOp(op);
-    return success();
-  }
-};
-
-//===----------------------------------------------------------------------===//
-// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
-//===----------------------------------------------------------------------===//
-
 class KrnlBlockOpLowering : public OpRewritePattern<KrnlBlockOp> {
 public:
   using OpRewritePattern<KrnlBlockOp>::OpRewritePattern;

   LogicalResult matchAndRewrite(
       KrnlBlockOp op, PatternRewriter &rewriter) const override {
     rewriter.eraseOp(op);
     return success();
   }
 };

-//===----------------------------------------------------------------------===//
-// Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
-//===----------------------------------------------------------------------===//
-
-class KrnlReturnLoopOpLowering : public OpRewritePattern<KrnlReturnLoopsOp> {
-public:
-  using OpRewritePattern<KrnlReturnLoopsOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(
-      KrnlReturnLoopsOp op, PatternRewriter &rewriter) const override {
-    rewriter.eraseOp(op);
-    return success();
-  }
-};
-
 //===----------------------------------------------------------------------===//
 // KrnlToAffineLoweringPass
 //===----------------------------------------------------------------------===//

@@ -230,14 +200,11 @@ void KrnlToAffineLoweringPass::runOnFunction() {
   OwningRewritePatternList patterns;
   patterns.insert(
-      &getContext());
+      KrnlBlockOpLowering>(&getContext());

   // Do not lower operations that pertain to schedules just yet.
   target.addLegalOp();
   target.addLegalOp();
-  target.addLegalOp();
-  target.addLegalOp();

   if (failed(applyPartialConversion(function, target, patterns)))
     return signalPassFailure();

@@ -312,8 +279,6 @@ void KrnlToAffineLoweringPass::runOnFunction() {
   // Remove/lower schedule related operations.
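For reference, the two deleted patterns shared the erase-only shape of the surviving KrnlBlockOpLowering; spelled out with its template arguments, the whole rewrite is:

    class KrnlBlockOpLowering : public OpRewritePattern<KrnlBlockOp> {
    public:
      using OpRewritePattern<KrnlBlockOp>::OpRewritePattern;

      LogicalResult matchAndRewrite(
          KrnlBlockOp op, PatternRewriter &rewriter) const override {
        // The op only annotates a schedule; lowering it means erasing it.
        rewriter.eraseOp(op);
        return success();
      }
    };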
target.addIllegalOp(); target.addIllegalOp(); - target.addIllegalOp(); - target.addIllegalOp(); if (failed(applyPartialConversion(function, target, patterns))) return signalPassFailure(); } diff --git a/test/mlir/krnl/ops.mlir b/test/mlir/krnl/ops.mlir index 2e7c89d..0989e70 100644 --- a/test/mlir/krnl/ops.mlir +++ b/test/mlir/krnl/ops.mlir @@ -12,9 +12,6 @@ func @simple_iterate(%N : index) { %ii, %ij, %ik = krnl.define_loops 3 - %oi, %oj, %ok = krnl.optimize_loops { - krnl.return_loops %ii, %ij, %ik - } : () -> (!krnl.loop, !krnl.loop, !krnl.loop) // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): @@ -22,18 +19,18 @@ func @simple_iterate(%N : index) { // GENERIC-NEXT: bounds = [#{{.*}}, #{{.*}}, #{{.*}}, #{{.*}}] // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 1 to 11) { - krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) { + krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) { } // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) { - krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) { + krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) { // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index): // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10) { - krnl.iterate(%ok) with (%ik -> %k = 0 to 10) { + krnl.iterate(%ik) with (%ik -> %k = 0 to 10) { } } @@ -41,7 +38,7 @@ func @simple_iterate(%N : index) { // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to %{{.*}}, %{{.*}} -> %{{.*}} = 0 to 10) { - krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) { + krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) { } @@ -52,25 +49,22 @@ func @simple_iterate(%N : index) { func @affine_map_bound(%N : index) { %ii, %ij, %ik = krnl.define_loops 3 - %oi, %oj, %ok = krnl.optimize_loops { - krnl.return_loops %ii, %ij, %ik - } : () -> (!krnl.loop, !krnl.loop, !krnl.loop) // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) { - krnl.iterate(%oi, %oj) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) { + krnl.iterate(%ii, %ij) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) { // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index): // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = #{{.*}}(%{{.*}}, %{{.*}}) to #{{.*}}(%{{.*}}, %{{.*}})) { - krnl.iterate(%ok) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) { + krnl.iterate(%ik) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) { } // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { // GENERIC-NEXT: ^bb0(%{{.*}}: index): // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = max 
#map{{.*}}(%{{.*}}, %{{.*}}) to min #map{{.*}}(%{{.*}}, %{{.*}})[%{{.*}}]) { - krnl.iterate(%ok) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) { + krnl.iterate(%ik) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) { } } diff --git a/test/mlir/onnx/onnx_enable_memory_pool.mlir b/test/mlir/onnx/onnx_enable_memory_pool.mlir index dd78e38..62b305c 100644 --- a/test/mlir/onnx/onnx_enable_memory_pool.mlir +++ b/test/mlir/onnx/onnx_enable_memory_pool.mlir @@ -12,14 +12,12 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { // CHECK: [[MEMPOOL:%.+]] = alloc() : memref<400xi8> // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32> // CHECK: krnl.define_loops - // CHECK: krnl.optimize_loops // CHECK: krnl.iterate // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32> // CHECK: krnl.define_loops - // CHECK: krnl.optimize_loops // CHECK: krnl.iterate // CHECK: dealloc [[MEMPOOL]] : memref<400xi8> // CHECK: return [[RES]] : memref<10x10xf32> @@ -41,14 +39,12 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 // CHECK: [[MEMPOOL1:%.+]] = alloc() : memref<400xi8> // CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32> // CHECK: krnl.define_loops - // CHECK: krnl.optimize_loops // CHECK: krnl.iterate // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32> // CHECK: krnl.define_loops - // CHECK: krnl.optimize_loops // CHECK: krnl.iterate // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32> // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32> @@ -57,7 +53,6 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32 // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: krnl.define_loops - // CHECK: krnl.optimize_loops // CHECK: krnl.iterate // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32> diff --git a/test/mlir/onnx/onnx_krnl_global_elision.mlir b/test/mlir/onnx/onnx_krnl_global_elision.mlir index 31aa2d1..850757d 100644 --- a/test/mlir/onnx/onnx_krnl_global_elision.mlir +++ b/test/mlir/onnx/onnx_krnl_global_elision.mlir @@ -2,7 +2,7 @@ // CHECK-LABEL: func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32> func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32> { - %0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 
0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32> + %0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 1.0, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32> return %0 : memref<1x70xf32> // CHECK: {{.*}} = "krnl.global"() {name = "constant_0", shape = [1, 70]} : () -> memref<1x70xf32> diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir index f0c20cd..5aa3ef7 100644 --- a/test/mlir/onnx/onnx_lowering.mlir +++ b/test/mlir/onnx/onnx_lowering.mlir @@ -71,10 +71,7 @@ func @test_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK-LABEL: test_add // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { + // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 @@ -91,10 +88,7 @@ func @test_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK-LABEL: test_mul // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { + // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 @@ -111,10 +105,7 @@ func @test_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK-LABEL: test_div // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { + // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32 @@ -131,10 +122,7 @@ func @test_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> 
tensor<*
 // CHECK-LABEL: test_sub
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
@@ -151,10 +139,7 @@ func @test_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*xi
 // CHECK-LABEL: test_and
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
@@ -171,10 +156,7 @@ func @test_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*xi1
 // CHECK-LABEL: test_or
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
@@ -191,10 +173,7 @@ func @test_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*xi
 // CHECK-LABEL: test_xor
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
@@ -213,12 +192,9 @@ func @test_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
 // CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -236,12 +212,9 @@ func @test_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@@ -265,12 +238,9 @@ func @test_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -294,12 +264,9 @@ func @test_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -323,12 +290,9 @@ func @test_cos(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[COS:%.+]] = cos [[LOAD]] : f32
 // CHECK: affine.store [[COS]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -346,12 +310,9 @@ func @test_log(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[LOG:%.+]] = log [[LOAD]] : f32
 // CHECK: affine.store [[LOG]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -369,12 +330,9 @@ func @test_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -397,12 +355,9 @@ func @test_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
@@ -491,10 +446,7 @@ func @test_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*
 // CHECK-LABEL: test_sum
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -511,10 +463,7 @@ func @test_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*
 // CHECK-LABEL: test_max
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
@@ -532,10 +481,7 @@ func @test_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*
 // CHECK-LABEL: test_min
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
@@ -555,16 +501,13 @@ func @test_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
- // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
- // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
- // CHECK: [[ALPHA:%.+]] = constant {{2.+}} : f32
+ // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
+ // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
+ // CHECK: [[ALPHA:%.+]] = constant {{2.+}} : f32
 // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
 // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
 // CHECK: [[SUB:%.+]] = subf [[EXP]], [[ONE]] : f32
@@ -585,15 +528,12 @@ func @test_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
- // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
- // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
+ // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
+ // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
 // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
 // CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[LOAD]] : f32
 // CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
@@ -612,16 +552,13 @@ func @test_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
- // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
- // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
- // CHECK: [[GAMMA:%.+]] = constant {{2.+}} : f32
+ // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
+ // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
+ // CHECK: [[GAMMA:%.+]] = constant {{2.+}} : f32
 // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
 // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD]], [[ZERO]] : f32
 // CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[EXP]] : f32
@@ -643,17 +580,14 @@ func @test_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
- // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
- // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
- // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
- // CHECK: [[BETA:%.+]] = constant {{2.+}} : f32
+ // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
+ // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
+ // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
+ // CHECK: [[BETA:%.+]] = constant {{2.+}} : f32
 // CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[LOAD]] : f32
 // CHECK: [[ADD:%.+]] = addf [[MUL]], [[BETA]] : f32
 // CHECK: [[CMP1:%.+]] = cmpf "ogt", [[ADD]], [[ZERO]] : f32
@@ -675,12 +609,9 @@ func @test_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
 // CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32
@@ -699,12 +630,9 @@ func @test_softplus(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -725,12 +653,9 @@ func @test_softsign(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ABS:%.+]] = absf [[LOAD]] : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -755,12 +680,9 @@ func @test_add_with_broadcasting(%arg0 : tensor<?xf32>, %arg1 : tensor<?x10xf32>
 // CHECK: [[ONE:%.+]] = constant 1 : index
 // CHECK: [[IS_ONE:%.+]] = cmpi "eq", [[DIM2]], [[ONE]] : index
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_1:%.+]] = constant 0 : index
 // CHECK: [[DIM3:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM3]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM3]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[ZERO:%.+]] = constant 0 : index
 // CHECK: %[[SELECT1:.+]] = select [[IS_ONE]], [[ZERO]], %arg3 : index
 // CHECK: [[LOAD1:%.+]] = load %arg0[%[[SELECT1]]] : memref<?xf32>
@@ -780,23 +702,17 @@ func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
 // CHECK-LABEL: test_reducemax
 // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS1:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
 // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
 // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
 // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
- // CHECK: [[SELECT:%.+]] = select %7, %6, %5 : f32
- // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
+ // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
 // CHECK: }
 // CHECK: return [[RES]] : memref<3x2xf32>
 }
@@ -810,22 +726,16 @@ func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
 // CHECK-LABEL: test_reducemin
 // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS1:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
 // CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
 // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
 // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
- // CHECK: [[SELECT:%.+]] = select %7, %6, %5 : f32
+ // CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD2]], [[LOAD1]] : f32
 // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
 // CHECK: }
 // CHECK: return [[RES]] : memref<3x2xf32>
@@ -840,21 +750,15 @@ func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
 // CHECK-LABEL: test_reduceprod
 // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS1:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
 // CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
 // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
- // CHECK: [[REDUCE:%.+]] = mulf %6, %5 : f32
+ // CHECK: [[REDUCE:%.+]] = mulf [[LOAD2]], [[LOAD1]] : f32
 // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
 // CHECK: }
 // CHECK: return [[RES]] : memref<3x2xf32>
@@ -869,21 +773,15 @@ func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
 // CHECK-LABEL: test_reducesum
 // CHECK: [[RES:%.+]] = alloc() : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS1:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
 // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
 // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
 // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
- // CHECK: [[REDUCE:%.+]] = addf %6, %5 : f32
+ // CHECK: [[REDUCE:%.+]] = addf [[LOAD2]], [[LOAD1]] : f32
 // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
 // CHECK: }
 // CHECK: return [[RES]] : memref<3x2xf32>
@@ -902,33 +800,30 @@ func @test_softmax(%arg0 : tensor<10x10xf32>) -> tensor<*xf32> {
 // CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CST_0:%.+]] = constant 0xFF800000 : f32
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, %3#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 10) {
 // CHECK: affine.store [[CST]], [[SUM]][] : memref<f32>
 // CHECK: affine.store [[CST_0]], [[MAX]][] : memref<f32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[MAX]][] : memref<f32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
 // CHECK: [[COND:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
 // CHECK: [[SELECT:%.+]] = select [[COND]], [[LOAD1]], [[LOAD2]] : f32
 // CHECK: affine.store [[SELECT]], [[MAX]][] : memref<f32>
 // CHECK: }
- // CHECK: %5 = affine.load [[MAX]][] : memref<f32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: [[LOAD_MAX:%.+]] = affine.load [[MAX]][] : memref<f32>
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD1]] = affine.load [[SUM]][] : memref<f32>
 // CHECK: [[LOAD2]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
- // CHECK: [[SUB:%.+]] = subf [[LOAD2]], %5 : f32
+ // CHECK: [[SUB:%.+]] = subf [[LOAD2]], [[LOAD_MAX]] : f32
 // CHECK: [[EXP:%.+]] = exp [[SUB]] : f32
 // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[EXP]] : f32
 // CHECK: affine.store [[ADD]], [[SUM]][] : memref<f32>
- // CHECK: affine.store %10, [[RES]][%arg1, %arg2] : memref<10x10xf32>
+ // CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<10x10xf32>
 // CHECK: }
- // CHECK: %6 = affine.load [[SUM]][] : memref<f32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: [[LOAD_SUM:%.+]] = affine.load [[SUM]][] : memref<f32>
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD1]] = affine.load [[RES]][%arg1, %arg2] : memref<10x10xf32>
- // CHECK: [[DIV:%.+]] = divf [[LOAD1]], %6 : f32
+ // CHECK: [[DIV:%.+]] = divf [[LOAD1]], [[LOAD_SUM]] : f32
 // CHECK: affine.store [[DIV]], [[RES]][%arg1, %arg2] : memref<10x10xf32>
 // CHECK: }
 // CHECK: }
@@ -948,11 +843,8 @@ func @test_gemm(%arg0 : tensor<5x10xf32>, %arg1 : tensor<5x10xf32>, %arg2: tenso
 // CHECK: [[ALPHA:%.+]] = constant 1.000000e+00 : f32
 // CHECK: [[BETA:%.+]] = constant 5.000000e+00 : f32
 // CHECK: [[DEF_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOPS:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg3 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg4 = 0 to 10) {
- // CHECK: krnl.iterate([[OPT_LOOPS]]#2) with ([[DEF_LOOPS]]#2 -> %arg5 = 0 to 5) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg3 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg4 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#2) with ([[DEF_LOOPS]]#2 -> %arg5 = 0 to 5) {
 // CHECK: [[A:%.+]] = affine.load %arg0[%arg5, %arg3] : memref<5x10xf32>
 // CHECK: [[B:%.+]] = affine.load %arg1[%arg5, %arg4] : memref<5x10xf32>
 // CHECK: [[Y:%.+]] = affine.load [[RES]][%arg3, %arg4] : memref<10x10xf32>
@@ -982,12 +874,9 @@ func @test_sqrt(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[SQRT:%.+]] = sqrt [[LOAD]] : f32
 // CHECK: affine.store [[SQRT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -1026,19 +915,13 @@ func @test_transpose(%arg0 : tensor<10x20x30x40xf32>) -> tensor<*xf32> {
 // CHECK: [[RES0:%.+]] = alloc() : memref<40x10x30x20xf32>
 // CHECK: [[RES1:%.+]] = alloc() : memref<40x30x20x10xf32>
- // CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1, [[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#0 -> %arg1 = 0 to 10, [[LOOPS]]#1 -> %arg2 = 0 to 20, [[LOOPS]]#2 -> %arg3 = 0 to 30, [[LOOPS]]#3 -> %arg4 = 0 to 40) {
+ // CHECK: [[DEF_LOOPS:%.+]]:4 = krnl.define_loops 4
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg2 = 0 to 20, [[DEF_LOOPS]]#2 -> %arg3 = 0 to 30, [[DEF_LOOPS]]#3 -> %arg4 = 0 to 40) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3, %arg4] : memref<10x20x30x40xf32>
 // CHECK: affine.store [[LOAD]], [[RES1]][%arg4, %arg3, %arg2, %arg1] : memref<40x30x20x10xf32>
- // CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1, [[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#0 -> %arg1 = 0 to 40, [[LOOPS]]#1 -> %arg2 = 0 to 30, [[LOOPS]]#2 -> %arg3 = 0 to 20, [[LOOPS]]#3 -> %arg4 = 0 to 10) {
+ // CHECK: [[DEF_LOOPS:%.+]]:4 = krnl.define_loops 4
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 40, [[DEF_LOOPS]]#1 -> %arg2 = 0 to 30, [[DEF_LOOPS]]#2 -> %arg3 = 0 to 20, [[DEF_LOOPS]]#3 -> %arg4 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES1]][%arg1, %arg2, %arg3, %arg4] : memref<40x30x20x10xf32>
 // CHECK: affine.store [[LOAD]], [[RES0]][%arg1, %arg4, %arg2, %arg3] : memref<40x10x30x20xf32>
@@ -1067,12 +950,9 @@ func @test_sign_f(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -1096,12 +976,9 @@ func @test_sign_i(%arg0 : tensor<?x10xi32>) -> tensor<*xi32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xi32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xi32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xi32>
 // CHECK: [[ZERO:%.+]] = constant 0 : i32
 // CHECK: [[ONE:%.+]] = constant 1 : i32
@@ -1124,17 +1001,11 @@ func @test_matmul1(%arg0 : tensor<10x5xf32>, %arg1 : tensor<5x10xf32>) -> tensor
 // CHECK-LABEL: test_matmul1
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
- // CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 10, [[LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
- // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
+ // CHECK: [[DEF_LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+ // CHECK: krnl.iterate([[DEF_LOOPS_REDUCE]]) with ([[DEF_LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2, %arg4] : memref<10x5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<5x10xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
@@ -1157,17 +1028,11 @@ func @test_matmul2(%arg0 : tensor<10x5xf32>, %arg1 : tensor<2x3x5x10xf32>) -> te
 // CHECK: [[RES:%.+]] = alloc() : memref<2x3x10x10xf32>
 // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) {
- // CHECK: krnl.iterate([[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) {
+ // CHECK: krnl.iterate([[LOOPS]]#0, [[LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) {
+ // CHECK: krnl.iterate([[LOOPS]]#2, [[LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) {
 // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
 // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) {
+ // CHECK: krnl.iterate([[LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg4, %arg6] : memref<10x5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
@@ -1191,17 +1056,11 @@ func @test_matmul3(%arg0 : tensor<2x3x10x5xf32>, %arg1 : tensor<2x3x5x10xf32>) -
 // CHECK: [[RES:%.+]] = alloc() : memref<2x3x10x10xf32>
 // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) {
- // CHECK: krnl.iterate([[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) {
- // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
+ // CHECK: krnl.iterate([[LOOPS]]#0, [[LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) {
+ // CHECK: krnl.iterate([[LOOPS]]#2, [[LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) {
+ // CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
 // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) {
+ // CHECK: krnl.iterate([[LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2, %arg3, %arg4, %arg6] : memref<2x3x10x5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32>
@@ -1225,16 +1084,10 @@ func @test_matmul4(%arg0 : tensor<5xf32>, %arg1 : tensor<5x10xf32>) -> tensor<*x
 // CHECK: [[RES:%.+]] = alloc() : memref<10xf32>
 // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS]]) with ([[LOOPS]] -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[LOOPS]]) with ([[LOOPS]] -> %arg2 = 0 to 10) {
 // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2] : memref<10xf32>
 // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg3 = 0 to 5) {
+ // CHECK: krnl.iterate([[LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg3 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg3] : memref<5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg3, %arg2] : memref<5x10xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2] : memref<10xf32>
@@ -1258,20 +1111,14 @@ func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor<?x5x10xf32>) -> tensor<
 // CHECK: [[C0:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim %arg1, [[C0]] : memref<?x5x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
- // CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
+ // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
- // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
- // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
+ // CHECK: [[DEF_LOOPS_REDUCE:%.+]] = krnl.define_loops 1
+ // CHECK: krnl.iterate([[DEF_LOOPS_REDUCE]]) with ([[DEF_LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg4] : memref<5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2, %arg4, %arg3] : memref<?x5x10xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<?x10xf32>
@@ -1297,19 +1144,13 @@ func @test_matmul6(%arg0 : tensor<?x10x5xf32>, %arg1 : tensor<5xf32>) -> tensor<
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10x5xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_1:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
- // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
+ // CHECK: krnl.iterate([[LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
 // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
+ // CHECK: krnl.iterate([[LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2, %arg3, %arg4] : memref<?x10x5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg4] : memref<5xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<?x10xf32>
@@ -1335,10 +1176,7 @@ func @test_matmul7(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<*xf32
 // CHECK: %[[CONSTANT_INDEX:.+]] = constant 0 : index
 // CHECK: affine.store [[CONSTANT]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
 // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
- // CHECK: } : () -> !krnl.loop
- // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg2 = 0 to 5) {
+ // CHECK: krnl.iterate([[LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg2 = 0 to 5) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2] : memref<5xf32>
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2] : memref<5xf32>
 // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
@@ -1362,24 +1200,15 @@ func @test_conv_no_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2
 // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST2:%.+]] = constant 2 : index
 // CHECK: [[OUTER_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_OUTER_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_OUTER_LOOPS]]#0, [[OPT_OUTER_LOOPS]]#1) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 5) {
+ // CHECK: krnl.iterate([[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 5) {
 // CHECK: [[SPATIAL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_SPATIAL_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg4 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg5 = 0 to 58) {
+ // CHECK: krnl.iterate([[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg4 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg5 = 0 to 58) {
 // CHECK: affine.store [[CONST1]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
 // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg6 = 0 to 2, [[INNER_LOOPS]]#1 -> %arg7 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg8 = 0 to 7) {
+ // CHECK: krnl.iterate([[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg6 = 0 to 2, [[INNER_LOOPS]]#1 -> %arg7 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg8 = 0 to 7) {
 // CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg4, %arg7)
 // CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
 // CHECK: [[DATA:%.+]] = affine.load %arg0[%arg2, %arg6, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x2x32x64xf32>
@@ -1407,24 +1236,15 @@ func @test_conv_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x
 // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST2:%.+]] = constant 2 : index
 // CHECK: [[OUTER_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_OUTER_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_OUTER_LOOPS]]#0, [[OPT_OUTER_LOOPS]]#1) with ([[OUTER_LOOPS]]#0 -> %arg3 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg4 = 0 to 5) {
+ // CHECK: krnl.iterate([[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1) with ([[OUTER_LOOPS]]#0 -> %arg3 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg4 = 0 to 5) {
 // CHECK: [[SPATIAL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_SPATIAL_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg5 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg6 = 0 to 58) {
+ // CHECK: krnl.iterate([[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg5 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg6 = 0 to 58) {
 // CHECK: affine.store [[CONST1]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
 // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 2, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
+ // CHECK: krnl.iterate([[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 2, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
 // CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
 // CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg6, %arg9)
 // CHECK: [[DATA:%.+]] = affine.load %arg0[%arg3, %arg7, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x2x32x64xf32>
@@ -1456,25 +1276,16 @@ func @test_conv_no_bias_no_pad_w_group(%arg0 : tensor<1x9x32x64xf32>, %arg1 : te
 // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST2:%.+]] = constant 3 : index
 // CHECK: [[OUTER_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_OUTER_LOOPS:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1, [[OUTER_LOOPS]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_OUTER_LOOPS]]#0, [[OPT_OUTER_LOOPS]]#1, [[OPT_OUTER_LOOPS]]#2) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 3, [[OUTER_LOOPS]]#2 -> %arg4 = 0 to 1) {
+ // CHECK: krnl.iterate([[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1, [[OUTER_LOOPS]]#2) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 3, [[OUTER_LOOPS]]#2 -> %arg4 = 0 to 1) {
 // CHECK: %[[ADD1:.+]] = affine.apply #{{.*}}(%arg3, [[CONST0]])[%arg4]
 // CHECK: [[SPATIAL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_SPATIAL_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg5 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg6 = 0 to 58) {
+ // CHECK: krnl.iterate([[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg5 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg6 = 0 to 58) {
 // CHECK: affine.store [[CONST1]], [[RES]][%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
 // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 3, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
+ // CHECK: krnl.iterate([[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 3, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
 // CHECK: [[ADD2:%.+]] = affine.apply #{{.*}}(%arg3, %arg7)[%c3]
 // CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
 // CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg6, %arg9)
@@ -1504,24 +1315,15 @@ func @test_conv_no_bias_no_pad_w_strides(%arg0 : tensor<1x9x32x64xf32>, %arg1 : 
 // CHECK: [[CONST1:%.+]] = constant 0.000000e+00 : f32
 // CHECK: [[CONST2:%.+]] = constant 9 : index
 // CHECK: [[OUTER_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_OUTER_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_OUTER_LOOPS]]#0, [[OPT_OUTER_LOOPS]]#1) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 5) {
+ // CHECK: krnl.iterate([[OUTER_LOOPS]]#0, [[OUTER_LOOPS]]#1) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 5) {
 // CHECK: [[SPATIAL_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_SPATIAL_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg4 = 0 to 14, [[SPATIAL_LOOPS]]#1 -> %arg5 = 0 to 29) {
+ // CHECK: krnl.iterate([[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg4 = 0 to 14, [[SPATIAL_LOOPS]]#1 -> %arg5 = 0 to 29) {
 // CHECK: affine.store [[CONST1]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
 // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg6 = 0 to 9, [[INNER_LOOPS]]#1 -> %arg7 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg8 = 0 to 7) {
+ // CHECK: krnl.iterate([[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg6 = 0 to 9, [[INNER_LOOPS]]#1 -> %arg7 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg8 = 0 to 7) {
 // CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg4, %arg7)
 // CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
 // CHECK: [[DATA:%.+]] = affine.load %arg0[%arg2, %arg6, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x9x32x64xf32>
@@ -1547,15 +1349,12 @@ func @test_batchnorm_testmode_Nd(%arg0: tensor<1x2x1x3xf32>, %arg1: tensor<2xf32
 // CHECK: [[RES:%.+]] = alloc() : memref<1x2x1x3xf32>
 // CHECK: [[EPSILON:%.+]] = constant 9.99999974E-6 : f32
 // CHECK: [[DEF_LOOPS:%.+]]:4 = krnl.define_loops 4
- // CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg5 = 0 to 2) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg5 = 0 to 2) {
 // CHECK: [[SCALE:%.+]] = affine.load %arg1[%arg5] : memref<2xf32>
 // CHECK: [[BIAS:%.+]] = affine.load %arg2[%arg5] : memref<2xf32>
 // CHECK: [[MEAN:%.+]] = affine.load %arg3[%arg5] : memref<2xf32>
 // CHECK: [[VARIANCE:%.+]] = affine.load %arg4[%arg5] : memref<2xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg6 = 0 to 1, [[DEF_LOOPS]]#2 -> %arg7 = 0 to 1, [[DEF_LOOPS]]#3 -> %arg8 = 0 to 3) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg6 = 0 to 1, [[DEF_LOOPS]]#2 -> %arg7 = 0 to 1, [[DEF_LOOPS]]#3 -> %arg8 = 0 to 3) {
 // CHECK: [[LOADED_VAL:%.+]] = affine.load %arg0[%arg6, %arg5, %arg7, %arg8] : memref<1x2x1x3xf32>
 // CHECK: [[DIVIDEND:%.+]] = subf [[LOADED_VAL]], [[MEAN]] : f32
 // CHECK: [[ADJUSTED_VARIANCE:%.+]] = addf [[VARIANCE]], [[EPSILON]] : f32
@@ -1579,15 +1378,12 @@ func @test_batchnorm_testmode_1d(%arg0: tensor<10xf32>, %arg1: tensor<1xf32>, %a
 // CHECK: [[RES:%.+]] = alloc() : memref<10xf32>
 // CHECK: [[EPSILON:%.+]] = constant 9.99999974E-6 : f32
 // CHECK: [[DEF_LOOPS:%.+]] = krnl.define_loops 1
- // CHECK: [[OPT_LOOPS:%.+]] = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]
- // CHECK: } : () -> !krnl.loop
 // CHECK: %[[ZERO_INDEX:.+]] = constant 0 : index
 // CHECK: [[SCALE:%.+]] = affine.load %arg1[%[[ZERO_INDEX]]] : memref<1xf32>
 // CHECK: [[BIAS:%.+]] = affine.load %arg2[%[[ZERO_INDEX]]] : memref<1xf32>
 // CHECK: [[MEAN:%.+]] = affine.load %arg3[%[[ZERO_INDEX]]] : memref<1xf32>
 // CHECK: [[VARIANCE:%.+]] = affine.load %arg4[%[[ZERO_INDEX]]] : memref<1xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]) with ([[DEF_LOOPS]] -> %arg5 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]) with ([[DEF_LOOPS]] -> %arg5 = 0 to 10) {
 // CHECK: [[LOADED_VAL:%.+]] = affine.load %arg0[%arg5] : memref<10xf32>
 // CHECK: [[DIVIDEND:%.+]] = subf [[LOADED_VAL]], [[MEAN]] : f32
 // CHECK: [[ADJUSTED_VARIANCE:%.+]] = addf [[VARIANCE]], [[EPSILON]] : f32
@@ -1611,12 +1407,9 @@ func @test_abs_float(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ABS:%.+]] = absf [[LOAD]] : f32
 // CHECK: affine.store [[ABS]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -1634,12 +1427,9 @@ func @test_abs_int(%arg0 : tensor<?x10xi32>) -> tensor<*xi32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xi32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xi32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xi32>
 // CHECK: [[ZERO:%.+]] = constant 0 : i32
 // CHECK: [[LESS_THAN_ZERO:%.+]] = cmpi "slt", [[LOAD]], [[ZERO]] : i32
@@ -1657,18 +1447,12 @@ func @test_constant_pad1(%arg0: tensor<16x16xf32>) -> tensor<18x20xf32> {
 // CHECK-LABEL: test_constant_pad1
 // CHECK: [[RES:%.+]] = alloc() : memref<18x20xf32>
 // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS1:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 18, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 20) {
+ // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 18, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 20) {
 // CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32
 // CHECK: affine.store [[CST]], [[RES]][%arg1, %arg2] : memref<18x20xf32>
 // CHECK: }
 // CHECK: [[DEF_LOOPS2:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS2:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 16, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 16) {
+ // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 16, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 16) {
 // CHECK: [[ADD:%.+]] = affine.apply #{{.*}}(%arg2)
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<16x16xf32>
 // CHECK: affine.store [[LOAD]], [[RES]][%arg1, [[ADD]]] : memref<18x20xf32>
@@ -1682,18 +1466,12 @@ func @test_pad1(%arg0: tensor<16x16xf32>) -> tensor<18x20xf32> {
 // CHECK-LABEL: test_pad1
 // CHECK: [[RES:%.+]] = alloc() : memref<18x20xf32>
 // CHECK: [[DEF_LOOPS1:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS1:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 18, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 20) {
+ // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 18, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 20) {
 // CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32
 // CHECK: affine.store [[CST]], [[RES]][%arg1, %arg2] : memref<18x20xf32>
 // CHECK: }
 // CHECK: [[DEF_LOOPS2:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS2:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 16, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 16) {
+ // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 16, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 16) {
 // CHECK: [[ADD:%.+]] = affine.apply #{{.*}}(%arg2)
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<16x16xf32>
 // CHECK: affine.store [[LOAD]], [[RES]][%arg1, [[ADD]]] : memref<18x20xf32>
@@ -1719,27 +1497,18 @@ func @test_concat_1(%arg0 : tensor<5x5x1x32xf32>, %arg1 : tensor<5x5x3x32xf32>, 
 // CHECK-LABEL: test_concat_1
 // CHECK: [[RES:%.+]] = alloc() : memref<5x5x9x32xf32>
 // CHECK: [[DEF_LOOPS0:%.+]]:4 = krnl.define_loops 4
- // CHECK: [[OPT_LOOPS0:%.+]]:4 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS0]]#0, [[DEF_LOOPS0]]#1, [[DEF_LOOPS0]]#2, [[DEF_LOOPS0]]#3
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS0]]#0, [[OPT_LOOPS0]]#1, [[OPT_LOOPS0]]#2, [[OPT_LOOPS0]]#3) with ([[DEF_LOOPS0]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS0]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS0]]#2 -> %arg5 = 0 to 1, [[DEF_LOOPS0]]#3 -> %arg6 = 0 to 32) {
+ // CHECK: krnl.iterate([[DEF_LOOPS0]]#0, [[DEF_LOOPS0]]#1, [[DEF_LOOPS0]]#2, [[DEF_LOOPS0]]#3) with ([[DEF_LOOPS0]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS0]]#1
-> %arg4 = 0 to 5, [[DEF_LOOPS0]]#2 -> %arg5 = 0 to 1, [[DEF_LOOPS0]]#3 -> %arg6 = 0 to 32) { // CHECK: [[LOAD0:%.+]] = affine.load %arg0[%arg3, %arg4, %arg5, %arg6] : memref<5x5x1x32xf32> // CHECK: affine.store [[LOAD0]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<5x5x9x32xf32> // CHECK: [[DEF_LOOPS1:%.+]]:4 = krnl.define_loops 4 - // CHECK: [[OPT_LOOPS1:%.+]]:4 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1, [[DEF_LOOPS1]]#2, [[DEF_LOOPS1]]#3 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1, [[OPT_LOOPS1]]#2, [[OPT_LOOPS1]]#3) with ([[DEF_LOOPS1]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS1]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS1]]#2 -> %arg5 = 0 to 3, [[DEF_LOOPS1]]#3 -> %arg6 = 0 to 32) { + // CHECK: krnl.iterate([[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1, [[DEF_LOOPS1]]#2, [[DEF_LOOPS1]]#3) with ([[DEF_LOOPS1]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS1]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS1]]#2 -> %arg5 = 0 to 3, [[DEF_LOOPS1]]#3 -> %arg6 = 0 to 32) { // CHECK: [[AFFINE_APPLY1:%.+]] = affine.apply #{{.*}}(%arg5) // CHECK: [[LOAD1:%.+]] = affine.load %arg1[%arg3, %arg4, %arg5, %arg6] : memref<5x5x3x32xf32> // CHECK: affine.store [[LOAD1]], [[RES]][%arg3, %arg4, [[AFFINE_APPLY1]], %arg6] : memref<5x5x9x32xf32> // CHECK: [[DEF_LOOPS2:%.+]]:4 = krnl.define_loops 4 - // CHECK: [[OPT_LOOPS2:%.+]]:4 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2, [[DEF_LOOPS2]]#3 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2, [[OPT_LOOPS2]]#3) with ([[DEF_LOOPS2]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS2]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS2]]#2 -> %arg5 = 0 to 5, [[DEF_LOOPS2]]#3 -> %arg6 = 0 to 32) { + // CHECK: krnl.iterate([[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2, [[DEF_LOOPS2]]#3) with ([[DEF_LOOPS2]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS2]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS2]]#2 -> %arg5 = 0 to 5, [[DEF_LOOPS2]]#3 -> %arg6 = 0 to 32) { // CHECK: [[AFFINE_APPLY2:%.+]] = affine.apply #{{.*}}(%arg5) // CHECK: [[LOAD2:%.+]] = affine.load %arg2[%arg3, %arg4, %arg5, %arg6] : memref<5x5x5x32xf32> // CHECK: affine.store [[LOAD2]], [[RES]][%arg3, %arg4, [[AFFINE_APPLY2]], %arg6] : memref<5x5x9x32xf32> @@ -1764,18 +1533,12 @@ func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: [[OPT_OUTPUT_LOOPS:%.+]]:4 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_OUTPUT_LOOPS]]#0, [[OPT_OUTPUT_LOOPS]]#1, [[OPT_OUTPUT_LOOPS]]#2, [[OPT_OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { + // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: 
[[OPT_POOL_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { // CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> @@ -1838,16 +1601,10 @@ func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tenso // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: [[OPT_OUTPUT_LOOPS:%.+]]:4 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_OUTPUT_LOOPS]]#0, [[OPT_OUTPUT_LOOPS]]#1, [[OPT_OUTPUT_LOOPS]]#2, [[OPT_OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { + // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: [[OPT_POOL_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> @@ -1871,16 +1628,10 @@ func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*x // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> // CHECK: [[OUTPUT_LOOPS:%.+]]:4 = krnl.define_loops 4 - // CHECK: [[OPT_OUTPUT_LOOPS:%.+]]:4 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_OUTPUT_LOOPS]]#0, [[OPT_OUTPUT_LOOPS]]#1, [[OPT_OUTPUT_LOOPS]]#2, [[OPT_OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 
-> %arg4 = 0 to 31) { + // CHECK: krnl.iterate([[OUTPUT_LOOPS]]#0, [[OUTPUT_LOOPS]]#1, [[OUTPUT_LOOPS]]#2, [[OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: [[OPT_POOL_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: krnl.iterate([[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> @@ -1910,19 +1661,13 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12 // CHECK: [[INITIAL_VALUE:%.+]] = constant 0.000000e+00 : f32 // CHECK: [[INITIALIZE_LOOPS:%.+]]:3 = krnl.define_loops 3 - // CHECK: [[INITIALIZE_OPT_LOOPS:%.+]]:3 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[INITIALIZE_OPT_LOOPS]]#0, [[INITIALIZE_OPT_LOOPS]]#1, [[INITIALIZE_OPT_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) { + // CHECK: krnl.iterate([[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) { // CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32> // CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32> // CHECK: } // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: [[SEQUENCE_OPT_LOOPS:%.+]] = krnl.optimize_loops { - // CHECK: krnl.return_loops [[SEQUENCE_LOOPS]] - // CHECK: } : () -> !krnl.loop - // CHECK: krnl.iterate([[SEQUENCE_OPT_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { // CHECK: {{.*}} = constant 0 : index // CHECK: {{.*}} = constant 3 : index // CHECK: {{.*}} = constant 0 : index @@ -1934,10 +1679,7 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12 // CHECK: {{.*}} = constant 6 : index // CHECK: {{.*}} = constant 7 : index // CHECK: [[DATA_LOOPS:%.+]]:2 = krnl.define_loops 2 - // CHECK: [[DATA_OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1 - // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[DATA_OPT_LOOPS]]#0, [[DATA_OPT_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) { + // CHECK: krnl.iterate([[DATA_LOOPS]]#0, [[DATA_LOOPS]]#1) with ([[DATA_LOOPS]]#0 -> %arg4 = 0 to 3, [[DATA_LOOPS]]#1 -> %arg5 = 0 to 3) { // CHECK: 
[[hCt:%.+]] = alloc() : memref // CHECK: [[Ot:%.+]] = alloc() : memref // CHECK: [[ct:%.+]] = alloc() : memref @@ -1965,10 +1707,7 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12 // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref // CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: [[REDUCTION_OPT_LOOPS:%.+]] = krnl.optimize_loops { - // CHECK: krnl.return_loops [[REDUCTION_LOOPS]] - // CHECK: } : () -> !krnl.loop - // CHECK: krnl.iterate([[REDUCTION_OPT_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) { + // CHECK: krnl.iterate([[REDUCTION_LOOPS]]) with ([[REDUCTION_LOOPS]] -> %arg6 = 0 to 2) { // CHECK: [[INPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c0_1, %c3] // CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3] // CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3] @@ -2140,10 +1879,7 @@ func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32> // CHECK-LABEL: @test_lstm_reverse_mode // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: [[REVERSE_SEQUENCE_OPT_LOOPS:%.+]] = krnl.optimize_loops { - // CHECK: krnl.return_loops [[REVERSE_SEQUENCE_LOOPS]] - // CHECK: } : () -> !krnl.loop - // CHECK: krnl.iterate([[REVERSE_SEQUENCE_OPT_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}} // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32> @@ -2160,17 +1896,11 @@ func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x // CHECK-LABEL: @test_lstm_bidirectional_mode // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: [[SEQUENCE_OPT_LOOPS:%.+]] = krnl.optimize_loops { - // CHECK: krnl.return_loops [[SEQUENCE_LOOPS]] - // CHECK: } : () -> !krnl.loop - // CHECK: krnl.iterate([[SEQUENCE_OPT_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: krnl.iterate([[SEQUENCE_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32> // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1 - // CHECK: [[REVERSE_SEQUENCE_OPT_LOOPS:%.+]] = krnl.optimize_loops { - // CHECK: krnl.return_loops [[REVERSE_SEQUENCE_LOOPS]] - // CHECK: } : () -> !krnl.loop - // CHECK: krnl.iterate([[REVERSE_SEQUENCE_OPT_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { + // CHECK: krnl.iterate([[REVERSE_SEQUENCE_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) { // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}} // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32> @@ -2218,18 +1948,12 @@ func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<* // CHECK: [[RES_1:%.+]] = alloc() : memref<8x32x64xf32> // CHECK: [[RES_0:%.+]] = alloc() : memref<8x32x64xf32> // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3 - // CHECK: [[OPT_LOOP_0:%.+]]:3 = krnl.optimize_loops { - // CHECK: krnl.return_loops [[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2 - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop) 
- // CHECK: krnl.iterate([[OPT_LOOP_0]]#0, [[OPT_LOOP_0]]#1, [[OPT_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
+ // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
 // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
 // CHECK: }
 // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOP_1:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOP_1]]#0, [[OPT_LOOP_1]]#1, [[OPT_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
+ // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
 // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1)
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
 // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
@@ -2249,18 +1973,12 @@ func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tenso
 // CHECK: [[RES_1:%.+]] = alloc() : memref<16x30x64xf32>
 // CHECK: [[RES_0:%.+]] = alloc() : memref<16x2x64xf32>
 // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOP_0:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOP_0]]#0, [[OPT_LOOP_0]]#1, [[OPT_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
+ // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
 // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
 // CHECK: }
 // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOP_1:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOP_1]]#0, [[OPT_LOOP_1]]#1, [[OPT_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
+ // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
 // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
 // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
@@ -2284,22 +2002,16 @@ func @test_split_unknown_dimension(%arg0 : tensor<?x32x64xf32>) -> (tensor<*xf32>
 // CHECK: [[DIM_1:%.+]] = dim %arg0, [[C0_0]] : memref<?x32x64xf32>
 // CHECK: [[RES_1:%.+]] = alloc([[DIM_1]]) : memref<?x30x64xf32>
 // CHECK: [[DEF_LOOP_0:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOP_0:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_0:%.+]] = dim [[RES_0]], [[C0_2]] : memref<?x2x64xf32>
- // CHECK: krnl.iterate([[OPT_LOOP_0]]#0, [[OPT_LOOP_0]]#1, [[OPT_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
+ // CHECK: krnl.iterate([[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
 // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x32x64xf32>
 // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
 // CHECK: }
 // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
- // CHECK: [[OPT_LOOP_1:%.+]]:3 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2
- // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
 // CHECK: [[C0_3:%.+]] = constant 0 : index
 // CHECK: [[DIM_1:%.+]] = dim [[RES_1]], [[C0_3]] : memref<?x30x64xf32>
- // CHECK: krnl.iterate([[OPT_LOOP_1]]#0, [[OPT_LOOP_1]]#1, [[OPT_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
+ // CHECK: krnl.iterate([[DEF_LOOP_1]]#0, [[DEF_LOOP_1]]#1, [[DEF_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
 // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
 // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x32x64xf32>
 // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
diff --git a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir
index 86fa0df..ccf653b 100644
--- a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir
+++ b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir
@@ -12,10 +12,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -23,10 +20,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Add
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -51,10 +45,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
@@ -62,10 +53,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Mul
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
@@ -90,10 +78,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
@@ -101,10 +86,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Div
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
@@ -129,10 +111,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
@@ -140,10 +119,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Sub
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
@@ -168,10 +144,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
@@ -179,10 +152,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
 /// Second And
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
@@ -207,10 +177,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
@@ -218,10 +185,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
 /// Second Or
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
@@ -246,10 +210,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
@@ -257,10 +218,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
 /// Second Xor
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
 // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
@@ -286,12 +244,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
 // CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -301,12 +256,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
 // CHECK: affine.store [[EXP]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
@@ -331,12 +283,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@@ -352,12 +301,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@@ -388,12 +334,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -409,12 +352,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -445,12 +385,9 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -466,12 +403,8 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
@@ -502,12 +435,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -522,12 +452,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -557,12 +484,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
@@ -574,12 +498,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
@@ -605,10 +526,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -616,10 +534,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Sum
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
@@ -644,10 +559,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
@@ -656,10 +568,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Max
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
@@ -685,10 +594,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
@@ -697,10 +603,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
 /// Second Min
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
 // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
 // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
@@ -727,12 +630,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -749,12 +649,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -786,12 +683,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -805,12 +699,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -839,12 +730,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -862,12 +750,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@@ -900,12 +785,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -924,12 +806,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
@@ -963,12 +842,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32>
 // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_0:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
 // CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32
@@ -979,12 +855,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
 // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32>
 // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
- // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
- // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
- // CHECK: } : () -> (!krnl.loop, !krnl.loop)
 // CHECK: [[C0_2:%.+]] = constant 0 : index
 // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32>
- // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
+ // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
 // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
 // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
 // CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32