Remove optimize_loops/return_loops op. (#200)
* Remove optimize_loops/return_loops op in elementwise ops lowering and fix tests in onnx_lowering.mlir.
* Fix all tests.
* Remove all occurrences of def_loops/return_loops.
* Fix test.
* Fix comments for defineLoops & emitKrnlLoopsAndIterationForOperand function.
* Remove emitOptimizedLoops.
* Allow not specifying optimizedLoops when creating KrnlIterateOperandPack.
* Fix style.
* Make BuildKrnlLoop helper not emit optimize/return_loop operations & retire emitKrnlLoopsAndIterationForOperand by replacing it with BuildKrnlLoop.
* DefineLoops -> DefineLoopsEx, remove redundant emitKrnlLoopsAndIterationForOperand function.
* BuildKrnlLoop API name update.
* Tweak comments.
* Remove unused withEmptyOptimization flag.
* Better comment for BuildKrnlLoop.
* Fully remove krnl.return_loops/optimize_loops op.
* Trigger Windows Build.
* Bump windows ci python version.
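In effect, every lowering used to wrap its loop nest in a `krnl.optimize_loops` region whose only job was to return the loops unchanged; after this change the loops are defined and iterated directly. A rough before/after sketch of the pattern (assuming `rewriter`, `loc`, `alloc`, and `rank` are in scope, as in the lowering patterns below):

```cpp
// Before: a boilerplate optimize_loops/return_loops pair around every nest.
std::vector<Value> originalLoops, optimizedLoops;
Block *optimizationBlock =
    defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank);
KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops);
for (int i = 0; i < rank; ++i)
  addDimensionToPack(rewriter, loc, pack, alloc, i);
auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
rewriter.setInsertionPointToEnd(optimizationBlock);
rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); // identity schedule

// After: define the loops and iterate; no optimization region at all.
std::vector<Value> loops;
defineLoops(rewriter, loc, loops, rank);
KrnlIterateOperandPack newPack(rewriter, loops);
for (int i = 0; i < rank; ++i)
  addDimensionToPack(rewriter, loc, newPack, alloc, i);
auto newIterateOp = rewriter.create<KrnlIterateOp>(loc, newPack);
```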
This commit is contained in:
parent 07757a28ce, commit 01a4977c74
				|  | @ -13,7 +13,7 @@ jobs: | |||
|   steps: | ||||
|   - task: UsePythonVersion@0 | ||||
|     inputs: | ||||
|       versionSpec: '3.7.7' | ||||
|       versionSpec: '3.7.8' | ||||
|       architecture: 'x64' | ||||
| 
 | ||||
|   - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" | ||||
|  |  | |||
|  | @ -525,26 +525,16 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     SmallVector<Value, 4> loopIVs; | ||||
|     if (!hasAllScalarValues(operands)) { | ||||
|       std::vector<Value> originalLoops; | ||||
|       KrnlOptimizeLoopsOp optimizedLoopsOp; | ||||
|       KrnlIterateOp iterateOp; | ||||
|       emitKrnlLoopsAndIterationForOperand( | ||||
|           rewriter, loc, X, originalLoops, optimizedLoopsOp, iterateOp); | ||||
|       Block &optimizationBlock = optimizedLoopsOp.region().front(); | ||||
|       Block &iterationBlock = iterateOp.bodyRegion().front(); | ||||
|       // Create iterateOp & get block within iterate op.
 | ||||
|       BuildKrnlLoop loops(rewriter, loc, memRefType.getRank()); | ||||
|       loops.createDefineAndIterateOp(X); | ||||
|       Block *iterationBlock = loops.getIterateBlock(); | ||||
| 
 | ||||
|       // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
 | ||||
|       rewriter.setInsertionPointToEnd(&optimizationBlock); | ||||
|       // Return from KrnlOptimizeLoopsOp body.
 | ||||
|       // When no optimizations are present we just return the loops
 | ||||
|       // unchaged.
 | ||||
|       rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|       // 2. Insert instructions inside the KernelIterateOp body.
 | ||||
|       rewriter.setInsertionPointToStart(&iterationBlock); | ||||
|       // Insert instructions inside the KernelIterateOp body.
 | ||||
|       rewriter.setInsertionPointToStart(iterationBlock); | ||||
| 
 | ||||
|       // Handle the operation:
 | ||||
|       for (auto arg : iterationBlock.getArguments()) | ||||
|       for (auto arg : iterationBlock->getArguments()) | ||||
|         loopIVs.push_back(arg); | ||||
|     } | ||||
| 
 | ||||
|  | @ -555,7 +545,6 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern { | |||
|     rewriter.create<AffineStoreOp>(loc, loweredOpResult, alloc, loopIVs); | ||||
| 
 | ||||
|     rewriter.replaceOp(op, alloc); | ||||
| 
 | ||||
|     return success(); | ||||
|   } | ||||
| }; | ||||
|  | @ -598,25 +587,16 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern { | |||
|       broadcastedDimInfo = | ||||
|           getBroadcastedDimInfo(loc, rewriter, memRefType, operands); | ||||
| 
 | ||||
|       std::vector<Value> originalLoops; | ||||
|       KrnlOptimizeLoopsOp optimizedLoopsOp; | ||||
|       KrnlIterateOp iterateOp; | ||||
|       emitKrnlLoopsAndIterationForOperand( | ||||
|           rewriter, loc, alloc, originalLoops, optimizedLoopsOp, iterateOp); | ||||
|       Block &optimizationBlock = optimizedLoopsOp.region().front(); | ||||
|       Block &iterationBlock = iterateOp.bodyRegion().front(); | ||||
|       // Create iterateOp & get block within iterate op.
 | ||||
|       BuildKrnlLoop loops(rewriter, loc, memRefType.getRank()); | ||||
|       loops.createDefineAndIterateOp(alloc); | ||||
|       Block *iterationBlock = loops.getIterateBlock(); | ||||
| 
 | ||||
|       // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
 | ||||
|       rewriter.setInsertionPointToEnd(&optimizationBlock); | ||||
|       // Return from KrnlOptimizeLoopsOp body.
 | ||||
|       // When no optimizations are present we just return the loops unchaged.
 | ||||
|       rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|       // 2. Insert instructions inside the KernelIterateOp body.
 | ||||
|       rewriter.setInsertionPointToStart(&iterationBlock); | ||||
|       // Insert instructions inside the KernelIterateOp body.
 | ||||
|       rewriter.setInsertionPointToStart(iterationBlock); | ||||
| 
 | ||||
|       // Handle the operation:
 | ||||
|       for (auto arg : iterationBlock.getArguments()) | ||||
|       for (auto arg : iterationBlock->getArguments()) | ||||
|         loopIVs.push_back(arg); | ||||
|     } | ||||
|     // Fold over operands for each of their scalar values.
 | ||||
|  |  | |||
|  | @ -72,9 +72,7 @@ struct ONNXGemmOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Define loops.
 | ||||
|     std::vector<Value> originalLoops; | ||||
|     std::vector<Value> optimizedLoops; | ||||
|     Block *optimizationBlock = | ||||
|         defineLoops(rewriter, loc, originalLoops, optimizedLoops, numLoops); | ||||
|     defineLoops(rewriter, loc, originalLoops, numLoops); | ||||
| 
 | ||||
|     // We have two Krnl loops:
 | ||||
|     // - Outer loop iterates over the output matrix dimensions, and
 | ||||
|  | @ -84,23 +82,18 @@ struct ONNXGemmOpLowering : public ConversionPattern { | |||
|     std::vector<Value> outerLoops, optimizedOuterLoops; | ||||
|     outerLoops.reserve(2); | ||||
|     optimizedOuterLoops.reserve(2); | ||||
|     for (int i = 0; i < 2; ++i) { | ||||
|     for (int i = 0; i < 2; ++i) | ||||
|       outerLoops.push_back(originalLoops[i]); | ||||
|       optimizedOuterLoops.push_back(optimizedLoops[i]); | ||||
|     } | ||||
|     KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops); | ||||
|     KrnlIterateOperandPack outerPack(rewriter, outerLoops); | ||||
|     // Induction variables for the outer loops
 | ||||
|     for (int i = 0; i < 2; ++i) | ||||
|       addDimensionToPack(rewriter, loc, outerPack, alloc, i); | ||||
| 
 | ||||
|     // Reduction loop
 | ||||
|     std::vector<Value> reductionLoops, optimizedReductionLoops; | ||||
|     std::vector<Value> reductionLoops; | ||||
|     reductionLoops.reserve(1); | ||||
|     optimizedReductionLoops.reserve(1); | ||||
|     reductionLoops.push_back(originalLoops[2]); | ||||
|     optimizedReductionLoops.push_back(optimizedLoops[2]); | ||||
|     KrnlIterateOperandPack reductionPack( | ||||
|         rewriter, reductionLoops, optimizedReductionLoops); | ||||
|     KrnlIterateOperandPack reductionPack(rewriter, reductionLoops); | ||||
|     // Induction variable for the reduction dimension
 | ||||
|     // Try to find and use a static value from A or B first.
 | ||||
|     // If it failed then use a dynamic value.
 | ||||
|  | @ -140,10 +133,6 @@ struct ONNXGemmOpLowering : public ConversionPattern { | |||
|     // Now perform the insertions into the body of the
 | ||||
|     // just generated instructions:
 | ||||
| 
 | ||||
|     // No optimization
 | ||||
|     rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|     // Insert instructions inside the outer loop.
 | ||||
|     Block &outerIterationBlock = outerIterateOp.bodyRegion().front(); | ||||
|     rewriter.setInsertionPointToStart(&outerIterationBlock); | ||||
|  | @ -154,14 +143,15 @@ struct ONNXGemmOpLowering : public ConversionPattern { | |||
|       loopMNIVs.emplace_back(arg); | ||||
|     } | ||||
| 
 | ||||
|     // Initialize the output of A*B
 | ||||
|     // Initialize the output of A * B
 | ||||
|     auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0); | ||||
|     rewriter.create<AffineStoreOp>(loc, zero, alloc, loopMNIVs); | ||||
| 
 | ||||
|     // Compute A*B
 | ||||
|     // Compute A * B
 | ||||
|     auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack); | ||||
| 
 | ||||
|     // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
 | ||||
|     // Compute beta * C, and add up to alpha * A * B (unidirectional
 | ||||
|     // broadcasting)
 | ||||
|     auto loadedAB = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs); | ||||
|     auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB); | ||||
|     if (hasBias) { | ||||
|  | @ -175,7 +165,7 @@ struct ONNXGemmOpLowering : public ConversionPattern { | |||
|       rewriter.create<AffineStoreOp>(loc, alphaAB, alloc, loopMNIVs); | ||||
|     } | ||||
| 
 | ||||
|     // Insert instructions to do matrix multiplication: A*B
 | ||||
|     // Insert instructions to do matrix multiplication: A * B
 | ||||
|     Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front(); | ||||
|     rewriter.setInsertionPointToStart(&matmulIterationBlock); | ||||
| 
 | ||||
|  |  | |||
|  | @ -117,9 +117,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern { | |||
| 
 | ||||
|       // Define loops for batch dimensions.
 | ||||
|       std::vector<Value> originalLoops; | ||||
|       std::vector<Value> optimizedLoops; | ||||
|       Block *optimizationBlock = defineLoops( | ||||
|           rewriter, loc, originalLoops, optimizedLoops, memRefShape.size()); | ||||
|       defineLoops(rewriter, loc, originalLoops, memRefShape.size()); | ||||
| 
 | ||||
|       // Outer KrnlIterateOp
 | ||||
|       SmallVector<Value, 4> loopBatchIVs; | ||||
|  | @ -131,24 +129,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern { | |||
|         for (int i = 0; i < memRefShape.size() - matmulResultDims; ++i) | ||||
|           batchAxes.emplace_back(i); | ||||
| 
 | ||||
|         std::vector<Value> outerLoops, optimizedOuterLoops; | ||||
|         std::vector<Value> outerLoops; | ||||
|         outerLoops.reserve(batchAxes.size()); | ||||
|         optimizedOuterLoops.reserve(batchAxes.size()); | ||||
|         for (int i = 0; i < batchAxes.size(); ++i) { | ||||
|         for (int i = 0; i < batchAxes.size(); ++i) | ||||
|           outerLoops.push_back(originalLoops[i]); | ||||
|           optimizedOuterLoops.push_back(optimizedLoops[i]); | ||||
|         } | ||||
|         KrnlIterateOperandPack outerPack( | ||||
|             rewriter, outerLoops, optimizedOuterLoops); | ||||
| 
 | ||||
|         KrnlIterateOperandPack outerPack(rewriter, outerLoops); | ||||
|         for (int i = 0; i < batchAxes.size(); ++i) { | ||||
|           addDimensionToPack(rewriter, loc, outerPack, alloc, i); | ||||
|         } | ||||
|         auto outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack); | ||||
| 
 | ||||
|         // No optimization
 | ||||
|         rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|         rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|         // Insert instructions into the outer KrnlIterateOp.
 | ||||
|         Block &outerIterationBlock = outerIterateOp.bodyRegion().front(); | ||||
|         rewriter.setInsertionPointToStart(&outerIterationBlock); | ||||
|  | @ -165,18 +156,14 @@ struct ONNXMatMulOpLowering : public ConversionPattern { | |||
| 
 | ||||
|       // Create a KrnlIterateOp for matrix multiplication.
 | ||||
|       KrnlIterateOp matmulIterateOp; | ||||
|       std::vector<Value> matmulLoops, optimizedMatmulLoops; | ||||
|       std::vector<Value> matmulLoops; | ||||
|       if (AShape.size() >= 2 && BShape.size() >= 2) { | ||||
|         // 2-D x 2-D. Result has two dimensions.
 | ||||
|         matmulLoops.reserve(2); | ||||
|         optimizedMatmulLoops.reserve(2); | ||||
|         for (int i = 2; i > 0; --i) { | ||||
|           matmulLoops.emplace_back(originalLoops[memRefShape.size() - i]); | ||||
|           optimizedMatmulLoops.emplace_back( | ||||
|               optimizedLoops[memRefShape.size() - i]); | ||||
|         } | ||||
|         KrnlIterateOperandPack matmulPack( | ||||
|             rewriter, matmulLoops, optimizedMatmulLoops); | ||||
|         KrnlIterateOperandPack matmulPack(rewriter, matmulLoops); | ||||
|         for (int i = 2; i > 0; --i) { | ||||
|           addDimensionToPack( | ||||
|               rewriter, loc, matmulPack, alloc, memRefShape.size() - i); | ||||
|  | @ -185,23 +172,13 @@ struct ONNXMatMulOpLowering : public ConversionPattern { | |||
|       } else { | ||||
|         // 1-D x 2-D, and vice versa. Result has one dimension.
 | ||||
|         matmulLoops.reserve(1); | ||||
|         optimizedMatmulLoops.reserve(1); | ||||
|         matmulLoops.emplace_back(originalLoops[memRefShape.size() - 1]); | ||||
|         optimizedMatmulLoops.emplace_back( | ||||
|             optimizedLoops[memRefShape.size() - 1]); | ||||
|         KrnlIterateOperandPack matmulPack( | ||||
|             rewriter, matmulLoops, optimizedMatmulLoops); | ||||
|         KrnlIterateOperandPack matmulPack(rewriter, matmulLoops); | ||||
|         addDimensionToPack( | ||||
|             rewriter, loc, matmulPack, alloc, memRefShape.size() - 1); | ||||
|         matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, matmulPack); | ||||
|       } | ||||
| 
 | ||||
|       if (!hasBatchLoop) { | ||||
|         // No optimization
 | ||||
|         rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|         rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
|       } | ||||
| 
 | ||||
|       // Insert instructions into the matmul KrnlIterateOp.
 | ||||
|       Block &matmulIterationBlock = matmulIterateOp.bodyRegion().front(); | ||||
|       rewriter.setInsertionPointToStart(&matmulIterationBlock); | ||||
|  | @ -226,18 +203,11 @@ struct ONNXMatMulOpLowering : public ConversionPattern { | |||
|       //  Iterate along the reduction dimension.
 | ||||
|       //  Use a value from A.
 | ||||
|       std::vector<Value> reduceLoops; | ||||
|       std::vector<Value> optimizedReduceLoops; | ||||
|       Block *optimizationReduceBlock = | ||||
|           defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1); | ||||
|       KrnlIterateOperandPack reducePack( | ||||
|           rewriter, reduceLoops, optimizedReduceLoops); | ||||
|       defineLoops(rewriter, loc, reduceLoops, 1); | ||||
|       KrnlIterateOperandPack reducePack(rewriter, reduceLoops); | ||||
|       addDimensionToPack(rewriter, loc, reducePack, A, AShape.size() - 1); | ||||
|       auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack); | ||||
| 
 | ||||
|       // No optimization
 | ||||
|       rewriter.setInsertionPointToEnd(optimizationReduceBlock); | ||||
|       rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops); | ||||
| 
 | ||||
|       // Insert instructions into the reduction KrnlIterateOp.
 | ||||
|       Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front(); | ||||
|       rewriter.setInsertionPointToStart(&reduceIterationBlock); | ||||
|  | @ -288,18 +258,12 @@ struct ONNXMatMulOpLowering : public ConversionPattern { | |||
|       //  Iterate along the reduction dimension.
 | ||||
|       //  Use a value from A.
 | ||||
|       std::vector<Value> reduceLoops; | ||||
|       std::vector<Value> optimizedReduceLoops; | ||||
|       Block *optimizationReduceBlock = | ||||
|           defineLoops(rewriter, loc, reduceLoops, optimizedReduceLoops, 1); | ||||
|       KrnlIterateOperandPack reducePack( | ||||
|           rewriter, reduceLoops, optimizedReduceLoops); | ||||
| 
 | ||||
|       defineLoops(rewriter, loc, reduceLoops, 1); | ||||
|       KrnlIterateOperandPack reducePack(rewriter, reduceLoops); | ||||
|       addDimensionToPack(rewriter, loc, reducePack, A, 0); | ||||
|       auto reduceIterateOp = rewriter.create<KrnlIterateOp>(loc, reducePack); | ||||
| 
 | ||||
|       // No optimization
 | ||||
|       rewriter.setInsertionPointToEnd(optimizationReduceBlock); | ||||
|       rewriter.create<KrnlReturnLoopsOp>(loc, reduceLoops); | ||||
| 
 | ||||
|       // Insert instructions into the reduction KrnlIterateOp.
 | ||||
|       Block &reduceIterationBlock = reduceIterateOp.bodyRegion().front(); | ||||
|       rewriter.setInsertionPointToStart(&reduceIterationBlock); | ||||
|  |  | |||
|  | @ -183,13 +183,10 @@ struct ONNXReductionOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Define loops to initialize the result.
 | ||||
|     std::vector<Value> originalLoopsInit; | ||||
|     std::vector<Value> optimizedLoopsInit; | ||||
|     Block *optimizationBlockInit = defineLoops( | ||||
|         rewriter, loc, originalLoopsInit, optimizedLoopsInit, outRank); | ||||
|     defineLoops(rewriter, loc, originalLoopsInit, outRank); | ||||
| 
 | ||||
|     // Iteration information
 | ||||
|     KrnlIterateOperandPack packInit( | ||||
|         rewriter, originalLoopsInit, optimizedLoopsInit); | ||||
|     KrnlIterateOperandPack packInit(rewriter, originalLoopsInit); | ||||
|     for (decltype(outRank) i = 0; i < outRank; ++i) { | ||||
|       addDimensionToPack(rewriter, loc, packInit, alloc, i); | ||||
|     } | ||||
|  | @ -197,9 +194,6 @@ struct ONNXReductionOpLowering : public ConversionPattern { | |||
|     Block &iterationBlockInit = iterateOpInit.bodyRegion().front(); | ||||
| 
 | ||||
|     // Perform the insertions into the body of the initialization loop.
 | ||||
|     // No optimization
 | ||||
|     rewriter.setInsertionPointToEnd(optimizationBlockInit); | ||||
|     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoopsInit); | ||||
| 
 | ||||
|     // Insert instructions inside the KernelIterateOp body.
 | ||||
|     rewriter.setInsertionPointToStart(&iterationBlockInit); | ||||
|  | @ -216,11 +210,10 @@ struct ONNXReductionOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Define a Krnl loop to do the reduction.
 | ||||
|     rewriter.setInsertionPointAfter(iterateOpInit); | ||||
|     std::vector<Value> originalLoops, optimizedLoops; | ||||
|     Block *optimizationBlock = | ||||
|         defineLoops(rewriter, loc, originalLoops, optimizedLoops, inRank); | ||||
|     std::vector<Value> originalLoops; | ||||
|     defineLoops(rewriter, loc, originalLoops, inRank); | ||||
|     // Iteration information
 | ||||
|     KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops); | ||||
|     KrnlIterateOperandPack pack(rewriter, originalLoops); | ||||
|     for (decltype(inRank) i = 0; i < inRank; ++i) { | ||||
|       addDimensionToPack(rewriter, loc, pack, operands[0], i); | ||||
|     } | ||||
|  | @ -228,10 +221,6 @@ struct ONNXReductionOpLowering : public ConversionPattern { | |||
|     Block &iterationBlock = iterateOp.bodyRegion().front(); | ||||
| 
 | ||||
|     // Perform the insertions into the body of the reduction loop.
 | ||||
|     // No optimization
 | ||||
|     rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|     // Insert instructions inside the KernelIterateOp body.
 | ||||
|     rewriter.setInsertionPointToStart(&iterationBlock); | ||||
| 
 | ||||
|  |  | |||
|  | @ -54,9 +54,7 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Define loops.
 | ||||
|     std::vector<Value> originalLoops; | ||||
|     std::vector<Value> optimizedLoops; | ||||
|     Block *optimizationBlock = | ||||
|         defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank); | ||||
|     defineLoops(rewriter, loc, originalLoops, rank); | ||||
| 
 | ||||
|     // Coerce the input into a 2-D tensor. `axis` will be the coercing point.
 | ||||
|     // This coercing follows the softmax definition in ONNX:
 | ||||
|  | @ -65,26 +63,22 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { | |||
|     // dimensions. The outer loop is only created once `axis` is not zero.
 | ||||
| 
 | ||||
|     // Define an outer loop with respect to axis.
 | ||||
|     std::vector<Value> outerLoops, optimizedOuterLoops; | ||||
|     std::vector<Value> outerLoops; | ||||
|     outerLoops.reserve(axis); | ||||
|     optimizedOuterLoops.reserve(axis); | ||||
|     for (int i = 0; i < axis; ++i) { | ||||
|       outerLoops.push_back(originalLoops[i]); | ||||
|       optimizedOuterLoops.push_back(optimizedLoops[i]); | ||||
|     } | ||||
|     KrnlIterateOperandPack outerPack(rewriter, outerLoops, optimizedOuterLoops); | ||||
|     KrnlIterateOperandPack outerPack(rewriter, outerLoops); | ||||
|     for (int i = 0; i < axis; ++i) | ||||
|       addDimensionToPack(rewriter, loc, outerPack, input, i); | ||||
| 
 | ||||
|     // Define an inner loop with respect to axis.
 | ||||
|     std::vector<Value> innerLoops, optimizedInnerLoops; | ||||
|     std::vector<Value> innerLoops; | ||||
|     innerLoops.reserve(rank - axis); | ||||
|     optimizedInnerLoops.reserve(rank - axis); | ||||
|     for (int i = axis; i < rank; ++i) { | ||||
|       innerLoops.push_back(originalLoops[i]); | ||||
|       optimizedInnerLoops.push_back(optimizedLoops[i]); | ||||
|     } | ||||
|     KrnlIterateOperandPack innerPack(rewriter, innerLoops, optimizedInnerLoops); | ||||
|     KrnlIterateOperandPack innerPack(rewriter, innerLoops); | ||||
|     for (int i = axis; i < rank; ++i) | ||||
|       addDimensionToPack(rewriter, loc, innerPack, input, i); | ||||
| 
 | ||||
|  | @ -93,10 +87,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { | |||
|     if (axis != 0) { | ||||
|       outerIterateOp = rewriter.create<KrnlIterateOp>(loc, outerPack); | ||||
| 
 | ||||
|       // No optimization
 | ||||
|       rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|       rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|       // Insert instructions inside the outer loop.
 | ||||
|       Block &outerIterationBlock = outerIterateOp.bodyRegion().front(); | ||||
|       rewriter.setInsertionPointToStart(&outerIterationBlock); | ||||
|  | @ -126,10 +116,6 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern { | |||
|       sumIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack); | ||||
|       // Create an inner loop to compute softmax.
 | ||||
|       softmaxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack); | ||||
| 
 | ||||
|       // No optimization
 | ||||
|       rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|       rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
|     } | ||||
| 
 | ||||
|     // Insert instructions inside the max loop.
 | ||||
|  |  | |||
|  | @ -107,7 +107,7 @@ struct ONNXConvOpLowering : public ConversionPattern { | |||
|     // 1. Define outer loops:
 | ||||
|     int64_t nOuterLoops = (group > 1) ? 3 : 2; | ||||
|     BuildKrnlLoop outerLoops(rewriter, loc, nOuterLoops); | ||||
|     outerLoops.createDefineAndOptimizeOp(); | ||||
|     outerLoops.createDefineOp(); | ||||
|     //   for n = 0 .. N:
 | ||||
|     int nIndex = outerLoops.pushBounds(0, inputOperand, 0); | ||||
|     //   for g = 0 .. N:
 | ||||
|  | @ -142,7 +142,7 @@ struct ONNXConvOpLowering : public ConversionPattern { | |||
|       // 2.2 Define spatial loops
 | ||||
|       int64_t nSpatialLoops = resultShape.size() - 2; | ||||
|       BuildKrnlLoop spatialLoops(rewriter, loc, nSpatialLoops); | ||||
|       spatialLoops.createDefineAndOptimizeOp(); | ||||
|       spatialLoops.createDefineOp(); | ||||
|       for (int i = 2; i < resultShape.size(); ++i) | ||||
|         spatialLoops.pushBounds(0, alloc, i); | ||||
| 
 | ||||
|  | @ -168,7 +168,7 @@ struct ONNXConvOpLowering : public ConversionPattern { | |||
|         // 3.2 Define inner loops.
 | ||||
|         int64_t nInnerLoops = 1 + (kernelShape.size() - 2); | ||||
|         BuildKrnlLoop innerLoops(rewriter, loc, nInnerLoops); | ||||
|         innerLoops.createDefineAndOptimizeOp(); | ||||
|         innerLoops.createDefineOp(); | ||||
|         //   for c = 0 .. C/group
 | ||||
|         int cIndex = innerLoops.pushBounds(0, kernelShape[1]); | ||||
|         //   for Kx = 0 .. KX
 | ||||
|  |  | |||
|  | @ -57,9 +57,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { | |||
|     int64_t rank = memRefType.getRank(); | ||||
| 
 | ||||
|     std::vector<Value> originalLoops; | ||||
|     std::vector<Value> optimizedLoops; | ||||
|     Block *optimizationBlock = | ||||
|         defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank); | ||||
|     defineLoops(rewriter, loc, originalLoops, rank); | ||||
| 
 | ||||
|     // Create a KrnlIterateOp along C dimension.
 | ||||
|     // This will be the outer-most loop in order to re-use scale, bias,
 | ||||
|  | @ -67,8 +65,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     SmallVector<Value, 1> loopCIVs; | ||||
|     if (rank > 1) { | ||||
|       KrnlIterateOperandPack cPack( | ||||
|           rewriter, originalLoops[1], optimizedLoops[1]); | ||||
|       KrnlIterateOperandPack cPack(rewriter, originalLoops[1]); | ||||
|       addDimensionToPack(rewriter, loc, cPack, operand, 1); | ||||
|       auto cIterateOp = rewriter.create<KrnlIterateOp>(loc, cPack); | ||||
|       Block &cIterationBlock = cIterateOp.bodyRegion().front(); | ||||
|  | @ -89,21 +86,16 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern { | |||
|     axes.emplace_back(0); | ||||
|     for (int64_t i = 2; i < rank; ++i) | ||||
|       axes.emplace_back(i); | ||||
|     std::vector<Value> packLoops, packOptimizedLoops; | ||||
|     std::vector<Value> packLoops; | ||||
|     for (int i = 0; i < axes.size(); ++i) { | ||||
|       packLoops.emplace_back(originalLoops[axes[i]]); | ||||
|       packOptimizedLoops.emplace_back(optimizedLoops[axes[i]]); | ||||
|     } | ||||
|     KrnlIterateOperandPack pack(rewriter, packLoops, packOptimizedLoops); | ||||
|     KrnlIterateOperandPack pack(rewriter, packLoops); | ||||
|     for (int i = 0; i < axes.size(); ++i) { | ||||
|       addDimensionToPack(rewriter, loc, pack, operand, axes[i]); | ||||
|     } | ||||
|     auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack); | ||||
| 
 | ||||
|     // No optimization
 | ||||
|     rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|     Block &iterationBlock = iterateOp.bodyRegion().front(); | ||||
|     rewriter.setInsertionPointToStart(&iterationBlock); | ||||
| 
 | ||||
|  |  | |||
|  | @ -332,7 +332,7 @@ struct ONNXPoolOpLowering : public ConversionPattern { | |||
|     //     for ho in range(HO):
 | ||||
|     //       for wo in range(WO):
 | ||||
|     BuildKrnlLoop outputLoops(rewriter, loc, outputShape.size()); | ||||
|     outputLoops.createDefineOptimizeAndIterateOp(alloc); | ||||
|     outputLoops.createDefineAndIterateOp(alloc); | ||||
| 
 | ||||
|     auto ipMainRegion = rewriter.saveInsertionPoint(); | ||||
|     rewriter.setInsertionPointToStart(outputLoops.getIterateBlock()); | ||||
|  | @ -475,7 +475,7 @@ struct ONNXPoolOpLowering : public ConversionPattern { | |||
|       //      output[n][c][ho][wo] =
 | ||||
|       //        emitScalarOpFor(output[n][c][ho][wo], input[n, c, hi, wi]);
 | ||||
|       BuildKrnlLoop poolingLoops(rewriter, loc, kernelShape.size()); | ||||
|       poolingLoops.createDefineAndOptimizeOp(); | ||||
|       poolingLoops.createDefineOp(); | ||||
|       for (int i = 0; i < kernelShape.size(); ++i) | ||||
|         poolingLoops.pushBounds( | ||||
|             0, poolDimMap, llvm::makeArrayRef(IVsAndConstants[i])); | ||||
|  |  | |||
|  | @ -190,59 +190,13 @@ void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc, | |||
|   } | ||||
| } | ||||
| 
 | ||||
| // Function that defines the KRNL dialect loops and their respective
 | ||||
| // optimized version.
 | ||||
| KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter, | ||||
|     Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops, | ||||
|     int64_t numLoops) { | ||||
|   // Define loops.
 | ||||
| // Function that emits the krnl.define_loops operation and collects the loop references.
 | ||||
| void defineLoops(ConversionPatternRewriter &rewriter, Location loc, | ||||
|     std::vector<Value> &loops, int64_t numLoops) { | ||||
|   auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, numLoops); | ||||
|   loops.reserve(numLoops); | ||||
|   for (auto result : loopsOp.getResults()) | ||||
|     loops.push_back(result); | ||||
| 
 | ||||
|   // Define optimized version of the loops.
 | ||||
|   auto optimizedLoopsOp = rewriter.create<KrnlOptimizeLoopsOp>(loc, numLoops); | ||||
|   optimizedLoops.reserve(numLoops); | ||||
|   for (auto result : optimizedLoopsOp.getResults()) | ||||
|     optimizedLoops.push_back(result); | ||||
| 
 | ||||
|   return optimizedLoopsOp; | ||||
| } | ||||
| 
 | ||||
| // Function that emits the loops and their optimized version.
 | ||||
| // The function returns a reference to the inner optimization block.
 | ||||
| Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc, | ||||
|     std::vector<Value> &loops, std::vector<Value> &optimizedLoops, | ||||
|     int64_t numLoops) { | ||||
|   KrnlOptimizeLoopsOp optimizedLoopsOp = | ||||
|       emitOptimizedLoops(rewriter, loc, loops, optimizedLoops, numLoops); | ||||
|   return &optimizedLoopsOp.region().front(); | ||||
| } | ||||
| 
 | ||||
| // Function which emits a basic set of loops and optimized loops
 | ||||
| // for a given operation argument. A reference to the loop optimization
 | ||||
| // block is returned in the last argument of the function.
 | ||||
| void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter, | ||||
|     Location loc, Value operand, std::vector<Value> &originalLoops, | ||||
|     KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp) { | ||||
|   // Operand shape.
 | ||||
|   auto shape = operand.getType().cast<MemRefType>().getShape(); | ||||
| 
 | ||||
|   // Number of loops.
 | ||||
|   int64_t rank = shape.size(); | ||||
| 
 | ||||
|   // Define loops and optimized loops.
 | ||||
|   std::vector<Value> optimizedLoops; | ||||
|   optimizedLoopsOp = | ||||
|       emitOptimizedLoops(rewriter, loc, originalLoops, optimizedLoops, rank); | ||||
| 
 | ||||
|   KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops); | ||||
|   // Iterate over the loop nest.
 | ||||
|   for (int i = 0; i < rank; ++i) | ||||
|     addDimensionToPack(rewriter, loc, pack, operand, i); | ||||
| 
 | ||||
|   iterateOp = rewriter.create<KrnlIterateOp>(loc, pack); | ||||
| } | ||||
| 
 | ||||
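The retired emitKrnlLoopsAndIterationForOperand helper reduces, under the new API, to roughly the following per-operand sketch (`operand` is a memref-typed Value; names as at the call sites above):

```cpp
// Emit a loop nest over every dimension of `operand` (a sketch).
int64_t rank = operand.getType().cast<MemRefType>().getShape().size();
std::vector<Value> loops;
defineLoops(rewriter, loc, loops, rank);
KrnlIterateOperandPack pack(rewriter, loops);
for (int i = 0; i < rank; ++i)
  addDimensionToPack(rewriter, loc, pack, operand, i);
auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
rewriter.setInsertionPointToStart(&iterateOp.bodyRegion().front());
```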
| unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { | ||||
|  |  | |||
|  | @ -63,24 +63,10 @@ std::map<int64_t, int64_t> getReductionMapping( | |||
| void addDimensionToPack(ConversionPatternRewriter &rewriter, Location loc, | ||||
|     KrnlIterateOperandPack &pack, Value operand, int index); | ||||
| 
 | ||||
| // Function that defines the KRNL dialect loops and their respective
 | ||||
| // optimized version.
 | ||||
| KrnlOptimizeLoopsOp emitOptimizedLoops(ConversionPatternRewriter &rewriter, | ||||
|     Location loc, std::vector<Value> &loops, std::vector<Value> &optimizedLoops, | ||||
|     int64_t numLoops); | ||||
| 
 | ||||
| // Function that emits the loops and their optimized version.
 | ||||
| // The function returns a reference to the inner optimization block.
 | ||||
| Block *defineLoops(ConversionPatternRewriter &rewriter, Location loc, | ||||
|     std::vector<Value> &loops, std::vector<Value> &optimizedLoops, | ||||
|     int64_t numLoops); | ||||
| 
 | ||||
| // Function which emits a basic set of loops and optimized loops
 | ||||
| // for a given operation argument. A reference to the loop optimization
 | ||||
| // block is returned in the last argument of the function.
 | ||||
| void emitKrnlLoopsAndIterationForOperand(ConversionPatternRewriter &rewriter, | ||||
|     Location loc, Value operand, std::vector<Value> &originalLoops, | ||||
|     KrnlOptimizeLoopsOp &optimizedLoopsOp, KrnlIterateOp &iterateOp); | ||||
| // Function that emits the krnl.define_loops operation to define `numLoops`
 | ||||
| // krnl loops, and fills `loops` with the newly defined loop references.
 | ||||
| void defineLoops(ConversionPatternRewriter &rewriter, Location loc, | ||||
|     std::vector<Value> &loops, int64_t numLoops); | ||||
| 
 | ||||
| unsigned getMemRefEltSizeInBytes(MemRefType memRefType); | ||||
| 
 | ||||
|  |  | |||
|  | @ -212,7 +212,7 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>( | |||
|       operandAdaptor.X().getType().cast<ShapedType>().getElementType(), 0); | ||||
|   int nLoops = 3; | ||||
|   BuildKrnlLoop initializationLoops(rewriter, loc, nLoops); | ||||
|   initializationLoops.createDefineOptimizeAndIterateOp(state.ht); | ||||
|   initializationLoops.createDefineAndIterateOp(state.ht); | ||||
|   auto ipInitializationLoops = rewriter.saveInsertionPoint(); | ||||
|   rewriter.setInsertionPointToStart(initializationLoops.getIterateBlock()); | ||||
|   { | ||||
|  | @ -292,7 +292,7 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>( | |||
|   //     compute it, ft, ct, Ct, ot, Ht
 | ||||
| 
 | ||||
|   BuildKrnlLoop stateLoops(rewriter, loc, 2); | ||||
|   stateLoops.createDefineAndOptimizeOp(); | ||||
|   stateLoops.createDefineOp(); | ||||
|   stateLoops.pushBounds(0, batchDimSize); | ||||
|   stateLoops.pushBounds(0, hiddenDimSize); | ||||
|   stateLoops.createIterateOp(); | ||||
|  | @ -372,7 +372,7 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>( | |||
|     { // Emit instructions for matrix multiplications.
 | ||||
|       // input_size is the reduction dimension.
 | ||||
|       BuildKrnlLoop reductionLoops(rewriter, loc, 1); | ||||
|       reductionLoops.createDefineAndOptimizeOp(); | ||||
|       reductionLoops.createDefineOp(); | ||||
|       reductionLoops.pushBounds(0, inputDimSize); | ||||
|       reductionLoops.createIterateOp(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -93,7 +93,7 @@ struct ONNXRNNOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     if (direction == FORWARD || direction == BIDIRECTIONAL) { | ||||
|       BuildKrnlLoop sequenceLoops(rewriter, loc, 1); | ||||
|       sequenceLoops.createDefineAndOptimizeOp(); | ||||
|       sequenceLoops.createDefineOp(); | ||||
|       sequenceLoops.pushBounds(0, sequenceDimSize); | ||||
|       sequenceLoops.createIterateOp(); | ||||
| 
 | ||||
|  | @ -112,7 +112,7 @@ struct ONNXRNNOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     if (direction == REVERSE || direction == BIDIRECTIONAL) { | ||||
|       BuildKrnlLoop sequenceLoops(rewriter, loc, 1); | ||||
|       sequenceLoops.createDefineAndOptimizeOp(); | ||||
|       sequenceLoops.createDefineOp(); | ||||
|       sequenceLoops.pushBounds(0, sequenceDimSize); | ||||
|       sequenceLoops.createIterateOp(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -46,7 +46,7 @@ struct ONNXConcatOpLowering : public ConversionPattern { | |||
|       auto currShape = operands[i].getType().cast<MemRefType>().getShape(); | ||||
|       // Create loop.
 | ||||
|       BuildKrnlLoop inputLoops(rewriter, loc, rank); | ||||
|       inputLoops.createDefineAndOptimizeOp(); | ||||
|       inputLoops.createDefineOp(); | ||||
|       for (int r = 0; r < rank; ++r) | ||||
|         inputLoops.pushBounds(0, operands[i], r); | ||||
|       inputLoops.createIterateOp(); | ||||
|  |  | |||
|  | @ -63,14 +63,14 @@ struct ONNXPadOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Iterate over the loop nest using the output shape.
 | ||||
|     BuildKrnlLoop padLoops(rewriter, loc, rank); | ||||
|     padLoops.createDefineAndOptimizeOp(); | ||||
|     padLoops.createDefineOp(); | ||||
|     for (int i = 0; i < rank; ++i) | ||||
|       padLoops.pushBounds(0, alloc, i); | ||||
|     padLoops.createIterateOp(); | ||||
| 
 | ||||
|     // Iterate over the loop nest using the input shape.
 | ||||
|     BuildKrnlLoop valueLoops(rewriter, loc, rank); | ||||
|     valueLoops.createDefineAndOptimizeOp(); | ||||
|     valueLoops.createDefineOp(); | ||||
|     for (int i = 0; i < rank; ++i) | ||||
|       valueLoops.pushBounds(0, operandAdaptor.data(), i); | ||||
|     valueLoops.createIterateOp(); | ||||
|  |  | |||
|  | @ -46,14 +46,14 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Iterate over the loop nest using the output shape.
 | ||||
|     BuildKrnlLoop padLoops(rewriter, loc, rank); | ||||
|     padLoops.createDefineAndOptimizeOp(); | ||||
|     padLoops.createDefineOp(); | ||||
|     for (int i = 0; i < rank; ++i) | ||||
|       padLoops.pushBounds(0, alloc, i); | ||||
|     padLoops.createIterateOp(); | ||||
| 
 | ||||
|     // Iterate over the loop nest using the input shape.
 | ||||
|     BuildKrnlLoop valueLoops(rewriter, loc, rank); | ||||
|     valueLoops.createDefineAndOptimizeOp(); | ||||
|     valueLoops.createDefineOp(); | ||||
|     for (int i = 0; i < rank; ++i) | ||||
|       valueLoops.pushBounds(0, operandAdaptor.data(), i); | ||||
|     valueLoops.createIterateOp(); | ||||
|  |  | |||
|  | @ -70,7 +70,7 @@ struct ONNXSplitOpLowering : public ConversionPattern { | |||
|       OpBuilder::InsertionGuard insertGuard(rewriter); | ||||
|       // Create loop.
 | ||||
|       BuildKrnlLoop outputLoops(rewriter, loc, rank); | ||||
|       outputLoops.createDefineOptimizeAndIterateOp(allocs[i]); | ||||
|       outputLoops.createDefineAndIterateOp(allocs[i]); | ||||
|       outputLoops.createIterateOp(); | ||||
|       rewriter.setInsertionPointToStart(outputLoops.getIterateBlock()); | ||||
|       // Indices for the read and write.
 | ||||
|  |  | |||
|  | @ -38,11 +38,9 @@ struct ONNXTransposeOpLowering : public ConversionPattern { | |||
| 
 | ||||
|     // Define loops.
 | ||||
|     std::vector<Value> originalLoops; | ||||
|     std::vector<Value> optimizedLoops; | ||||
|     Block *optimizationBlock = | ||||
|         defineLoops(rewriter, loc, originalLoops, optimizedLoops, rank); | ||||
|     defineLoops(rewriter, loc, originalLoops, rank); | ||||
| 
 | ||||
|     KrnlIterateOperandPack pack(rewriter, originalLoops, optimizedLoops); | ||||
|     KrnlIterateOperandPack pack(rewriter, originalLoops); | ||||
|     // Iterate over the loop nest using the input shape.
 | ||||
|     for (int i = 0; i < rank; ++i) | ||||
|       addDimensionToPack(rewriter, loc, pack, data, i); | ||||
|  | @ -53,14 +51,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern { | |||
|     // Now perform the insertions into the body of the
 | ||||
|     // just generated instructions:
 | ||||
| 
 | ||||
|     // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
 | ||||
|     rewriter.setInsertionPointToEnd(optimizationBlock); | ||||
|     // Return from KrnlOptimizeLoopsOp body.
 | ||||
|     // When no optimizations are present we just return the loops
 | ||||
|     // unchaged.
 | ||||
|     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
| 
 | ||||
|     // 2. Insert instructions inside the KernelIterateOp body.
 | ||||
|     // Insert instructions inside the KernelIterateOp body.
 | ||||
|     rewriter.setInsertionPointToStart(&iterationBlock); | ||||
| 
 | ||||
|     // Handle the operation.
 | ||||
|  |  | |||
|  | @ -161,8 +161,7 @@ void KrnlIterateOperandPack::pushAffineMapBound( | |||
| BuildKrnlLoop::BuildKrnlLoop( | ||||
|     ConversionPatternRewriter &rewriter, Location loc, int loopNum) | ||||
|     : rewriter(rewriter), loc(loc), originalLoopNum(loopNum), pack(NULL), | ||||
|       pushCount(0), createdDefineOp(false), createdOptimizeOp(false), | ||||
|       createdIterateOp(false) { | ||||
|       pushCount(0), createdDefineOp(false), createdIterateOp(false) { | ||||
|   if (originalLoopNum <= 0) | ||||
|     emitError(loc, "Expected positive number of original loops."); | ||||
| } | ||||
|  | @ -177,7 +176,7 @@ BuildKrnlLoop::~BuildKrnlLoop() { | |||
|     free(pack); | ||||
| } | ||||
| 
 | ||||
| void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) { | ||||
| void BuildKrnlLoop::createDefineOp() { | ||||
|   // Insert define loop operation.
 | ||||
|   auto loopsOp = rewriter.create<KrnlDefineLoopsOp>(loc, originalLoopNum); | ||||
|   originalLoops.reserve(originalLoopNum); | ||||
|  | @ -185,25 +184,8 @@ void BuildKrnlLoop::createDefineAndOptimizeOp(bool withEmptyOptimization) { | |||
|     originalLoops.push_back(result); | ||||
|   createdDefineOp = true; | ||||
| 
 | ||||
|   // Insert optimize loop operation.
 | ||||
|   auto optimizedLoopsOp = | ||||
|       rewriter.create<KrnlOptimizeLoopsOp>(loc, originalLoopNum); | ||||
|   optLoops.reserve(originalLoopNum); | ||||
| 
 | ||||
|   // Emit empty optimizations if flag is set.
 | ||||
|   if (withEmptyOptimization) { | ||||
|     for (auto result : optimizedLoopsOp.getResults()) | ||||
|       optLoops.push_back(result); | ||||
|     optBlock = &optimizedLoopsOp.region().front(); | ||||
|     auto ip = rewriter.saveInsertionPoint(); | ||||
|     rewriter.setInsertionPointToEnd(optBlock); | ||||
|     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops); | ||||
|     rewriter.restoreInsertionPoint(ip); | ||||
|   } | ||||
|   createdOptimizeOp = true; | ||||
| 
 | ||||
|   // prepare data structure to push bounds
 | ||||
|   pack = new KrnlIterateOperandPack(rewriter, originalLoops, optLoops); | ||||
|   pack = new KrnlIterateOperandPack(rewriter, originalLoops); | ||||
| } | ||||
| 
 | ||||
| int BuildKrnlLoop::pushBounds(int64_t lowerBound, int64_t upperBound) { | ||||
|  | @ -254,9 +236,6 @@ void BuildKrnlLoop::createIterateOp() { | |||
|   // Loop definition operation is mandatory.
 | ||||
|   assert(createdDefineOp && "Must create define op before iterate op."); | ||||
| 
 | ||||
|   // Loop optimization operation is mandatory (for now).
 | ||||
|   assert(createdOptimizeOp && "Must create optimize op before iterate op."); | ||||
| 
 | ||||
|   // Check if all bounds have been defined.
 | ||||
|   assert(pushCount == originalLoopNum && | ||||
|          "Must push bounds for all original loops."); | ||||
|  | @ -267,15 +246,14 @@ void BuildKrnlLoop::createIterateOp() { | |||
|   createdIterateOp = true; | ||||
| } | ||||
| 
 | ||||
| void BuildKrnlLoop::createDefineOptimizeAndIterateOp( | ||||
|     Value memRefOperand, bool withEmptyOptimization) { | ||||
| void BuildKrnlLoop::createDefineAndIterateOp(Value memRefOperand) { | ||||
|   // Rank of the MemRef operand. We will emit a loop for each dimension.
 | ||||
|   int loopNum = memRefOperand.getType().cast<MemRefType>().getShape().size(); | ||||
|   assert(originalLoopNum == loopNum && | ||||
|          "Mismatch in loop numbers from constructor and define."); | ||||
| 
 | ||||
|   // Emit the definition operation for the loop nest.
 | ||||
|   createDefineAndOptimizeOp(withEmptyOptimization); | ||||
|   createDefineOp(); | ||||
| 
 | ||||
|   // Push a lower-upper bound pair for each dimension of the MemRef operand.
 | ||||
|   // The lower bound in this case is always zero.
 | ||||
|  |  | |||
|  | @ -83,6 +83,13 @@ struct KrnlIterateOperandPack { | |||
|         _operands.end(), optimizedLoops.begin(), optimizedLoops.end()); | ||||
|   } | ||||
| 
 | ||||
|   // Create a pack with optimizedLoops = inputLoops (i.e., no optimization).
 | ||||
|   KrnlIterateOperandPack( | ||||
|       mlir::Builder &builder, llvm::ArrayRef<mlir::Value> inputLoops) | ||||
|       : builder(builder), inputLoops(inputLoops), optimizedLoops(inputLoops) { | ||||
|     _operands.insert(_operands.end(), inputLoops.begin(), inputLoops.end()); | ||||
|   } | ||||
| 
 | ||||
|   void pushConstantBound(int64_t bound); | ||||
| 
 | ||||
|   void pushOperandBound(mlir::Value operand); | ||||
|  | @ -112,19 +119,15 @@ private: | |||
| }; | ||||
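A short usage sketch of this new constructor with constant bounds (hypothetical bounds; `rewriter` and `loc` assumed in scope):

```cpp
std::vector<Value> loops;
defineLoops(rewriter, loc, loops, /*numLoops=*/2);

// The pack reuses the input loops as their own "optimized" loops, so no
// krnl.optimize_loops region is involved anywhere.
KrnlIterateOperandPack pack(rewriter, loops);
pack.pushConstantBound(0);  // loop 0: lower bound
pack.pushConstantBound(10); // loop 0: upper bound
pack.pushConstantBound(0);  // loop 1: lower bound
pack.pushConstantBound(20); // loop 1: upper bound
auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
```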
| 
 | ||||
| // Helper class to write kernel loops. This class will let us build a single
 | ||||
| // define/optimize/iterate operation combo. We can then insert optimizations in
 | ||||
| // the body of the optimization operation, and operations in the body of the
 | ||||
| // iterate operation.
 | ||||
| // define/iterate operation combo. We can then insert operations in the body of
 | ||||
| // the iterate operation.
 | ||||
| //
 | ||||
| // The sequence is as follows:
 | ||||
| //
 | ||||
| //   1) Create an object giving the rewriter, location, and number of loops
 | ||||
| //   in the original (non-optimized) loop nest.
 | ||||
| //
 | ||||
| //   2) Create define & optimize ops (currently paired). Optimizations can then
 | ||||
| //   be added to the inner block of the optimize operation. Make sure to set
 | ||||
| //   the insertion point to that block for optimizations to go in the right
 | ||||
| //   place.
 | ||||
| //   2) Create the define_loops op to define the new loop variables.
 | ||||
| //
 | ||||
| //   3) Push the bounds for each of the original loops. Bounds are pushed in
 | ||||
| //   pairs (lower & upper bounds). There are a few methods to do it depending
 | ||||
|  | @ -153,7 +156,7 @@ public: | |||
|   // Create the define_loops operation with loopNum original loops.
 | ||||
|   void createDefineAndOptimizeOp(bool withEmptyOptimization = true); | ||||
|   void createDefineOp(); | ||||
| 
 | ||||
|   // Push bounds (lower and upper) for each of the loops (order matters).
 | ||||
|   // The function returns the order number associated with the loop iteration.
 | ||||
|  | @ -172,13 +175,12 @@ public: | |||
|   // operations associated with this loop nest have been emitted already.
 | ||||
|   void createIterateOp(); | ||||
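Concretely, steps 1) through 4) look like this (a sketch; the bound values are borrowed from the LSTM lowering in this diff):

```cpp
// 1) Construct the helper for a loop nest of depth 2.
BuildKrnlLoop stateLoops(rewriter, loc, /*loopNum=*/2);
// 2) Emit the krnl.define_loops operation.
stateLoops.createDefineOp();
// 3) Push a (lower, upper) bound pair for each original loop.
stateLoops.pushBounds(0, batchDimSize);
stateLoops.pushBounds(0, hiddenDimSize);
// 4) Emit krnl.iterate and start inserting the loop body.
stateLoops.createIterateOp();
rewriter.setInsertionPointToStart(stateLoops.getIterateBlock());
```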
| 
 | ||||
|   // Create the loop nest definition, optimization and iteration operations
 | ||||
|   // Create the loop nest definition and iteration operations
 | ||||
|   // for a given operand of MemRef type. The loop nest has a depth equal to the
 | ||||
|   // rank of the MemRef operand. The lower bound of each loop is zero. The
 | ||||
|   // upper bound of each loop is given by the corresponding dimension of the
 | ||||
|   // MemRef operand.
 | ||||
|   void createDefineOptimizeAndIterateOp( | ||||
|       Value memRefOperand, bool withEmptyOptimization = true); | ||||
|   void createDefineAndIterateOp(Value memRefOperand); | ||||
| 
 | ||||
|   // Get the (original loop) induction variable associated with the given
 | ||||
|   // index. Use the index returned when pushing the bounds.
 | ||||
|  | @ -220,7 +222,6 @@ private: | |||
| 
 | ||||
|   // Flags that keep track of emitted operations.
 | ||||
|   bool createdDefineOp; | ||||
|   bool createdOptimizeOp; | ||||
|   bool createdIterateOp; | ||||
| 
 | ||||
|   // Saved insertion point in the code region of the KrnlOptimizeLoopsOp.
 | ||||
|  |  | |||
|  | @ -78,47 +78,6 @@ ParseResult parseKrnlDefineLoopsOp( | |||
|   return success(); | ||||
| } | ||||
| 
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| // KrnlOptimizeLoopsOp
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| void KrnlOptimizeLoopsOp::build( | ||||
|     OpBuilder &builder, OperationState &result, int num_optimized_loops) { | ||||
|   result.types.append(num_optimized_loops, LoopType::get(builder.getContext())); | ||||
|   // Create a region and a block for the body.
 | ||||
|   // Schedule intrinsics will be placed into this region.
 | ||||
|   Region *region = result.addRegion(); | ||||
|   auto *body = new Block(); | ||||
|   region->push_back(body); | ||||
| } | ||||
| 
 | ||||
| void print(OpAsmPrinter &p, KrnlOptimizeLoopsOp &op) { | ||||
|   p << "krnl.optimize_loops "; | ||||
|   p.printRegion(op.region(), /*printEntryBlockArgs=*/false, | ||||
|       /*printBlockTerminators=*/true); | ||||
|   p << " : "; | ||||
|   p.printFunctionalType(op); | ||||
| } | ||||
| 
 | ||||
| ParseResult parseKrnlOptimizeLoopsOp( | ||||
|     OpAsmParser &parser, OperationState &result) { | ||||
|   // Parse the schedule body region.
 | ||||
|   Region *region = result.addRegion(); | ||||
|   if (parser.parseRegion(*region, llvm::None, llvm::None)) | ||||
|     return failure(); | ||||
| 
 | ||||
|   // Parse the function type for the schedule operation.
 | ||||
|   // Then following the hint of this parsed function type, parse the
 | ||||
|   // returned timestamp space dimension handlers.
 | ||||
|   FunctionType schedule_func_type; | ||||
|   if (parser.parseColonType(schedule_func_type) || | ||||
|       parser.addTypesToList(schedule_func_type.getResults(), result.types)) { | ||||
|     failure(); | ||||
|   } | ||||
| 
 | ||||
|   return success(); | ||||
| } | ||||
| 
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| // KrnlIterateOp
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
|  | @ -340,26 +299,9 @@ static LogicalResult verify(KrnlIterateOp op) { | |||
| } | ||||
| 
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| // KrnlReturnLoopsOp
 | ||||
| // KrnlEntryPointOp
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| void print(OpAsmPrinter &p, KrnlReturnLoopsOp &op) { | ||||
|   p << "krnl.return_loops "; | ||||
|   p.printOperands(op.operand_begin(), op.operand_end()); | ||||
| } | ||||
| 
 | ||||
| ParseResult parseKrnlReturnLoopsOp( | ||||
|     OpAsmParser &parser, OperationState &result) { | ||||
|   // Parse the loops to return.
 | ||||
|   SmallVector<OpAsmParser::OperandType, 4> timestamp_dim_handlers; | ||||
|   if (parser.parseOperandList(timestamp_dim_handlers) || | ||||
|       parser.resolveOperands(timestamp_dim_handlers, | ||||
|           LoopType::get(result.getContext()), result.operands)) | ||||
|     return failure(); | ||||
| 
 | ||||
|   return success(); | ||||
| } | ||||
| 
 | ||||
| void KrnlEntryPointOp::build(mlir::OpBuilder &builder, OperationState &state, | ||||
|     SymbolRefAttr funcAttr, IntegerAttr numInputs, IntegerAttr numOutputs) { | ||||
|   state.addAttribute(KrnlEntryPointOp::getEntryPointFuncAttrName(), funcAttr); | ||||
|  |  | |||
|  | @ -48,32 +48,6 @@ def KrnlDefineLoopsOp : Op<Krnl_Dialect, "define_loops"> { | |||
| }]; | ||||
| } | ||||
| 
 | ||||
| def KrnlOptimizeLoopsOp : Op<Krnl_Dialect, "optimize_loops"> { | ||||
|   let summary = "optimize_loops operation"; | ||||
|   let description = [{ | ||||
|     The "krnl.optimize_loops" operation is essentially a cosmetic operation | ||||
|     which exists to encapsulate a region where loops are being scheduled / | ||||
|     optimized. | ||||
| 
 | ||||
|     The optimized loops are returned at the end of the region associated with | ||||
|     the krnl.optimize_loops operation. | ||||
| 
 | ||||
|     For example : TBD once we have actual schedule intrinsics. | ||||
|   }]; | ||||
| 
 | ||||
|   let arguments = (ins Variadic<AnyType>); | ||||
|   let results = (outs Variadic<AnyType>); | ||||
|   let regions = (region SizedRegion<1>:$region); | ||||
| 
 | ||||
|   let skipDefaultBuilders = 1; | ||||
| 
 | ||||
|   let builders = [ OpBuilder<"OpBuilder &builder, OperationState &result, " | ||||
|                              "int timestamp_space_rank"> ]; | ||||
| 
 | ||||
|   let printer = [{ return ::print(p, *this); }]; | ||||
|   let parser = [{ return ::parse$cppClass(parser, result); }]; | ||||
| } | ||||
| 
 | ||||
| def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineScope]> { | ||||
|   let summary = "iterate operation"; | ||||
|   let description = [{ | ||||
|  | @ -129,19 +103,6 @@ def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineS | |||
|     let verifier = [{ return ::verify(*this); }]; | ||||
| } | ||||
| 
 | ||||
| def KrnlReturnLoopsOp : Op<Krnl_Dialect, "return_loops", [Terminator]> { | ||||
|   let summary = "Krnl return handler operation"; | ||||
|   let description = [{ | ||||
|     Krnl return_loops operation is a terminator operation for returning | ||||
|     scheduled dimension handlers in the krnl.optimize_loops region. | ||||
|   }]; | ||||
| 
 | ||||
|   let arguments = (ins Variadic<AnyType>); | ||||
| 
 | ||||
|   let printer = [{ return ::print(p, *this); }]; | ||||
|   let parser = [{ return ::parse$cppClass(parser, result); }]; | ||||
| } | ||||
| 
 | ||||
| def KrnlTerminatorOp : Op<Krnl_Dialect, "terminate", [Terminator]> { | ||||
|   let summary = "Krnl terminator operation"; | ||||
|   let description = [{ | ||||
|  |  | |||
|  | @ -125,21 +125,6 @@ public: | |||
| // Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| class KrnlOptimizeLoopsLowering : public OpRewritePattern<KrnlOptimizeLoopsOp> { | ||||
| public: | ||||
|   using OpRewritePattern<KrnlOptimizeLoopsOp>::OpRewritePattern; | ||||
| 
 | ||||
|   LogicalResult matchAndRewrite( | ||||
|       KrnlOptimizeLoopsOp op, PatternRewriter &rewriter) const override { | ||||
|     rewriter.eraseOp(op); | ||||
|     return success(); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| // Krnl to Affine Rewrite Patterns: KrnlBlock operation.
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| class KrnlBlockOpLowering : public OpRewritePattern<KrnlBlockOp> { | ||||
| public: | ||||
|   using OpRewritePattern<KrnlBlockOp>::OpRewritePattern; | ||||
|  | @ -151,21 +136,6 @@ public: | |||
|   } | ||||
| }; | ||||
| 
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| // Krnl to Affine Rewrite Patterns: KrnlOptimizeLoops operation.
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| class KrnlReturnLoopOpLowering : public OpRewritePattern<KrnlReturnLoopsOp> { | ||||
| public: | ||||
|   using OpRewritePattern<KrnlReturnLoopsOp>::OpRewritePattern; | ||||
| 
 | ||||
|   LogicalResult matchAndRewrite( | ||||
|       KrnlReturnLoopsOp op, PatternRewriter &rewriter) const override { | ||||
|     rewriter.eraseOp(op); | ||||
|     return success(); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| // KrnlToAffineLoweringPass
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
|  | @ -230,14 +200,11 @@ void KrnlToAffineLoweringPass::runOnFunction() { | |||
| 
 | ||||
|   OwningRewritePatternList patterns; | ||||
|   patterns.insert<KrnlTerminatorLowering, KrnlDefineLoopsLowering, | ||||
|       KrnlOptimizeLoopsLowering, KrnlBlockOpLowering, KrnlReturnLoopOpLowering>( | ||||
|       &getContext()); | ||||
|       KrnlBlockOpLowering>(&getContext()); | ||||
| 
 | ||||
|   // Do not lower operations that pertain to schedules just yet.
 | ||||
|   target.addLegalOp<KrnlBlockOp>(); | ||||
|   target.addLegalOp<KrnlDefineLoopsOp>(); | ||||
|   target.addLegalOp<KrnlOptimizeLoopsOp>(); | ||||
|   target.addLegalOp<KrnlReturnLoopsOp>(); | ||||
|   if (failed(applyPartialConversion(function, target, patterns))) | ||||
|     return signalPassFailure(); | ||||
| 
 | ||||
|  | @ -312,8 +279,6 @@ void KrnlToAffineLoweringPass::runOnFunction() { | |||
|   // Remove/lower schedule related operations.
 | ||||
|   target.addIllegalOp<KrnlDefineLoopsOp>(); | ||||
|   target.addIllegalOp<KrnlBlockOp>(); | ||||
|   target.addIllegalOp<KrnlOptimizeLoopsOp>(); | ||||
|   target.addIllegalOp<KrnlReturnLoopsOp>(); | ||||
|   if (failed(applyPartialConversion(function, target, patterns))) | ||||
|     return signalPassFailure(); | ||||
| } | ||||
|  |  | |||
|  | @ -12,9 +12,6 @@ | |||
| 
 | ||||
| func @simple_iterate(%N : index) { | ||||
|   %ii, %ij, %ik = krnl.define_loops 3 | ||||
|   %oi, %oj, %ok = krnl.optimize_loops  { | ||||
|     krnl.return_loops %ii, %ij, %ik | ||||
|   } : () -> (!krnl.loop, !krnl.loop, !krnl.loop) | ||||
| 
 | ||||
|   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { | ||||
|   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): | ||||
|  | @ -22,18 +19,18 @@ func @simple_iterate(%N : index) { | |||
|   // GENERIC-NEXT: bounds = [#{{.*}}, #{{.*}}, #{{.*}}, #{{.*}}] | ||||
| 
 | ||||
|   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 1 to 11) { | ||||
|   krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) { | ||||
|   krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 1 to 11) { | ||||
| 
 | ||||
|   } | ||||
| 
 | ||||
|   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { | ||||
|   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): | ||||
|   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) { | ||||
|   krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) { | ||||
|   krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to 10, %ij -> %j = 0 to 10) { | ||||
|     // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}) ( { | ||||
|     // GENERIC-NEXT: ^bb0(%{{.*}}: index): | ||||
|     // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10) { | ||||
|     krnl.iterate(%ok) with (%ik -> %k = 0 to 10) { | ||||
|     krnl.iterate(%ik) with (%ik -> %k = 0 to 10) { | ||||
| 
 | ||||
|     } | ||||
|   } | ||||
|  | @ -41,7 +38,7 @@ func @simple_iterate(%N : index) { | |||
|   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { | ||||
|   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): | ||||
|   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to %{{.*}}, %{{.*}} -> %{{.*}} = 0 to 10) { | ||||
|   krnl.iterate(%oi, %oj) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) { | ||||
|   krnl.iterate(%ii, %ij) with (%ii -> %i = 0 to %N, %ij -> %j = 0 to 10) { | ||||
| 
 | ||||
|   } | ||||
| 
 | ||||
|  | @ -52,25 +49,22 @@ func @simple_iterate(%N : index) { | |||
| 
 | ||||
| func @affine_map_bound(%N : index) { | ||||
|   %ii, %ij, %ik = krnl.define_loops 3 | ||||
|   %oi, %oj, %ok = krnl.optimize_loops  { | ||||
|     krnl.return_loops %ii, %ij, %ik | ||||
|   } : () -> (!krnl.loop, !krnl.loop, !krnl.loop) | ||||
| 
 | ||||
|   // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { | ||||
|   // GENERIC-NEXT: ^bb0(%{{.*}}: index, %{{.*}}: index): | ||||
|   // CHECK: krnl.iterate(%{{.*}}, %{{.*}}) with (%{{.*}} -> %{{.*}} = 0 to 10, %{{.*}} -> %{{.*}} = 0 to 10) { | ||||
|   krnl.iterate(%oi, %oj) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) { | ||||
|   krnl.iterate(%ii, %ij) with (%ii -> %i = affine_map<()->(0)>() to affine_map<()->(10)>(), %ij -> %j = 0 to 10) { | ||||
|     // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { | ||||
|     // GENERIC-NEXT: ^bb0(%{{.*}}: index): | ||||
|     // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = #{{.*}}(%{{.*}}, %{{.*}}) to #{{.*}}(%{{.*}}, %{{.*}})) { | ||||
|     krnl.iterate(%ok) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) { | ||||
|     krnl.iterate(%ik) with (%ik -> %k = affine_map<(d0, d1)->(d0 - d1)>(%i, %j) to affine_map<(d0, d1)->(d0 + d1)>(%i, %j)) { | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     // GENERIC: "krnl.iterate"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) ( { | ||||
|     // GENERIC-NEXT: ^bb0(%{{.*}}: index): | ||||
|     // CHECK: krnl.iterate(%{{.*}}) with (%{{.*}} -> %{{.*}} = max #map{{.*}}(%{{.*}}, %{{.*}}) to min #map{{.*}}(%{{.*}}, %{{.*}})[%{{.*}}]) { | ||||
|     krnl.iterate(%ok) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) { | ||||
|     krnl.iterate(%ik) with (%ik -> %k = max affine_map<(d0, d1)->(d0 - d1, 0)>(%i, %j) to min affine_map<(d0, d1)[s0]->(d0 + d1, s0)>(%i, %j)[%N]) { | ||||
| 
 | ||||
|     } | ||||
|   } | ||||
|  |  | |||
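The round-trip test above now passes krnl.define_loops results directly to krnl.iterate. A hedged sketch consolidating the bound forms it exercises (names illustrative): integer constants, SSA indices, affine maps, and max/min for multi-result maps.

```mlir
// Bounds may be constants, SSA values, or affine maps, as checked above.
func @bounds_sketch(%N : index) {
  %l0, %l1 = krnl.define_loops 2
  krnl.iterate(%l0, %l1) with (%l0 -> %i = 0 to %N,
      %l1 -> %j = affine_map<() -> (0)>() to affine_map<() -> (10)>()) {
  }
  return
}
```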
|  | @ -12,14 +12,12 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { | |||
|   // CHECK: [[MEMPOOL:%.+]] = alloc() : memref<400xi8> | ||||
|   // CHECK: [[GETREF:%.+]] = "krnl.getref"([[MEMPOOL]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32> | ||||
|   // CHECK: krnl.define_loops | ||||
|   // CHECK: krnl.optimize_loops | ||||
|   // CHECK: krnl.iterate | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> | ||||
|   // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 | ||||
|   // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32> | ||||
|   // CHECK: krnl.define_loops | ||||
|   // CHECK: krnl.optimize_loops | ||||
|   // CHECK: krnl.iterate | ||||
|   // CHECK: dealloc [[MEMPOOL]] : memref<400xi8> | ||||
|   // CHECK: return [[RES]] : memref<10x10xf32> | ||||
|  | @ -41,14 +39,12 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 | |||
|   // CHECK: [[MEMPOOL1:%.+]] = alloc() : memref<400xi8> | ||||
|   // CHECK: [[GETREF1:%.+]] = "krnl.getref"([[MEMPOOL1]], [[CONST0]]) : (memref<400xi8>, i64) -> memref<10x10xf32> | ||||
|   // CHECK: krnl.define_loops | ||||
|   // CHECK: krnl.optimize_loops | ||||
|   // CHECK: krnl.iterate | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 | ||||
|   // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: krnl.define_loops | ||||
|   // CHECK: krnl.optimize_loops | ||||
|   // CHECK: krnl.iterate | ||||
|   // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32> | ||||
|  | @ -57,7 +53,6 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 | |||
|   // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32 | ||||
|   // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> | ||||
|   // CHECK: krnl.define_loops | ||||
|   // CHECK: krnl.optimize_loops | ||||
|   // CHECK: krnl.iterate | ||||
|   // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> | ||||
|   // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32> | ||||
|  |  | |||
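The memory-pool checks above assert the loop skeleton around buffers carved from a single pool. A sketch of that pool pattern (size and offset illustrative): one i8 buffer backs typed views produced by krnl.getref at byte offsets.

```mlir
func @pool_sketch() -> memref<10x10xf32> {
  %c0 = constant 0 : i64
  %pool = alloc() : memref<400xi8>
  // Reinterpret the pool at byte offset %c0 as a typed memref.
  %view = "krnl.getref"(%pool, %c0) : (memref<400xi8>, i64) -> memref<10x10xf32>
  return %view : memref<10x10xf32>
}
```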
|  | @ -2,7 +2,7 @@ | |||
| 
 | ||||
| // CHECK-LABEL: func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32> | ||||
| func @test_elide_krnl_global_constant(%arg0: memref<1xf32>) -> memref<1x70xf32> { | ||||
|   %0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32> | ||||
|   %0 = "krnl.global"() {name = "constant_0", shape = [1, 70], value = dense<[[0., 1.0, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]> : tensor<1x70xf32>} : () -> memref<1x70xf32> | ||||
|   return %0 : memref<1x70xf32> | ||||
| 
 | ||||
|   // CHECK: {{.*}} = "krnl.global"() {name = "constant_0", shape = [1, 70]} : () -> memref<1x70xf32> | ||||
|  |  | |||
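For reference, the elision pass exercised above drops the dense value attribute of krnl.global and keeps only its name and shape, so the printed IR stays small. A sketch with a small illustrative constant (name and shape are hypothetical):

```mlir
func @global_sketch() -> memref<1x2xf32> {
  %0 = "krnl.global"() {name = "c1", shape = [1, 2], value = dense<[[0., 1.]]> : tensor<1x2xf32>} : () -> memref<1x2xf32>
  // After elision: "krnl.global"() {name = "c1", shape = [1, 2]} : () -> memref<1x2xf32>
  return %0 : memref<1x2xf32>
}
```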
										
											
File diff suppressed because it is too large
							|  | @ -12,10 +12,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -23,10 +20,7 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
| 
 | ||||
|   /// Second Add | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -51,10 +45,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -62,10 +53,7 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
| 
 | ||||
|   /// Second Mul | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -90,10 +78,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -101,10 +86,7 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
| 
 | ||||
|   /// Second Div | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -129,10 +111,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -140,10 +119,7 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
| 
 | ||||
|   /// Second Sub | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -168,10 +144,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1 | ||||
|  | @ -179,10 +152,7 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor | |||
| 
 | ||||
|   /// Second And | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1 | ||||
|  | @ -207,10 +177,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<* | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1 | ||||
|  | @ -218,10 +185,7 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<* | |||
| 
 | ||||
|   /// Second Or | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1 | ||||
|  | @ -246,10 +210,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi1> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi1> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1 | ||||
|  | @ -257,10 +218,7 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor | |||
| 
 | ||||
|   /// Second Xor | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> | ||||
|   // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1 | ||||
|  | @ -286,12 +244,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32 | ||||
|   // CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|  | @ -301,12 +256,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32 | ||||
|   // CHECK: affine.store [[EXP]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|  | @ -331,12 +283,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32 | ||||
|  | @ -352,12 +301,9 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32 | ||||
|  | @ -388,12 +334,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32 | ||||
|  | @ -409,12 +352,9 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32 | ||||
|  | @ -445,12 +385,9 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32 | ||||
|  | @ -466,12 +403,8 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[TWO:%.+]] = constant {{2.+}} : f32 | ||||
|  | @ -502,12 +435,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32 | ||||
|  | @ -522,12 +452,9 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32 | ||||
|  | @ -557,12 +484,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32 | ||||
|  | @ -574,12 +498,9 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32 | ||||
|   // CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32 | ||||
|  | @ -605,10 +526,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -616,10 +534,7 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
| 
 | ||||
|   /// Second Sum | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -644,10 +559,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -656,10 +568,7 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|    | ||||
|   /// Second Max | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -685,10 +594,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|   // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -697,10 +603,7 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens | |||
|    | ||||
|   /// Second Min | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { | ||||
|   // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> | ||||
|   // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32 | ||||
|  | @ -727,12 +630,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -749,12 +649,9 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -786,12 +683,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -805,12 +699,9 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -839,12 +730,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -862,12 +750,9 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -900,12 +785,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -924,12 +806,9 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32  | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32  | ||||
|  | @ -963,12 +842,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim %arg0, [[C0]] : memref<?x10xf32> | ||||
|   // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_0:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim %arg0, [[C0_0]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32 | ||||
|   // CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32 | ||||
|  | @ -979,12 +855,9 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> { | |||
|   // CHECK: [[DIM_0:%.+]] = dim [[RES]], [[C0_1]] : memref<?x10xf32> | ||||
|   // CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> | ||||
|   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 | ||||
|   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops  { | ||||
|   // CHECK:   krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 | ||||
|   // CHECK: } : () -> (!krnl.loop, !krnl.loop) | ||||
|   // CHECK: [[C0_2:%.+]] = constant 0 : index | ||||
|   // CHECK: [[DIM_2:%.+]] = dim [[RES]], [[C0_2]] : memref<?x10xf32> | ||||
|   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: krnl.iterate([[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) { | ||||
|   // CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32> | ||||
|   // CHECK: [[ONE:%.+]] = constant {{1.+}} : f32 | ||||
|   // CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32 | ||||
|  |  | |||
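Every fused elementwise lowering in the tests above now follows the same skeleton: define loops, iterate, then load / compute / store per element. A consolidated hedged sketch (shapes illustrative; addf stands in for any of the fused ops):

```mlir
func @skeleton(%a : memref<10x10xf32>, %b : memref<10x10xf32>) -> memref<10x10xf32> {
  %res = alloc() : memref<10x10xf32>
  %d0, %d1 = krnl.define_loops 2
  krnl.iterate(%d0, %d1) with (%d0 -> %i = 0 to 10, %d1 -> %j = 0 to 10) {
    %x = affine.load %a[%i, %j] : memref<10x10xf32>
    %y = affine.load %b[%i, %j] : memref<10x10xf32>
    %s = addf %x, %y : f32
    affine.store %s, %res[%i, %j] : memref<10x10xf32>
  }
  return %res : memref<10x10xf32>
}
```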