diff --git a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
index 0fcf1c8..b01a197 100644
--- a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
@@ -619,20 +619,35 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
       for (auto arg : iterationBlock.getArguments())
         loopIVs.push_back(arg);
     }
-    // Fold over operands for each of their scalar values
+    // Fold over operands for each of their scalar values.
     Value accumulated, next;
-    auto accumulatedLoopIVs = getLoopIVsForBroadcasting(
+    // Obtain the first operand.
+    std::vector<Value> accumulatedLoopIVs = getLoopIVsForBroadcasting(
         loc, rewriter, loopIVs, operands[0], broadcastedDimInfo[0]);
-    accumulated = rewriter.create<LoadOp>(loc, operands[0], accumulatedLoopIVs);
+    if (!hasAllConstantDimensions(memRefType))
+      // In case of unknown dimensions, use std.load since
+      // 'getLoopIVsForBroadcasting' does not support affine maps yet.
+      accumulated =
+          rewriter.create<LoadOp>(loc, operands[0], accumulatedLoopIVs);
+    else
+      accumulated =
+          rewriter.create<AffineLoadOp>(loc, operands[0], accumulatedLoopIVs);
+    // Iterate over the remaining operands.
     for (unsigned i = 1; i < numArgs; i++) {
-      auto nextLoopIVs = getLoopIVsForBroadcasting(
+      std::vector<Value> nextLoopIVs = getLoopIVsForBroadcasting(
           loc, rewriter, loopIVs, operands[i], broadcastedDimInfo[i]);
-      next = rewriter.create<LoadOp>(loc, operands[i], nextLoopIVs);
+      if (!hasAllConstantDimensions(memRefType))
+        // In case of unknown dimensions, use std.load since
+        // 'getLoopIVsForBroadcasting' does not support affine maps yet.
+        next = rewriter.create<LoadOp>(loc, operands[i], nextLoopIVs);
+      else
+        next = rewriter.create<AffineLoadOp>(loc, operands[i], nextLoopIVs);
       accumulated = emitScalarOpFor<ElementwiseVariadicOp>(
           rewriter, loc, op, memRefType.getElementType(), {accumulated, next});
     }
+    // Store result in the resulting array.
-    rewriter.create<StoreOp>(loc, accumulated, alloc, loopIVs);
+    rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopIVs);

     rewriter.replaceOp(op, alloc);
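A note on the guard above: getLoopIVsForBroadcasting may emit select operations when a dimension is only known at runtime, and select results are not valid affine index expressions, so the lowering keeps std.load for dynamic shapes and uses affine.load only when every dimension is static. A minimal sketch of that selection, written as a hypothetical free-standing helper (not part of this patch; only hasAllConstantDimensions exists in the sources above):

    // Hypothetical helper: emit affine.load when the memref shape is fully
    // static, otherwise fall back to std.load, whose indices may be any
    // index-typed SSA values.
    static Value emitLoadMaybeAffine(ConversionPatternRewriter &rewriter,
        Location loc, MemRefType type, Value memRef, ArrayRef<Value> indices) {
      if (hasAllConstantDimensions(type))
        return rewriter.create<AffineLoadOp>(loc, memRef, indices);
      return rewriter.create<LoadOp>(loc, memRef, indices);
    }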
diff --git a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp
index 1b6bd58..4748721 100644
--- a/src/Conversion/ONNXToKrnl/Math/Gemm.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Gemm.cpp
@@ -156,23 +156,23 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     // Initialize the output of A*B
     auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0);
-    rewriter.create<StoreOp>(loc, zero, alloc, loopMNIVs);
+    rewriter.create<AffineStoreOp>(loc, zero, alloc, loopMNIVs);

     // Compute A*B
     auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack);

     // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
-    auto loadedAB = rewriter.create<LoadOp>(loc, alloc, loopMNIVs);
+    auto loadedAB = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs);
     auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB);
     if (hasBias) {
       auto loopCIVs = getLoopIVsForBroadcasting(
           loc, rewriter, loopMNIVs, C, broadcastedDimInfo);
-      auto loadedC = rewriter.create<LoadOp>(loc, C, loopCIVs);
+      auto loadedC = rewriter.create<AffineLoadOp>(loc, C, loopCIVs);
       auto betaC = rewriter.create<MulFOp>(loc, beta, loadedC);
       auto Y = rewriter.create<AddFOp>(loc, alphaAB, betaC);
-      rewriter.create<StoreOp>(loc, Y, alloc, loopMNIVs);
+      rewriter.create<AffineStoreOp>(loc, Y, alloc, loopMNIVs);
     } else {
-      rewriter.create<StoreOp>(loc, alphaAB, alloc, loopMNIVs);
+      rewriter.create<AffineStoreOp>(loc, alphaAB, alloc, loopMNIVs);
     }

     // Insert instructions to do matrix multiplication: A*B
@@ -199,12 +199,12 @@ struct ONNXGemmOpLowering : public ConversionPattern {
     }

     // Matmul computation
-    auto loadedA = rewriter.create<LoadOp>(loc, A, loopAIVs);
-    auto loadedB = rewriter.create<LoadOp>(loc, B, loopBIVs);
-    auto loadedY = rewriter.create<LoadOp>(loc, alloc, loopMNIVs);
+    auto loadedA = rewriter.create<AffineLoadOp>(loc, A, loopAIVs);
+    auto loadedB = rewriter.create<AffineLoadOp>(loc, B, loopBIVs);
+    auto loadedY = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs);
     auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
     auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
-    rewriter.create<StoreOp>(loc, accumulated, alloc, loopMNIVs);
+    rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopMNIVs);

     rewriter.replaceOp(op, alloc);
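All indices in the Gemm lowering come straight from krnl.iterate block arguments, which become affine loop induction variables, so the affine.load/affine.store verifier accepts them; the switch also exposes the accumulation to affine passes such as scalar replacement and fusion. The same read-modify-write pattern recurs in the MatMul lowering that follows. A sketch of its core, with i, j, k standing in for induction variables and A, B, Y for memrefs (illustrative names only, not code from this patch):

    // Y[i][j] += A[i][k] * B[k][j], written with affine memory ops.
    Value a = rewriter.create<AffineLoadOp>(loc, A, ValueRange{i, k});
    Value b = rewriter.create<AffineLoadOp>(loc, B, ValueRange{k, j});
    Value y = rewriter.create<AffineLoadOp>(loc, Y, ValueRange{i, j});
    Value ab = rewriter.create<MulFOp>(loc, a, b);
    rewriter.create<AffineStoreOp>(
        loc, rewriter.create<AddFOp>(loc, y, ab), Y, ValueRange{i, j});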
diff --git a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp
index 8f57357..6eccdb6 100644
--- a/src/Conversion/ONNXToKrnl/Math/MatMul.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/MatMul.cpp
@@ -221,7 +221,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
     }

     // Fill the output with value 0.
-    rewriter.create<StoreOp>(loc, zero, alloc, loopBatchMNIVs);
+    rewriter.create<AffineStoreOp>(loc, zero, alloc, loopBatchMNIVs);

     // Iterate along the reduction dimension.
     // Use a value from A.
@@ -265,17 +265,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
         loopBatchKNIVs.emplace_back(loopMNIVs[0]);
       }
       // Matmul computation
-      auto loadedA = rewriter.create<LoadOp>(loc, A, loopBatchMKIVs);
-      auto loadedB = rewriter.create<LoadOp>(loc, B, loopBatchKNIVs);
-      auto loadedY = rewriter.create<LoadOp>(loc, alloc, loopBatchMNIVs);
+      auto loadedA = rewriter.create<AffineLoadOp>(loc, A, loopBatchMKIVs);
+      auto loadedB = rewriter.create<AffineLoadOp>(loc, B, loopBatchKNIVs);
+      auto loadedY = rewriter.create<AffineLoadOp>(loc, alloc, loopBatchMNIVs);
       if (elementType.isa<IntegerType>()) {
         auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB);
         auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB);
-        rewriter.create<StoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
+        rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
       } else if (elementType.isa<FloatType>()) {
         auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
         auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
-        rewriter.create<StoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
+        rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
       }
     } else if ((AShape.size() == 1) && (BShape.size() == 1)) {
       // Case 3:
@@ -283,7 +283,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern {

       // Fill the output with value 0.
       Value zeroIndex = rewriter.create<ConstantIndexOp>(loc, 0);
-      rewriter.create<StoreOp>(loc, zero, alloc, zeroIndex);
+      rewriter.create<AffineStoreOp>(loc, zero, alloc, zeroIndex);

       // Iterate along the reduction dimension.
       // Use a value from A.
@@ -310,17 +310,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
       loopKIVs.emplace_back(reduceIterationBlock.getArgument(0));

       // Matmul computation
-      auto loadedA = rewriter.create<LoadOp>(loc, A, loopKIVs);
-      auto loadedB = rewriter.create<LoadOp>(loc, B, loopKIVs);
-      auto loadedY = rewriter.create<LoadOp>(loc, alloc, zeroIndex);
+      auto loadedA = rewriter.create<AffineLoadOp>(loc, A, loopKIVs);
+      auto loadedB = rewriter.create<AffineLoadOp>(loc, B, loopKIVs);
+      auto loadedY = rewriter.create<AffineLoadOp>(loc, alloc, zeroIndex);
       if (elementType.isa<IntegerType>()) {
         auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB);
         auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB);
-        rewriter.create<StoreOp>(loc, accumulated, alloc, zeroIndex);
+        rewriter.create<AffineStoreOp>(loc, accumulated, alloc, zeroIndex);
       } else if (elementType.isa<FloatType>()) {
         auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
         auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
-        rewriter.create<StoreOp>(loc, accumulated, alloc, zeroIndex);
+        rewriter.create<AffineStoreOp>(loc, accumulated, alloc, zeroIndex);
       }
     } else {
       // No scalar matrix multiplication.
diff --git a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp
index db0a034..d5bdfa9 100644
--- a/src/Conversion/ONNXToKrnl/Math/Reduction.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Reduction.cpp
@@ -212,7 +212,7 @@ struct ONNXReductionOpLowering : public ConversionPattern {

     Value identity =
         getIdentityValue<ONNXReductionOp>(rewriter, loc, elementOutType);
-    rewriter.create<StoreOp>(loc, identity, alloc, loopIVs);
+    rewriter.create<AffineStoreOp>(loc, identity, alloc, loopIVs);

     // Define a Krnl loop to do reduction.
     rewriter.setInsertionPointAfter(iterateOpInit);
@@ -256,11 +256,11 @@ struct ONNXReductionOpLowering : public ConversionPattern {
     }

     Value next, accumulated;
-    next = rewriter.create<LoadOp>(loc, operands[0], inLoopIVs);
-    accumulated = rewriter.create<LoadOp>(loc, alloc, outLoopIVs);
+    next = rewriter.create<AffineLoadOp>(loc, operands[0], inLoopIVs);
+    accumulated = rewriter.create<AffineLoadOp>(loc, alloc, outLoopIVs);
     accumulated = emitScalarOpFor<ONNXReductionOp>(
         rewriter, loc, op, memRefOutType.getElementType(), {accumulated, next});
-    rewriter.create<StoreOp>(loc, accumulated, alloc, outLoopIVs);
+    rewriter.create<AffineStoreOp>(loc, accumulated, alloc, outLoopIVs);

     rewriter.replaceOp(op, alloc);
     return success();
diff --git a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp
index ebdf0d1..84927aa 100644
--- a/src/Conversion/ONNXToKrnl/Math/Softmax.cpp
+++ b/src/Conversion/ONNXToKrnl/Math/Softmax.cpp
@@ -104,8 +104,9 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
       outerLoopIVs.push_back(arg);

     // Reset accumulators.
-    rewriter.create<StoreOp>(loc, zero, sumOp);
-    rewriter.create<StoreOp>(loc, negInfinity, maxOp);
+    rewriter.create<AffineStoreOp>(loc, zero, sumOp, ArrayRef<Value>{});
+    rewriter.create<AffineStoreOp>(
+        loc, negInfinity, maxOp, ArrayRef<Value>{});

     // Create an inner loop to compute max.
     maxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
@@ -115,8 +116,9 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
     softmaxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
   } else {
     // Reset accumulators.
-    rewriter.create<StoreOp>(loc, zero, sumOp);
-    rewriter.create<StoreOp>(loc, negInfinity, maxOp);
+    rewriter.create<AffineStoreOp>(loc, zero, sumOp, ArrayRef<Value>{});
+    rewriter.create<AffineStoreOp>(
+        loc, negInfinity, maxOp, ArrayRef<Value>{});

     // Create an inner loop to compute max.
     maxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
@@ -142,16 +144,16 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
       maxLoopIVs.push_back(arg);

     // Compute the max value.
-    Value max = rewriter.create<LoadOp>(loc, maxOp);
-    Value nextMax = rewriter.create<LoadOp>(loc, input, maxLoopIVs);
+    Value max = rewriter.create<AffineLoadOp>(loc, maxOp);
+    Value nextMax = rewriter.create<AffineLoadOp>(loc, input, maxLoopIVs);
     auto maxCond =
         rewriter.create<CmpFOp>(loc, CmpFPredicate::OGT, max, nextMax);
     max = rewriter.create<SelectOp>(loc, maxCond, max, nextMax);
-    rewriter.create<StoreOp>(loc, max, maxOp);
+    rewriter.create<AffineStoreOp>(loc, max, maxOp, ArrayRef<Value>{});

     // Get the max.
     rewriter.setInsertionPoint(sumIterateOp);
-    max = rewriter.create<LoadOp>(loc, maxOp);
+    max = rewriter.create<AffineLoadOp>(loc, maxOp);

     // Insert instructions inside the sum loop.
     Block &sumIterationBlock = sumIterateOp.bodyRegion().front();
@@ -165,18 +167,18 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
       sumLoopIVs.push_back(arg);

     // Sum up values.
-    Value sum = rewriter.create<LoadOp>(loc, sumOp);
-    Value next = rewriter.create<LoadOp>(loc, input, sumLoopIVs);
+    Value sum = rewriter.create<AffineLoadOp>(loc, sumOp);
+    Value next = rewriter.create<AffineLoadOp>(loc, input, sumLoopIVs);
     Value sub = rewriter.create<SubFOp>(loc, next, max);
     Value exp = rewriter.create<ExpOp>(loc, sub);
     sum = rewriter.create<AddFOp>(loc, sum, exp);
-    rewriter.create<StoreOp>(loc, sum, sumOp);
+    rewriter.create<AffineStoreOp>(loc, sum, sumOp, ArrayRef<Value>{});
     // Store intermediate values in the result to avoid recomputation.
-    rewriter.create<StoreOp>(loc, exp, alloc, sumLoopIVs);
+    rewriter.create<AffineStoreOp>(loc, exp, alloc, sumLoopIVs);

     // Get the sum.
     rewriter.setInsertionPoint(softmaxIterateOp);
-    sum = rewriter.create<LoadOp>(loc, sumOp);
+    sum = rewriter.create<AffineLoadOp>(loc, sumOp);

     // Insert instructions inside the softmax loop.
     Block &softmaxIterationBlock = softmaxIterateOp.bodyRegion().front();
@@ -190,9 +192,10 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
      softmaxLoopIVs.push_back(arg);

     // Compute softmax.
-    Value expLoadedVal = rewriter.create<LoadOp>(loc, alloc, softmaxLoopIVs);
+    Value expLoadedVal =
+        rewriter.create<AffineLoadOp>(loc, alloc, softmaxLoopIVs);
     Value result = rewriter.create<DivFOp>(loc, expLoadedVal, sum);
-    rewriter.create<StoreOp>(loc, result, alloc, softmaxLoopIVs);
+    rewriter.create<AffineStoreOp>(loc, result, alloc, softmaxLoopIVs);

     rewriter.replaceOp(op, alloc);
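The sum and max accumulators in the softmax lowering are rank-0 memrefs (memref&lt;f32&gt;), and the AffineStoreOp builders always take an index list, so a zero-dimensional access passes an explicitly empty one; std.store had a builder without indices, which is why the removed lines could omit it. A small sketch of the idiom (zero is a placeholder constant):

    // Rank-0 memref used as a scalar accumulator.
    MemRefType scalarType = MemRefType::get({}, rewriter.getF32Type(), {}, 0);
    Value acc = rewriter.create<AllocOp>(loc, scalarType);
    rewriter.create<AffineStoreOp>(loc, zero, acc, ArrayRef<Value>{});
    Value current = rewriter.create<AffineLoadOp>(loc, acc); // reads acc[]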
diff --git a/src/Conversion/ONNXToKrnl/NN/Conv.cpp b/src/Conversion/ONNXToKrnl/NN/Conv.cpp
index 3a74798..3c006a2 100644
--- a/src/Conversion/ONNXToKrnl/NN/Conv.cpp
+++ b/src/Conversion/ONNXToKrnl/NN/Conv.cpp
@@ -129,10 +129,14 @@ struct ONNXConvOpLowering : public ConversionPattern {
       if (group > 1) {
         // Middle loop is over groups and third loop is over the
         // kernel identifiers in the current group.
-        auto kernelsOffset = rewriter.create<MulIOp>(
-            loc, outerLoops.getInductionVar(gIndex), kernelsPerGroupValue);
-        kernel = rewriter.create<AddIOp>(
-            loc, kernelsOffset, outerLoops.getInductionVar(mIndex));
+        AffineMap kernelMap = AffineMap::get(2, 1,
+            /*gIndex=*/rewriter.getAffineDimExpr(0) *
+                    /*kernelsPerGroup=*/rewriter.getAffineSymbolExpr(0) +
+                /*mIndex=*/rewriter.getAffineDimExpr(1));
+        kernel = rewriter.create<AffineApplyOp>(loc, kernelMap,
+            ArrayRef<Value>{/*gIndex=*/outerLoops.getInductionVar(gIndex),
+                /*mIndex=*/outerLoops.getInductionVar(mIndex),
+                /*kernelsPerGroupValue=*/kernelsPerGroupValue});
       }

       // 2.2 Define spatial loops
@@ -209,9 +213,8 @@ struct ONNXConvOpLowering : public ConversionPattern {
                 /*subchannel=*/rewriter.getAffineSymbolExpr(0) +
                 /*c=*/rewriter.getAffineDimExpr(1));
             channelDepth = rewriter.create<AffineApplyOp>(loc, indexMap,
-                ValueRange(
-                    ArrayRef<Value>{/*g=*/outerLoops.getInductionVar(gIndex),
-                        /*c=*/channelDepth, /*subchannel=*/subchannels}));
+                ArrayRef<Value>{/*g=*/outerLoops.getInductionVar(gIndex),
+                    /*c=*/channelDepth, /*subchannel=*/subchannels});
           }
           dataIndices.emplace_back(channelDepth);
           // sX * rX + kX
@@ -231,8 +234,8 @@ struct ONNXConvOpLowering : public ConversionPattern {
                 /*sX=*/rewriter.getAffineDimExpr(0) * /*rX=*/stride +
                 /*kX=*/rewriter.getAffineDimExpr(1));
             Value outIV = rewriter.create<AffineApplyOp>(loc, indexMap,
-                ValueRange(ArrayRef<Value>{spatialLoops.getInductionVar(i),
-                    innerLoops.getInductionVar(i + 1)}));
+                ArrayRef<Value>{spatialLoops.getInductionVar(i),
+                    innerLoops.getInductionVar(i + 1)});
             dataIndices.emplace_back(outIV);
           }
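affine.apply binds its operands positionally: all dimension values first, then all symbol values. kernelMap above is built with AffineMap::get(2, 1, d0 * s0 + d1), i.e. two dims (gIndex, mIndex) and one symbol (kernelsPerGroup), so the operand list must be {gIndex IV, mIndex IV, kernelsPerGroupValue}; listing kernelsPerGroupValue in the middle would bind it to d1 and compute the wrong index. A minimal sketch of the rule, with d0Val, d1Val, s0Val as placeholder values:

    // d0 * s0 + d1: dims and symbols are numbered independently, and the
    // affine.apply operand list is ordered (dims..., symbols...).
    AffineMap map = AffineMap::get(/*dimCount=*/2, /*symbolCount=*/1,
        rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) +
            rewriter.getAffineDimExpr(1));
    Value idx = rewriter.create<AffineApplyOp>(
        loc, map, ArrayRef<Value>{d0Val, d1Val, s0Val});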
diff --git a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp
index 6d5a0f8..5d5ca65 100644
--- a/src/Conversion/ONNXToKrnl/NN/Normalization.cpp
+++ b/src/Conversion/ONNXToKrnl/NN/Normalization.cpp
@@ -79,10 +79,10 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
       loopCIVs.emplace_back(rewriter.create<ConstantIndexOp>(loc, 0));
     }

-    auto scaleVal = rewriter.create<LoadOp>(loc, scale, loopCIVs);
-    auto biasVal = rewriter.create<LoadOp>(loc, bias, loopCIVs);
-    auto meanVal = rewriter.create<LoadOp>(loc, mean, loopCIVs);
-    auto varianceVal = rewriter.create<LoadOp>(loc, variance, loopCIVs);
+    auto scaleVal = rewriter.create<AffineLoadOp>(loc, scale, loopCIVs);
+    auto biasVal = rewriter.create<AffineLoadOp>(loc, bias, loopCIVs);
+    auto meanVal = rewriter.create<AffineLoadOp>(loc, mean, loopCIVs);
+    auto varianceVal = rewriter.create<AffineLoadOp>(loc, variance, loopCIVs);

     // Create a KrnlIterateOp along the other dimensions.
     SmallVector<int64_t, 4> axes;
@@ -118,7 +118,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
       loopIVs.emplace_back(args[0]);
     }

-    auto xVal = rewriter.create<LoadOp>(loc, operand, loopIVs);
+    auto xVal = rewriter.create<AffineLoadOp>(loc, operand, loopIVs);
     // normalize
     auto dividend = rewriter.create<SubFOp>(loc, xVal, meanVal);
     auto adjustedVarianceVal =
@@ -129,7 +129,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
     auto scaleNormVal = rewriter.create<MulFOp>(loc, scaleVal, normVal);
     auto shiftScaleNormVal =
         rewriter.create<AddFOp>(loc, scaleNormVal, biasVal);
-    rewriter.create<StoreOp>(loc, shiftScaleNormVal, alloc, loopIVs);
+    rewriter.create<AffineStoreOp>(loc, shiftScaleNormVal, alloc, loopIVs);

     rewriter.replaceOp(op, alloc);
diff --git a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp
index 970588c..f006525 100644
--- a/src/Conversion/ONNXToKrnl/NN/Pooling.cpp
+++ b/src/Conversion/ONNXToKrnl/NN/Pooling.cpp
@@ -100,7 +100,7 @@ void postProcessPoolingWindow(
     ArrayRef<Value> poolDimValues) {
   // AveragePool's result type is FloatType, so it's safe to use DivFOp, SubFOp.
   bool countIncludePad = getCountIncludePad<PoolOp>(poolOp);
-  Value numerator = rewriter.create<LoadOp>(loc, alloc, resultIndices);
+  Value numerator = rewriter.create<AffineLoadOp>(loc, alloc, resultIndices);
   Value denominator;
   if (countIncludePad) {
     int64_t kernelSize = 1;
@@ -120,7 +120,7 @@ void postProcessPoolingWindow(

   Value average = rewriter.create<DivFOp>(loc, numerator, denominator);

-  rewriter.create<StoreOp>(loc, average, alloc, resultIndices);
+  rewriter.create<AffineStoreOp>(loc, average, alloc, resultIndices);
 }

//===----------------------------------------------------------------------===//
@@ -167,9 +167,7 @@ Value insertAllocAndDeallocForPooling(ConversionPatternRewriter &rewriter,
            dilations.empty() ? 1 : dilations[spatialIndex]));

       // Apply the affine map.
-      Value dimVal =
-          rewriter.create<AffineApplyOp>(loc, dimMap, ValueRange(dimArgs));
-
+      Value dimVal = rewriter.create<AffineApplyOp>(loc, dimMap, dimArgs);
       allocOperands.emplace_back(dimVal);
     }
   }
@@ -346,7 +344,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
       outputIndices.emplace_back(outputLoops.getInductionVar(i));

     // 2.1 Emit: output[n][c][ho][wo] = identity
-    rewriter.create<StoreOp>(loc, identity, alloc, outputIndices);
+    rewriter.create<AffineStoreOp>(loc, identity, alloc, outputIndices);

     // 2.2 Emit affine maps which express the lower and upper bounds for the
     // pooling window's dimensions.
@@ -441,11 +439,11 @@ struct ONNXPoolOpLowering : public ConversionPattern {
     { // Construct poolStartValues and poolDimValues.
       for (int i = 0; i < kernelShape.size(); ++i) {
         Value startIndex = rewriter.create<AffineMaxOp>(
-            loc, poolStartMap, ValueRange(IVsAndConstants[i]));
+            loc, poolStartMap, IVsAndConstants[i]);
         poolStartValues.emplace_back(startIndex);

-        Value endIndex = rewriter.create<AffineMinOp>(
-            loc, poolEndMap, ValueRange(IVsAndConstants[i]));
+        Value endIndex =
+            rewriter.create<AffineMinOp>(loc, poolEndMap, IVsAndConstants[i]);
         Value dim = rewriter.create<SubIOp>(loc, endIndex, startIndex);

         if (isDilated) {
@@ -514,10 +512,10 @@ struct ONNXPoolOpLowering : public ConversionPattern {
         Value loadInput =
             rewriter.create<LoadOp>(loc, inputOperand, inputIndices);
         Value loadPartialOutput =
-            rewriter.create<LoadOp>(loc, alloc, outputIndices);
+            rewriter.create<AffineLoadOp>(loc, alloc, outputIndices);
         Value output = emitScalarOpFor<PoolOp>(rewriter, loc, op,
             outputElementType, {loadPartialOutput, loadInput});
-        rewriter.create<StoreOp>(loc, output, alloc, outputIndices);
+        rewriter.create<AffineStoreOp>(loc, output, alloc, outputIndices);
       }

       // 2.5 Post-processing for the pooling window, e.g. taking average.
diff --git a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp
index b6d55be..0a934e1 100644
--- a/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp
+++ b/src/Conversion/ONNXToKrnl/RNN/LSTM.cpp
@@ -222,13 +222,15 @@ LstmState allocAndInitializeStates(

       Value hiddenVal = zero;
       if (!isNoneType(operandAdaptor.initial_h()))
-        hiddenVal = rewriter.create<LoadOp>(loc, operandAdaptor.initial_h(), IVs);
-      rewriter.create<StoreOp>(loc, hiddenVal, state.ht, IVs);
+        hiddenVal =
+            rewriter.create<AffineLoadOp>(loc, operandAdaptor.initial_h(), IVs);
+      rewriter.create<AffineStoreOp>(loc, hiddenVal, state.ht, IVs);

       Value cellVal = zero;
       if (!isNoneType(operandAdaptor.initial_c()))
-        cellVal = rewriter.create<LoadOp>(loc, operandAdaptor.initial_c(), IVs);
-      rewriter.create<StoreOp>(loc, cellVal, state.ct, IVs);
+        cellVal =
+            rewriter.create<AffineLoadOp>(loc, operandAdaptor.initial_c(), IVs);
+      rewriter.create<AffineStoreOp>(loc, cellVal, state.ct, IVs);
     }
   rewriter.restoreInsertionPoint(ipInitializationLoops);
   return state;
@@ -320,8 +322,8 @@ void calculateState(
       for (unsigned i = 0; i < 4; ++i) {
         Value wHiddenIV =
             rewriter.create<AffineApplyOp>(loc, accessByOffsetMap,
-                ValueRange(std::vector<Value>{/*iv=*/hiddenIV,
-                    /*index=*/constantIndices[i], /*size=*/hiddenDimVal}));
+                std::vector<Value>{/*iv=*/hiddenIV,
+                    /*index=*/constantIndices[i], /*size=*/hiddenDimVal});
         wbIOFCIVs.emplace_back(SmallVector<Value, 2>{directionIV, wHiddenIV});
       }
       // Rb[iofc]
@@ -329,8 +331,8 @@ void calculateState(
         SmallVector<Value, 2> rbIVs;
         Value rHiddenIV =
             rewriter.create<AffineApplyOp>(loc, accessByOffsetMap,
-                ValueRange(std::vector<Value>{/*iv=*/hiddenIV,
-                    /*index=*/constantIndices[i], /*size=*/hiddenDimVal}));
+                std::vector<Value>{/*iv=*/hiddenIV,
+                    /*index=*/constantIndices[i], /*size=*/hiddenDimVal});
         rbIOFCIVs.emplace_back(SmallVector<Value, 2>{directionIV, rHiddenIV});
       }
     }
@@ -339,17 +341,16 @@ void calculateState(
     if (hasPeepholes) {
       for (unsigned i = 0; i < 3; ++i) {
         SmallVector<Value, 2> pIVs;
-        Value pHiddenIV =
-            rewriter.create<AffineApplyOp>(loc, accessByOffsetMap,
-                ValueRange(std::vector<Value>{
-                    hiddenIV, constantIndices[i], hiddenDimVal}));
+        Value pHiddenIV = rewriter.create<AffineApplyOp>(loc,
+            accessByOffsetMap,
+            std::vector<Value>{hiddenIV, constantIndices[i], hiddenDimVal});
         pIOFIVs.emplace_back(SmallVector<Value, 2>{directionIV, pHiddenIV});
       }
     }
   }

-  Value loadH = rewriter.create<LoadOp>(loc, state.ht, hIVs);
-  Value loadC = rewriter.create<LoadOp>(loc, state.ct, cIVs);
+  Value loadH = rewriter.create<AffineLoadOp>(loc, state.ht, hIVs);
+  Value loadC = rewriter.create<AffineLoadOp>(loc, state.ct, cIVs);

   // Emit instructions for matrix multiplications:
   //   Xt*(Wi^T), Xt*(Wo^T), Xt*(Wf^t), Xt*(Wc^T)
@@ -361,9 +362,9 @@ void calculateState(
   MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0);
   for (unsigned i = 0; i < 4; ++i) {
     Value xwAlloc = rewriter.create<AllocOp>(loc, scalarMemRefType);
-    rewriter.create<StoreOp>(loc, zero, xwAlloc);
+    rewriter.create<AffineStoreOp>(loc, zero, xwAlloc, ArrayRef<Value>{});
     Value hrAlloc = rewriter.create<AllocOp>(loc, scalarMemRefType);
-    rewriter.create<StoreOp>(loc, zero, hrAlloc);
+    rewriter.create<AffineStoreOp>(loc, zero, hrAlloc, ArrayRef<Value>{});
     xwIOFC.emplace_back(xwAlloc);
     hrIOFC.emplace_back(hrAlloc);
   }
@@ -390,10 +391,9 @@ void calculateState(
       // R[iofc] :: [num_directions, 4*hidden_size, input_size]
       for (unsigned i = 0; i < 4; ++i) {
         SmallVector<Value, 4> wIVs, rIVs;
-        Value wHiddenIV =
-            rewriter.create<AffineApplyOp>(loc, accessByOffsetMap,
-                ValueRange(std::vector<Value>{
-                    hiddenIV, constantIndices[i], hiddenDimVal}));
+        Value wHiddenIV = rewriter.create<AffineApplyOp>(loc,
+            accessByOffsetMap,
+            std::vector<Value>{hiddenIV, constantIndices[i], hiddenDimVal});

         wIVs = {directionIV, wHiddenIV, reductionIV};
         wIOFCIVs.emplace_back(wIVs);
@@ -402,77 +402,80 @@ void calculateState(
         rIOFCIVs.emplace_back(rIVs);
       }

-      Value loadX = rewriter.create<LoadOp>(loc, operandAdaptor.X(), xIVs);
+      Value loadX =
+          rewriter.create<AffineLoadOp>(loc, operandAdaptor.X(), xIVs);
       for (unsigned i = 0; i < 4; ++i) {
         // Xt * Wiofc
-        Value loadW =
-            rewriter.create<LoadOp>(loc, operandAdaptor.W(), wIOFCIVs[i]);
+        Value loadW = rewriter.create<AffineLoadOp>(
+            loc, operandAdaptor.W(), wIOFCIVs[i]);
         Value xwVal = rewriter.create<MulFOp>(loc, loadX, loadW);
-        Value loadXW = rewriter.create<LoadOp>(loc, xwIOFC[i]);
+        Value loadXW = rewriter.create<AffineLoadOp>(loc, xwIOFC[i]);
         Value nextXW = rewriter.create<AddFOp>(loc, loadXW, xwVal);
-        rewriter.create<StoreOp>(loc, nextXW, xwIOFC[i]);
+        rewriter.create<AffineStoreOp>(
+            loc, nextXW, xwIOFC[i], ArrayRef<Value>{});
         // Ht-1 * Riofc
-        Value loadR =
-            rewriter.create<LoadOp>(loc, operandAdaptor.R(), rIOFCIVs[i]);
+        Value loadR = rewriter.create<AffineLoadOp>(
+            loc, operandAdaptor.R(), rIOFCIVs[i]);
         Value hrVal = rewriter.create<MulFOp>(loc, loadH, loadR);
-        Value loadHR = rewriter.create<LoadOp>(loc, hrIOFC[i]);
+        Value loadHR = rewriter.create<AffineLoadOp>(loc, hrIOFC[i]);
         Value nextHR = rewriter.create<AddFOp>(loc, loadHR, hrVal);
-        rewriter.create<StoreOp>(loc, nextHR, hrIOFC[i]);
+        rewriter.create<AffineStoreOp>(
+            loc, nextHR, hrIOFC[i], ArrayRef<Value>{});
       }
     }
     rewriter.restoreInsertionPoint(ipReductionLoops);
   }

   // it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
-  Value loadXWI = rewriter.create<LoadOp>(loc, xwIOFC[0]);
-  Value loadHRI = rewriter.create<LoadOp>(loc, hrIOFC[0]);
+  Value loadXWI = rewriter.create<AffineLoadOp>(loc, xwIOFC[0]);
+  Value loadHRI = rewriter.create<AffineLoadOp>(loc, hrIOFC[0]);
   Value it = rewriter.create<AddFOp>(loc, loadXWI, loadHRI);
   if (hasPeepholes) {
     Value loadP =
-        rewriter.create<LoadOp>(loc, operandAdaptor.P(), pIOFIVs[0]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.P(), pIOFIVs[0]);
     Value PC = rewriter.create<MulFOp>(loc, loadP, loadC);
     it = rewriter.create<AddFOp>(loc, it, PC);
   }
   if (hasBiasForInput) {
     Value loadWB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[0]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[0]);
     it = rewriter.create<AddFOp>(loc, it, loadWB);
     Value loadRB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[0]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[0]);
     it = rewriter.create<AddFOp>(loc, it, loadRB);
   }
   it = applyActivation(rewriter, loc, activationPack.f, it);

   // ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
-  Value loadXWF = rewriter.create<LoadOp>(loc, xwIOFC[2]);
-  Value loadHRF = rewriter.create<LoadOp>(loc, hrIOFC[2]);
+  Value loadXWF = rewriter.create<AffineLoadOp>(loc, xwIOFC[2]);
+  Value loadHRF = rewriter.create<AffineLoadOp>(loc, hrIOFC[2]);
   Value ft = rewriter.create<AddFOp>(loc, loadXWF, loadHRF);
   if (hasPeepholes) {
     Value loadP =
-        rewriter.create<LoadOp>(loc, operandAdaptor.P(), pIOFIVs[2]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.P(), pIOFIVs[2]);
     Value PC = rewriter.create<MulFOp>(loc, loadP, loadC);
     ft = rewriter.create<AddFOp>(loc, ft, PC);
   }
   if (hasBiasForInput) {
     Value loadWB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[2]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[2]);
     ft = rewriter.create<AddFOp>(loc, ft, loadWB);
     Value loadRB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[2]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[2]);
     ft = rewriter.create<AddFOp>(loc, ft, loadRB);
   }
   ft = applyActivation(rewriter, loc, activationPack.f, ft);

   // ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
-  Value loadXWC = rewriter.create<LoadOp>(loc, xwIOFC[3]);
-  Value loadHRC = rewriter.create<LoadOp>(loc, hrIOFC[3]);
+  Value loadXWC = rewriter.create<AffineLoadOp>(loc, xwIOFC[3]);
+  Value loadHRC = rewriter.create<AffineLoadOp>(loc, hrIOFC[3]);
   Value ct = rewriter.create<AddFOp>(loc, loadXWC, loadHRC);
   if (hasBiasForInput) {
     Value loadWB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[3]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[3]);
     ct = rewriter.create<AddFOp>(loc, ct, loadWB);
     Value loadRB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[3]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[3]);
     ct = rewriter.create<AddFOp>(loc, ct, loadRB);
   }
   ct = applyActivation(rewriter, loc, activationPack.g, ct);
@@ -481,24 +484,24 @@ void calculateState(
   Value FtCt1 = rewriter.create<MulFOp>(loc, ft, loadC);
   Value itct = rewriter.create<MulFOp>(loc, it, ct);
   Value Ct = rewriter.create<AddFOp>(loc, FtCt1, itct);
-  rewriter.create<StoreOp>(loc, Ct, state.ct, cIVs);
+  rewriter.create<AffineStoreOp>(loc, Ct, state.ct, cIVs);

   // ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
-  Value loadXWO = rewriter.create<LoadOp>(loc, xwIOFC[1]);
-  Value loadHRO = rewriter.create<LoadOp>(loc, hrIOFC[1]);
+  Value loadXWO = rewriter.create<AffineLoadOp>(loc, xwIOFC[1]);
+  Value loadHRO = rewriter.create<AffineLoadOp>(loc, hrIOFC[1]);
   Value ot = rewriter.create<AddFOp>(loc, loadXWO, loadHRO);
   if (hasPeepholes) {
     Value loadP =
-        rewriter.create<LoadOp>(loc, operandAdaptor.P(), pIOFIVs[1]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.P(), pIOFIVs[1]);
     Value PC = rewriter.create<MulFOp>(loc, loadP, Ct);
     ot = rewriter.create<AddFOp>(loc, ot, PC);
   }
   if (hasBiasForInput) {
     Value loadWB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[1]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[1]);
     ot = rewriter.create<AddFOp>(loc, ot, loadWB);
     Value loadRB =
-        rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[1]);
+        rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[1]);
     ot = rewriter.create<AddFOp>(loc, ot, loadRB);
   }
   ot = applyActivation(rewriter, loc, activationPack.f, ot);
@@ -506,12 +509,12 @@ void calculateState(
   // Ht = ot (.) h(Ct)
   Value hCt = applyActivation(rewriter, loc, activationPack.h, Ct);
   Value Ht = rewriter.create<MulFOp>(loc, ot, hCt);
-  rewriter.create<StoreOp>(loc, Ht, state.ht, hIVs);
+  rewriter.create<AffineStoreOp>(loc, Ht, state.ht, hIVs);

   // Store the current Ht if required.
   if (!isNoneType(state.allH)) {
     SmallVector<Value, 4> allHIVs{sequenceIV, directionIV, batchIV, hiddenIV};
-    rewriter.create<StoreOp>(loc, Ht, state.allH, allHIVs);
+    rewriter.create<AffineStoreOp>(loc, Ht, state.allH, allHIVs);
   }

   // Deallocate the temporary results of matrix multiplications.
diff --git a/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp b/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp
index 5a0c8db..6eccf49 100644
--- a/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp
+++ b/src/Conversion/ONNXToKrnl/RNN/RNNBase.cpp
@@ -28,7 +28,7 @@ Value applyActivation(ConversionPatternRewriter &rewriter, Location loc,
   MemRefType scalarMemRefType =
       MemRefType::get({}, scalarOperand.getType(), {}, 0);
   Value alloc = rewriter.create<AllocOp>(loc, scalarMemRefType);
-  rewriter.create<StoreOp>(loc, scalarOperand, alloc);
+  rewriter.create<AffineStoreOp>(loc, scalarOperand, alloc, ArrayRef<Value>{});

   std::vector<NamedAttribute> attributes;
   if (activation.alpha) {
@@ -68,6 +68,6 @@ Value applyActivation(ConversionPatternRewriter &rewriter, Location loc,
   else
     llvm_unreachable("Unsupported activation");

-  Value result = rewriter.create<LoadOp>(loc, res);
+  Value result = rewriter.create<AffineLoadOp>(loc, res);
   return result;
 }
diff --git a/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp b/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp
index 29abceb..3d748e4 100644
--- a/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp
+++ b/src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp
@@ -126,9 +126,9 @@ struct ONNXRNNOpLowering : public ConversionPattern {
             rewriter.getIndexType(), (direction == REVERSE) ? 0 : 1);
         Value reverseSequenceIV =
             rewriter.create<AffineApplyOp>(loc, reverseIVMap,
-                ValueRange(std::vector<Value>{sequenceLoops.getInductionVar(0),
+                std::vector<Value>{sequenceLoops.getInductionVar(0),
                     emitConstantOp(rewriter, loc, rewriter.getIndexType(),
-                        sequenceDimSize)}));
+                        sequenceDimSize)});
         // Emit calculation for one RNN step.
         calculateState<S, A>(rewriter, loc, operandAdaptor, state,
             activationReverse, directionIV, reverseSequenceIV);
diff --git a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
index 85f3b82..e13eb47 100644
--- a/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
+++ b/src/Conversion/ONNXToKrnl/Tensor/Concat.cpp
@@ -59,15 +59,18 @@ struct ONNXConcatOpLowering : public ConversionPattern {
       if (r != axis || writeOffset == 0) {
         writeIndices.emplace_back(inputLoops.getInductionVar(r));
       } else {
-        auto indexWithOffset = rewriter.create<AddIOp>(loc,
-            rewriter.create<ConstantIndexOp>(loc, writeOffset),
-            inputLoops.getInductionVar(r));
+        AffineMap indexWithOffsetMap =
+            AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) + writeOffset);
+        Value indexWithOffset =
+            rewriter.create<AffineApplyOp>(loc, indexWithOffsetMap,
+                ArrayRef<Value>{inputLoops.getInductionVar(r)});
         writeIndices.emplace_back(indexWithOffset);
       }
     }
     // Insert copy.
-    auto loadData = rewriter.create<LoadOp>(loc, operands[i], readIndices);
-    rewriter.create<StoreOp>(loc, loadData, alloc, writeIndices);
+    auto loadData =
+        rewriter.create<AffineLoadOp>(loc, operands[i], readIndices);
+    rewriter.create<AffineStoreOp>(loc, loadData, alloc, writeIndices);
     // Increment offset
     writeOffset += currShape[axis];
   }
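The Concat rewrite above, and the two pad lowerings below, fold a compile-time offset directly into a one-dimensional affine map (d0 + offset) instead of materializing a std.constant plus std.addi. Keeping the index computation inside the affine dialect matters: the result of affine.apply is still an affine expression of the loop IV, so the affine.store that consumes it verifies, whereas an addi result would be an opaque SSA value and force the access back to std.store. A minimal sketch of the idiom, with iv and offset as placeholders:

    // index' = iv + offset; the constant lives inside the map, so no
    // std.constant/std.addi operations are created.
    AffineMap offsetMap = AffineMap::get(
        /*dimCount=*/1, /*symbolCount=*/0,
        rewriter.getAffineDimExpr(0) + offset);
    Value shifted =
        rewriter.create<AffineApplyOp>(loc, offsetMap, ArrayRef<Value>{iv});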
- auto loadData = rewriter.create(loc, operands[i], readIndices); - rewriter.create(loc, loadData, alloc, writeIndices); + auto loadData = + rewriter.create(loc, operands[i], readIndices); + rewriter.create(loc, loadData, alloc, writeIndices); // Increment offset writeOffset += currShape[axis]; } diff --git a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp index 811cb87..81e9105 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Pad.cpp @@ -88,16 +88,17 @@ struct ONNXPadOpLowering : public ConversionPattern { if (pads[i] == 0) { outLoopIVs.emplace_back(valueLoops.getInductionVar(i)); } else { - auto outIV = rewriter.create(loc, - rewriter.create(loc, pads[i]), - valueLoops.getInductionVar(i)); + AffineMap indexWithOffsetMap = + AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) + pads[i]); + Value outIV = rewriter.create(loc, indexWithOffsetMap, + ArrayRef{valueLoops.getInductionVar(i)}); outLoopIVs.emplace_back(outIV); } } auto originValue = - rewriter.create(loc, operandAdaptor.data(), inLoopIVs); - rewriter.create(loc, originValue, alloc, outLoopIVs); + rewriter.create(loc, operandAdaptor.data(), inLoopIVs); + rewriter.create(loc, originValue, alloc, outLoopIVs); rewriter.setInsertionPointToStart(padLoops.getIterateBlock()); SmallVector outLoopIVs1; @@ -105,7 +106,7 @@ struct ONNXPadOpLowering : public ConversionPattern { outLoopIVs1.emplace_back(padLoops.getInductionVar(i)); auto paddingValue = rewriter.create(loc, valueAttr); - rewriter.create(loc, paddingValue, alloc, outLoopIVs1); + rewriter.create(loc, paddingValue, alloc, outLoopIVs1); // Replace the original op with the generated code. rewriter.replaceOp(op, alloc); diff --git a/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp b/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp index 25fe213..a201107 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/PadConstantValuePad.cpp @@ -77,15 +77,17 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern { if (pad_begin[i] == 0) { outLoopIVs.emplace_back(valueLoops.getInductionVar(i)); } else { - auto outIV = rewriter.create(loc, - rewriter.create(loc, pad_begin[i]), - valueLoops.getInductionVar(i)); + AffineMap indexWithOffsetMap = + AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) + pad_begin[i]); + Value outIV = rewriter.create(loc, indexWithOffsetMap, + ArrayRef{valueLoops.getInductionVar(i)}); outLoopIVs.emplace_back(outIV); } } - auto inVal = rewriter.create(loc, operandAdaptor.data(), inLoopIVs); - rewriter.create(loc, inVal, alloc, outLoopIVs); + auto inVal = + rewriter.create(loc, operandAdaptor.data(), inLoopIVs); + rewriter.create(loc, inVal, alloc, outLoopIVs); rewriter.setInsertionPointToStart(padLoops.getIterateBlock()); SmallVector outLoopIVs1; @@ -93,7 +95,7 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern { outLoopIVs1.emplace_back(padLoops.getInductionVar(i)); auto inVal1 = rewriter.create(loc, constantValAttr); - rewriter.create(loc, inVal1, alloc, outLoopIVs1); + rewriter.create(loc, inVal1, alloc, outLoopIVs1); // Replace the original op with the generated code. 
rewriter.replaceOp(op, alloc); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp b/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp index 4b2c01b..eeabde7 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Reshape.cpp @@ -64,7 +64,8 @@ struct ONNXReshapeOpLowering : public ConversionPattern { for (int i = 0; i < memRefShape.size(); ++i) { Value index = emitConstantOp(rewriter, loc, rewriter.getIndexType(), i); // Load index from array of indices. - Value loadedVal = rewriter.create(loc, operands[1], index); + Value loadedVal = + rewriter.create(loc, operands[1], index); // If a dimension is zero, the actual dimension value is taken from the // input tensor. // diff --git a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp index 8b5e31c..eaf84af 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Split.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Split.cpp @@ -92,8 +92,9 @@ struct ONNXSplitOpLowering : public ConversionPattern { writeIndices.emplace_back(outputLoops.getInductionVar(r)); } // Insert copy. - auto loadData = rewriter.create(loc, operands[0], readIndices); - rewriter.create(loc, loadData, allocs[i], writeIndices); + auto loadData = + rewriter.create(loc, operands[0], readIndices); + rewriter.create(loc, loadData, allocs[i], writeIndices); } rewriter.replaceOp(op, allocs); return success(); diff --git a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp index a51a935..d4f48e6 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp @@ -80,8 +80,8 @@ struct ONNXTransposeOpLowering : public ConversionPattern { for (int i = 0; i < iterationBlock.getArguments().size(); ++i) outLoopIVs.emplace_back(iterationBlock.getArguments()[perm[i]]); - auto inVal = rewriter.create(loc, data, inLoopIVs); - rewriter.create(loc, inVal, alloc, outLoopIVs); + auto inVal = rewriter.create(loc, data, inLoopIVs); + rewriter.create(loc, inVal, alloc, outLoopIVs); rewriter.replaceOp(op, alloc); diff --git a/test/mlir/onnx/onnx_enable_memory_pool.mlir b/test/mlir/onnx/onnx_enable_memory_pool.mlir index 9b1e1c7..dd78e38 100644 --- a/test/mlir/onnx/onnx_enable_memory_pool.mlir +++ b/test/mlir/onnx/onnx_enable_memory_pool.mlir @@ -14,10 +14,10 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> { // CHECK: krnl.define_loops // CHECK: krnl.optimize_loops // CHECK: krnl.iterate - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg1, %arg2] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg0[%arg1, %arg2] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32> // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32> + // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32> // CHECK: krnl.define_loops // CHECK: krnl.optimize_loops // CHECK: krnl.iterate @@ -43,26 +43,26 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3 // CHECK: krnl.define_loops // CHECK: krnl.optimize_loops // CHECK: krnl.iterate - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load 
%arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32> // CHECK: krnl.define_loops // CHECK: krnl.optimize_loops // CHECK: krnl.iterate - // CHECK: [[LOAD3:%.+]] = load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32> - // CHECK: [[LOAD4:%.+]] = load %arg1[%arg4, %arg3] : memref<10x20xf32> - // CHECK: [[LOAD5:%.+]] = load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32> + // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32> + // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32 // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32 - // CHECK: store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: krnl.define_loops // CHECK: krnl.optimize_loops // CHECK: krnl.iterate - // CHECK: [[LOAD6:%.+]] = load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> - // CHECK: [[LOAD7:%.+]] = load %arg1[%arg2, %arg3] : memref<10x20xf32> + // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32> // CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32 - // CHECK: store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32> + // CHECK: affine.store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32> // CHECK: dealloc [[MEMPOOL1]] : memref<400xi8> // CHECK: dealloc [[MEMPOOL0]] : memref<800xi8> // CHECK: return [[RES]] : memref<10x20xf32> diff --git a/test/mlir/onnx/onnx_lowering.mlir b/test/mlir/onnx/onnx_lowering.mlir index ed46b60..7b8152b 100644 --- a/test/mlir/onnx/onnx_lowering.mlir +++ b/test/mlir/onnx/onnx_lowering.mlir @@ -38,10 +38,10 @@ func @test_elementwise_op_with_scalar_values_2(%arg0 : tensor, %arg1 : tens // CHECK-LABEL: test_elementwise_op_with_scalar_values_2 // CHECK: [[RES:%.+]] = alloc() : memref - // CHECK: [[LOAD1:%.+]] = load %arg0[] : memref - // CHECK: [[LOAD2:%.+]] = load %arg1[] : memref + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[] : memref + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[] : memref // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADD]], [[RES]][] : memref + // CHECK: affine.store [[ADD]], [[RES]][] : memref // CHECK: return [[RES]] : memref } @@ -53,12 +53,12 @@ func @test_elementwise_op_with_scalar_values_3(%arg0 : tensor, %arg1 : tens // CHECK-LABEL: test_elementwise_op_with_scalar_values_3 // CHECK: [[RES:%.+]] = alloc() : memref - // CHECK: [[LOAD1:%.+]] = load %arg0[] : memref - // CHECK: [[LOAD2:%.+]] = load %arg1[] : memref + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[] : memref + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[] : memref // CHECK: [[ADD1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: [[LOAD3:%.+]] = load %arg2[] : memref + // CHECK: [[LOAD3:%.+]] = affine.load %arg2[] : memref // CHECK: [[ADD2:%.+]] = addf [[ADD1]], [[LOAD3]] : f32 - // CHECK: store [[ADD2]], [[RES]][] : memref + // CHECK: affine.store [[ADD2]], [[RES]][] : memref // CHECK: return [[RES]] : memref } @@ -75,10 +75,10 @@ func @test_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, 
[[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: return [[RES]] : memref<10x10xf32> } @@ -95,10 +95,10 @@ func @test_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: return [[RES]] : memref<10x10xf32> } @@ -115,10 +115,10 @@ func @test_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: return [[RES]] : memref<10x10xf32> } @@ -135,10 +135,10 @@ func @test_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: return [[RES]] : memref<10x10xf32> } @@ -155,10 +155,10 @@ func @test_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> 
tensor<*xi // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi1> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1 - // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi1> + // CHECK: affine.store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi1> // CHECK: return [[RES]] : memref<10x10xi1> } @@ -175,10 +175,10 @@ func @test_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*xi1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi1> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1 - // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi1> + // CHECK: affine.store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi1> // CHECK: return [[RES]] : memref<10x10xi1> } @@ -195,10 +195,10 @@ func @test_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*xi // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi1> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1> // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1 - // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi1> + // CHECK: affine.store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi1> // CHECK: return [[RES]] : memref<10x10xi1> } @@ -411,7 +411,7 @@ func @test_reshape(%arg0 : tensor, %arg1 : tensor<4xi64>) -> tensor<*x // CHECK: [[TYPE_IN_BYTES_1:%.+]] = constant 4 : i64 // CHECK: %[[CONSTANT_1:.+]] = constant 0 : index - // CHECK: [[LOAD_0:%.+]] = load %arg1[%[[CONSTANT_1]]] : memref<4xi64> + // CHECK: [[LOAD_0:%.+]] = affine.load %arg1[%[[CONSTANT_1]]] : memref<4xi64> // CHECK: [[DIM_1:%.+]] = dim %arg0, 0 : memref // CHECK: [[DIM_1_CAST:%.+]] = index_cast [[DIM_1]] : index to i64 // CHECK: [[CONSTANT_2:%.+]] = constant 0 : i64 @@ -420,7 +420,7 @@ func @test_reshape(%arg0 : tensor, %arg1 : tensor<4xi64>) -> tensor<*x // CHECK: [[MUL_1:%.+]] = muli [[TYPE_IN_BYTES_1]], [[SELECT_0]] : i64 // CHECK: %[[CONSTANT_3:.+]] = constant 1 : index - // CHECK: [[LOAD_1:%.+]] = load %arg1[%[[CONSTANT_3]]] : memref<4xi64> + // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%[[CONSTANT_3]]] : memref<4xi64> // CHECK: [[CONSTANT_3:%.+]] = constant 10 : i64 // CHECK: [[CONSTANT_4:%.+]] = constant 0 : i64 // 
CHECK: [[CMP_1:%.+]] = cmpi "eq", [[LOAD_1]], [[CONSTANT_4]] : i64 @@ -428,11 +428,11 @@ func @test_reshape(%arg0 : tensor, %arg1 : tensor<4xi64>) -> tensor<*x // CHECK: [[MUL_2:%.+]] = muli [[MUL_1]], [[SELECT_1]] : i64 // CHECK: %[[CONSTANT_5:.+]] = constant 2 : index - // CHECK: [[LOAD_2:%.+]] = load %arg1[%[[CONSTANT_5]]] : memref<4xi64> + // CHECK: [[LOAD_2:%.+]] = affine.load %arg1[%[[CONSTANT_5]]] : memref<4xi64> // CHECK: [[MUL_3:%.+]] = muli [[MUL_2]], [[LOAD_2]] : i64 // CHECK: %[[CONSTANT_6:.+]] = constant 3 : index - // CHECK: [[LOAD_3:%.+]] = load %arg1[%[[CONSTANT_6]]] : memref<4xi64> + // CHECK: [[LOAD_3:%.+]] = affine.load %arg1[%[[CONSTANT_6]]] : memref<4xi64> // CHECK: [[MUL_4:%.+]] = muli [[MUL_3]], [[LOAD_3]] : i64 // CHECK: [[CONSTANT_7:%.+]] = constant 0 : i64 @@ -477,10 +477,10 @@ func @test_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: return [[RES]] : memref<10x10xf32> } @@ -497,11 +497,11 @@ func @test_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: return [[RES]] : memref<10x10xf32> } @@ -518,11 +518,11 @@ func @test_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<* // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: 
affine.store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
  // CHECK: return [[RES]] : memref<10x10xf32>
}
@@ -731,7 +731,7 @@ func @test_add_with_broadcasting(%arg0 : tensor, %arg1 : tensor
  // CHECK: [[LOAD1:%.+]] = load %arg0[%[[SELECT1]]] : memref<?xf32>
  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
- // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<?x10xf32>
}
@@ -750,18 +750,18 @@ func @test_reducemax(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32
- // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
+ // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = load %0[%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
+ // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[CMP:%.+]] = cmpf "ogt", [[LOAD2]], [[LOAD1]] : f32
  // CHECK: [[SELECT:%.+]] = select %7, %6, %5 : f32
- // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}
@@ -780,18 +780,18 @@ func @test_reducemin(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 0x7F800000 : f32
- // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
+ // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = load %0[%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
+ // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD2]], [[LOAD1]] : f32
  // CHECK: [[SELECT:%.+]] = select %7, %6, %5 : f32
- // CHECK: store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}
@@ -810,17 +810,17 @@ func @test_reduceprod(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 1.000000e+00 : f32
- // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
+ // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = load %0[%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
+ // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[REDUCE:%.+]] = mulf %6, %5 : f32
- // CHECK: store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}
@@ -839,17 +839,17 @@ func @test_reducesum(%arg0 : tensor<3x2x2xf32>) -> tensor<*xf32> {
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 2) {
  // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32
- // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
+ // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2] : memref<3x2xf32>
  // CHECK: [[DEF_LOOPS2:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOPS2:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 3, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 2, [[DEF_LOOPS2]]#2 -> %arg3 = 0 to 2) {
- // CHECK: [[LOAD1:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
- // CHECK: [[LOAD2:%.+]] = load %0[%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<3x2x2xf32>
+ // CHECK: [[LOAD2:%.+]] = affine.load %0[%arg1, %arg3] : memref<3x2xf32>
  // CHECK: [[REDUCE:%.+]] = addf %6, %5 : f32
- // CHECK: store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
+ // CHECK: affine.store [[REDUCE]], [[RES]][%arg1, %arg3] : memref<3x2xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<3x2xf32>
}
@@ -871,30 +871,30 @@ func @test_softmax(%arg0 : tensor<10x10xf32>) -> tensor<*xf32> {
  // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, %3#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to 10) {
- // CHECK: store [[CST]], [[SUM]][] : memref<f32>
- // CHECK: store [[CST_0]], [[MAX]][] : memref<f32>
+ // CHECK: affine.store [[CST]], [[SUM]][] : memref<f32>
+ // CHECK: affine.store [[CST_0]], [[MAX]][] : memref<f32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
- // CHECK: [[LOAD1:%.+]] = load [[MAX]][] : memref<f32>
- // CHECK: [[LOAD2:%.+]] = load %arg0[%arg1, %arg2] : memref<10x10xf32>
+ // CHECK: [[LOAD1:%.+]] = affine.load [[MAX]][] : memref<f32>
+ // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
  // CHECK: [[COND:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
  // CHECK: [[SELECT:%.+]] = select [[COND]], [[LOAD1]], [[LOAD2]] : f32
- // CHECK: store [[SELECT]], [[MAX]][] : memref<f32>
+ // CHECK: affine.store [[SELECT]], [[MAX]][] : memref<f32>
  // CHECK: }
- // CHECK: %5 = load [[MAX]][] : memref<f32>
+ // CHECK: %5 = affine.load [[MAX]][] : memref<f32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
- // CHECK: [[LOAD1]] = load [[SUM]][] : memref<f32>
- // CHECK: [[LOAD2]] = load %arg0[%arg1, %arg2] : memref<10x10xf32>
+ // CHECK: [[LOAD1]] = affine.load [[SUM]][] : memref<f32>
+ // CHECK: [[LOAD2]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
  // CHECK: [[SUB:%.+]] = subf [[LOAD2]], %5 : f32
  // CHECK: [[EXP:%.+]] = exp [[SUB]] : f32
  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[EXP]] : f32
- // CHECK: store [[ADD]], [[SUM]][] : memref<f32>
- // CHECK: store %10, [[RES]][%arg1, %arg2] : memref<10x10xf32>
+ // CHECK: affine.store [[ADD]], [[SUM]][] : memref<f32>
+ // CHECK: affine.store %10, [[RES]][%arg1, %arg2] : memref<10x10xf32>
  // CHECK: }
- // CHECK: %6 = load [[SUM]][] : memref<f32>
+ // CHECK: %6 = affine.load [[SUM]][] : memref<f32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
- // CHECK: [[LOAD1]] = load [[RES]][%arg1, %arg2] : memref<10x10xf32>
+ // CHECK: [[LOAD1]] = affine.load [[RES]][%arg1, %arg2] : memref<10x10xf32>
  // CHECK: [[DIV:%.+]] = divf [[LOAD1]], %6 : f32
- // CHECK: store [[DIV]], [[RES]][%arg1, %arg2] : memref<10x10xf32>
+ // CHECK: affine.store [[DIV]], [[RES]][%arg1, %arg2] : memref<10x10xf32>
  // CHECK: }
  // CHECK: }
  // CHECK: dealloc [[SUM]] : memref<f32>
@@ -918,19 +918,19 @@ func @test_gemm(%arg0 : tensor<5x10xf32>, %arg1 : tensor<5x10xf32>, %arg2: tenso
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg3 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg4 = 0 to 10) {
  // CHECK: krnl.iterate([[OPT_LOOPS]]#2) with ([[DEF_LOOPS]]#2 -> %arg5 = 0 to 5) {
- // CHECK: [[A:%.+]] = load %arg0[%arg5, %arg3] : memref<5x10xf32>
- // CHECK: [[B:%.+]] = load %arg1[%arg5, %arg4] : memref<5x10xf32>
- // CHECK: [[Y:%.+]] = load [[RES]][%arg3, %arg4] : memref<10x10xf32>
+ // CHECK: [[A:%.+]] = affine.load %arg0[%arg5, %arg3] : memref<5x10xf32>
+ // CHECK: [[B:%.+]] = affine.load %arg1[%arg5, %arg4] : memref<5x10xf32>
+ // CHECK: [[Y:%.+]] = affine.load [[RES]][%arg3, %arg4] : memref<10x10xf32>
  // CHECK: [[AB:%.+]] = mulf [[A]], [[B]] : f32
  // CHECK: [[SUM:%.+]] = addf [[Y]], [[AB]] : f32
- // CHECK: store [[SUM]], [[RES]][%arg3, %arg4] : memref<10x10xf32>
+ // CHECK: affine.store [[SUM]], [[RES]][%arg3, %arg4] : memref<10x10xf32>
  // CHECK: }
- // CHECK: [[LOAD_Y:%.+]] = load [[RES]][%arg3, %arg4] : memref<10x10xf32>
+ // CHECK: [[LOAD_Y:%.+]] = affine.load [[RES]][%arg3, %arg4] : memref<10x10xf32>
  // CHECK: [[ALPHA_AB:%.+]] = mulf [[ALPHA]], [[LOAD_Y]] : f32
- // CHECK: [[C:%.+]] = load %arg2[%arg4] : memref<10xf32>
+ // CHECK: [[C:%.+]] = affine.load %arg2[%arg4] : memref<10xf32>
  // CHECK: [[BETA_C:%.+]] = mulf [[BETA]], [[C]] : f32
  // CHECK: [[Y_RES:%.+]] = addf [[ALPHA_AB]], [[BETA_C]] : f32
- // CHECK: store [[Y_RES]], [[RES]][%arg3,
%arg4] : memref<10x10xf32> + // CHECK: affine.store [[Y_RES]], [[RES]][%arg3, %arg4] : memref<10x10xf32> // CHECK: } // CHECK: return [[RES]] : memref<10x10xf32> // CHECK: } @@ -994,16 +994,16 @@ func @test_transpose(%arg0 : tensor<10x20x30x40xf32>) -> tensor<*xf32> { // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3 // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1, [[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#0 -> %arg1 = 0 to 10, [[LOOPS]]#1 -> %arg2 = 0 to 20, [[LOOPS]]#2 -> %arg3 = 0 to 30, [[LOOPS]]#3 -> %arg4 = 0 to 40) { - // CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2, %arg3, %arg4] : memref<10x20x30x40xf32> - // CHECK: store [[LOAD]], [[RES1]][%arg4, %arg3, %arg2, %arg1] : memref<40x30x20x10xf32> + // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3, %arg4] : memref<10x20x30x40xf32> + // CHECK: affine.store [[LOAD]], [[RES1]][%arg4, %arg3, %arg2, %arg1] : memref<40x30x20x10xf32> // CHECK: [[LOOPS:%.+]]:4 = krnl.define_loops 4 // CHECK: [[OPT_LOOPS:%.+]]:4 = krnl.optimize_loops { // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1, [[LOOPS]]#2, [[LOOPS]]#3 // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1, [[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#0 -> %arg1 = 0 to 40, [[LOOPS]]#1 -> %arg2 = 0 to 30, [[LOOPS]]#2 -> %arg3 = 0 to 20, [[LOOPS]]#3 -> %arg4 = 0 to 10) { - // CHECK: [[LOAD:%.+]] = load [[RES1]][%arg1, %arg2, %arg3, %arg4] : memref<40x30x20x10xf32> - // CHECK: store [[LOAD]], [[RES0]][%arg1, %arg4, %arg2, %arg3] : memref<40x10x30x20xf32> + // CHECK: [[LOAD:%.+]] = affine.load [[RES1]][%arg1, %arg2, %arg3, %arg4] : memref<40x30x20x10xf32> + // CHECK: affine.store [[LOAD]], [[RES0]][%arg1, %arg4, %arg2, %arg3] : memref<40x10x30x20xf32> // CHECK: dealloc [[RES1]] : memref<40x30x20x10xf32> // CHECK: return [[RES0]] : memref<40x10x30x20xf32> @@ -1088,18 +1088,18 @@ func @test_matmul1(%arg0 : tensor<10x5xf32>, %arg1 : tensor<5x10xf32>) -> tensor // CHECK: krnl.return_loops [[LOOPS]]#0, [[LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 10, [[LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1 // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops { // CHECK: krnl.return_loops [[LOOPS_REDUCE]] // CHECK: } : () -> !krnl.loop // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) { - // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2, %arg4] : memref<10x5xf32> - // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg4, %arg3] : memref<5x10xf32> - // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2, %arg4] : memref<10x5xf32> + // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<5x10xf32> + // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32 // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32 - // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: } // CHECK: } // CHECK: return [[RES]] : memref<10x10xf32> 
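The test updates above all exercise one pattern: when the buffer shape is fully static and the subscripts are plain loop IVs, the lowering now emits affine accesses instead of standard ones. A minimal sketch of the two forms, using hypothetical values %A, %B, %i, %j (not taken from any test):

  %v = load %A[%i, %j] : memref<10x10xf32>          // std.load: indices are arbitrary Values
  %v = affine.load %A[%i, %j] : memref<10x10xf32>   // affine.load: indices must be affine in loop IVs and symbols
  affine.store %v, %B[%i, %j] : memref<10x10xf32>

The affine forms expose the access map to later affine-dialect passes; std.load/std.store remain only where getLoopIVsForBroadcasting computes an index that is not yet expressible as an affine map.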
@@ -1121,18 +1121,18 @@ func @test_matmul2(%arg0 : tensor<10x5xf32>, %arg1 : tensor<2x3x5x10xf32>) -> te // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) { // CHECK: krnl.iterate([[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) { - // CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> + // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1 // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops { // CHECK: krnl.return_loops [[LOOPS_REDUCE]] // CHECK: } : () -> !krnl.loop // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) { - // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg4, %arg6] : memref<10x5xf32> - // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32> - // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> + // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg4, %arg6] : memref<10x5xf32> + // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32> + // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32 // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32 - // CHECK: store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> + // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> // CHECK: } // CHECK: } // CHECK: } @@ -1155,18 +1155,18 @@ func @test_matmul3(%arg0 : tensor<2x3x10x5xf32>, %arg1 : tensor<2x3x5x10xf32>) - // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[LOOPS]]#0 -> %arg2 = 0 to 2, [[LOOPS]]#1 -> %arg3 = 0 to 3) { // CHECK: krnl.iterate([[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[LOOPS]]#2 -> %arg4 = 0 to 10, [[LOOPS]]#3 -> %arg5 = 0 to 10) { - // CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> + // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1 // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops { // CHECK: krnl.return_loops [[LOOPS_REDUCE]] // CHECK: } : () -> !krnl.loop // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg6 = 0 to 5) { - // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2, %arg3, %arg4, %arg6] : memref<2x3x10x5xf32> - // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32> - // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> + // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2, %arg3, %arg4, %arg6] : memref<2x3x10x5xf32> + // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2, %arg3, %arg6, %arg5] : memref<2x3x5x10xf32> + // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32 // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32 - // CHECK: store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> + // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<2x3x10x10xf32> // CHECK: } 
  // CHECK: }
  // CHECK: }
@@ -1188,18 +1188,18 @@ func @test_matmul4(%arg0 : tensor<5xf32>, %arg1 : tensor<5x10xf32>) -> tensor<*x
  // CHECK: krnl.return_loops [[LOOPS]]
  // CHECK: } : () -> !krnl.loop
  // CHECK: krnl.iterate([[OPT_LOOPS]]) with ([[LOOPS]] -> %arg2 = 0 to 10) {
- // CHECK: store [[CONSTANT]], [[RES]][%arg2] : memref<10xf32>
+ // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2] : memref<10xf32>
  // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
  // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
  // CHECK: } : () -> !krnl.loop
  // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg3 = 0 to 5) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg3] : memref<5xf32>
- // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg3, %arg2] : memref<5x10xf32>
- // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2] : memref<10xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg3] : memref<5xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg3, %arg2] : memref<5x10xf32>
+ // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2] : memref<10xf32>
  // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
  // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
- // CHECK: store [[ADD]], [[RES]][%arg2] : memref<10xf32>
+ // CHECK: affine.store [[ADD]], [[RES]][%arg2] : memref<10xf32>
  // CHECK: }
  // CHECK: }
  // CHECK: return [[RES]] : memref<10xf32>
@@ -1223,18 +1223,18 @@ func @test_matmul5(%arg0 : tensor<5xf32>, %arg1 : tensor) -> tensor<
  // CHECK: [[DIM_1:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
  // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
- // CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
  // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
  // CHECK: } : () -> !krnl.loop
  // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg4] : memref<5xf32>
- // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2, %arg4, %arg3] : memref<?x5x10xf32>
- // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg4] : memref<5xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2, %arg4, %arg3] : memref<?x5x10xf32>
+ // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
  // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
- // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: }
  // CHECK: }
  // CHECK: }
@@ -1259,18 +1259,18 @@ func @test_matmul6(%arg0 : tensor, %arg1 : tensor<5xf32>) -> tensor<
  // CHECK: [[DIM_1:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
  // CHECK: krnl.iterate([[OPT_LOOPS]]#0) with ([[LOOPS]]#0 -> %arg2 = 0 to [[DIM_1]]) {
  // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[LOOPS]]#1 -> %arg3 = 0 to 10) {
- // CHECK: store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: affine.store [[CONSTANT]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
  // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
  // CHECK: } : () -> !krnl.loop
  // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg4 = 0 to 5) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2, %arg3, %arg4] : memref<?x10x5xf32>
- // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg4] : memref<5xf32>
- // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2, %arg3, %arg4] : memref<?x10x5xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg4] : memref<5xf32>
+ // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
  // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
- // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+ // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
  // CHECK: }
  // CHECK: }
  // CHECK: }
@@ -1288,18 +1288,18 @@ func @test_matmul7(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<*xf32
  // CHECK: [[RES:%.+]] = alloc() : memref<1xf32>
  // CHECK: [[CONSTANT:%.+]] = constant 0.000000e+00 : f32
  // CHECK: %[[CONSTANT_INDEX:.+]] = constant 0 : index
- // CHECK: store [[CONSTANT]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
+ // CHECK: affine.store [[CONSTANT]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
  // CHECK: [[LOOPS_REDUCE:%.+]] = krnl.define_loops 1
  // CHECK: [[OPT_LOOPS_REDUCE:%.+]] = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[LOOPS_REDUCE]]
  // CHECK: } : () -> !krnl.loop
  // CHECK: krnl.iterate([[OPT_LOOPS_REDUCE]]) with ([[LOOPS_REDUCE]] -> %arg2 = 0 to 5) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg2] : memref<5xf32>
- // CHECK: [[LOAD_1:%.+]] = load %arg1[%arg2] : memref<5xf32>
- // CHECK: [[LOAD_RES:%.+]] = load [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg2] : memref<5xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg1[%arg2] : memref<5xf32>
+ // CHECK: [[LOAD_RES:%.+]] = affine.load [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
  // CHECK: [[MUL:%.+]] = mulf [[LOAD_0]], [[LOAD_1]] : f32
  // CHECK: [[ADD:%.+]] = addf [[LOAD_RES]], [[MUL]] : f32
- // CHECK: store [[ADD]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
+ // CHECK: affine.store [[ADD]], [[RES]][%[[CONSTANT_INDEX]]] : memref<1xf32>
  // CHECK: }
  // CHECK: return [[RES]] : memref<1xf32>
}
@@ -1328,7 +1328,7 @@ func @test_conv_no_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg4 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg5 = 0 to 58) {
- // CHECK: store [[CONST1]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
+ // CHECK: affine.store [[CONST1]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
  // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
@@ -1373,7 +1373,7 @@ func @test_conv_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg5 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg6 = 0 to 58) {
- // CHECK: store [[CONST1]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
+ // CHECK: affine.store [[CONST1]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
  // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
@@ -1416,23 +1416,22 @@ func @test_conv_no_bias_no_pad_w_group(%arg0 : tensor<1x9x32x64xf32>, %arg1 : te
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_OUTER_LOOPS]]#0, [[OPT_OUTER_LOOPS]]#1, [[OPT_OUTER_LOOPS]]#2) with ([[OUTER_LOOPS]]#0 -> %arg2 = 0 to 1, [[OUTER_LOOPS]]#1 -> %arg3 = 0 to 3, [[OUTER_LOOPS]]#2 -> %arg4 = 0 to 1) {
- // CHECK: [[MUL1:%.+]] = muli %arg3, [[CONST0]] : index
- // CHECK: %[[ADD1:.+]] = addi [[MUL1]], %arg4 : index
+ // CHECK: %[[ADD1:.+]] = affine.apply #{{.*}}(%arg3, [[CONST0]])[%arg4]
  // CHECK: [[SPATIAL_LOOPS:%.+]]:2 = krnl.define_loops 2
  // CHECK: [[OPT_SPATIAL_LOOPS:%.+]]:2 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[SPATIAL_LOOPS]]#0, [[SPATIAL_LOOPS]]#1
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg5 = 0 to 27, [[SPATIAL_LOOPS]]#1 -> %arg6 = 0 to 58) {
- // CHECK: store [[CONST1]], [[RES]][%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
+ // CHECK: affine.store [[CONST1]], [[RES]][%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
  // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 3, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
- // CHECK: [[ADD2:%.+]] = affine.apply #{{.*}}(%arg3, %arg7)[%c3]
- // CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
+ // CHECK: [[ADD2:%.+]] = affine.apply #{{.*}}(%arg3, %arg7)[%c3]
+ // CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
  // CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg6, %arg9)
  // CHECK: [[DATA:%.+]] = affine.load %arg0[%arg2, [[ADD2]], [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x9x32x64xf32>
  // CHECK: [[KERNEL:%.+]] = affine.load %arg1[%[[ADD1]], %arg7, %arg8, %arg9] : memref<5x3x6x7xf32>
@@ -1471,7 +1470,7 @@ func @test_conv_no_bias_no_pad_w_strides(%arg0 : tensor<1x9x32x64xf32>, %arg1 :
  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_SPATIAL_LOOPS]]#0, [[OPT_SPATIAL_LOOPS]]#1) with ([[SPATIAL_LOOPS]]#0 -> %arg4 = 0 to 14, [[SPATIAL_LOOPS]]#1 -> %arg5 = 0 to 29) {
- // CHECK: store [[CONST1]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
+ // CHECK: affine.store [[CONST1]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
  // CHECK: [[INNER_LOOPS:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_INNER_LOOPS:%.+]]:3 = krnl.optimize_loops {
  // CHECK: krnl.return_loops [[INNER_LOOPS]]#0, [[INNER_LOOPS]]#1, [[INNER_LOOPS]]#2
@@ -1507,19 +1506,19 @@ func @test_batchnorm_testmode_Nd(%arg0: tensor<1x2x1x3xf32>, %arg1: tensor<2xf32
  // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1, [[DEF_LOOPS]]#2, [[DEF_LOOPS]]#3
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#1 -> %arg5 = 0 to 2) {
- // CHECK: [[SCALE:%.+]] = load %arg1[%arg5] : memref<2xf32>
- // CHECK: [[BIAS:%.+]] = load %arg2[%arg5] : memref<2xf32>
- // CHECK: [[MEAN:%.+]] = load %arg3[%arg5] : memref<2xf32>
- // CHECK: [[VARIANCE:%.+]] = load %arg4[%arg5] : memref<2xf32>
+ // CHECK: [[SCALE:%.+]] = affine.load %arg1[%arg5] : memref<2xf32>
+ // CHECK: [[BIAS:%.+]] = affine.load
%arg2[%arg5] : memref<2xf32> + // CHECK: [[MEAN:%.+]] = affine.load %arg3[%arg5] : memref<2xf32> + // CHECK: [[VARIANCE:%.+]] = affine.load %arg4[%arg5] : memref<2xf32> // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#2, [[OPT_LOOPS]]#3) with ([[DEF_LOOPS]]#0 -> %arg6 = 0 to 1, [[DEF_LOOPS]]#2 -> %arg7 = 0 to 1, [[DEF_LOOPS]]#3 -> %arg8 = 0 to 3) { - // CHECK: [[LOADED_VAL:%.+]] = load %arg0[%arg6, %arg5, %arg7, %arg8] : memref<1x2x1x3xf32> + // CHECK: [[LOADED_VAL:%.+]] = affine.load %arg0[%arg6, %arg5, %arg7, %arg8] : memref<1x2x1x3xf32> // CHECK: [[DIVIDEND:%.+]] = subf [[LOADED_VAL]], [[MEAN]] : f32 // CHECK: [[ADJUSTED_VARIANCE:%.+]] = addf [[VARIANCE]], [[EPSILON]] : f32 // CHECK: [[DIVISOR:%.+]] = sqrt [[ADJUSTED_VARIANCE]] : f32 // CHECK: [[NORM:%.+]] = divf [[DIVIDEND]], [[DIVISOR]] : f32 // CHECK: [[SCALE_NORM:%.+]] = mulf [[SCALE]], [[NORM]] : f32 // CHECK: [[SHIFT_SCALE_NORM:%.+]] = addf [[SCALE_NORM]], [[BIAS]] : f32 - // CHECK: store [[SHIFT_SCALE_NORM]], [[RES]][%arg6, %arg5, %arg7, %arg8] : memref<1x2x1x3xf32> + // CHECK: affine.store [[SHIFT_SCALE_NORM]], [[RES]][%arg6, %arg5, %arg7, %arg8] : memref<1x2x1x3xf32> // CHECK: } // CHECK: } // CHECK: return [[RES]] : memref<1x2x1x3xf32> @@ -1539,19 +1538,19 @@ func @test_batchnorm_testmode_1d(%arg0: tensor<10xf32>, %arg1: tensor<1xf32>, %a // CHECK: krnl.return_loops [[DEF_LOOPS]] // CHECK: } : () -> !krnl.loop // CHECK: %[[ZERO_INDEX:.+]] = constant 0 : index - // CHECK: [[SCALE:%.+]] = load %arg1[%[[ZERO_INDEX]]] : memref<1xf32> - // CHECK: [[BIAS:%.+]] = load %arg2[%[[ZERO_INDEX]]] : memref<1xf32> - // CHECK: [[MEAN:%.+]] = load %arg3[%[[ZERO_INDEX]]] : memref<1xf32> - // CHECK: [[VARIANCE:%.+]] = load %arg4[%[[ZERO_INDEX]]] : memref<1xf32> + // CHECK: [[SCALE:%.+]] = affine.load %arg1[%[[ZERO_INDEX]]] : memref<1xf32> + // CHECK: [[BIAS:%.+]] = affine.load %arg2[%[[ZERO_INDEX]]] : memref<1xf32> + // CHECK: [[MEAN:%.+]] = affine.load %arg3[%[[ZERO_INDEX]]] : memref<1xf32> + // CHECK: [[VARIANCE:%.+]] = affine.load %arg4[%[[ZERO_INDEX]]] : memref<1xf32> // CHECK: krnl.iterate([[OPT_LOOPS]]) with ([[DEF_LOOPS]] -> %arg5 = 0 to 10) { - // CHECK: [[LOADED_VAL:%.+]] = load %arg0[%arg5] : memref<10xf32> + // CHECK: [[LOADED_VAL:%.+]] = affine.load %arg0[%arg5] : memref<10xf32> // CHECK: [[DIVIDEND:%.+]] = subf [[LOADED_VAL]], [[MEAN]] : f32 // CHECK: [[ADJUSTED_VARIANCE:%.+]] = addf [[VARIANCE]], [[EPSILON]] : f32 // CHECK: [[DIVISOR:%.+]] = sqrt [[ADJUSTED_VARIANCE]] : f32 // CHECK: [[NORM:%.+]] = divf [[DIVIDEND]], [[DIVISOR]] : f32 // CHECK: [[SCALE_NORM:%.+]] = mulf [[SCALE]], [[NORM]] : f32 // CHECK: [[SHIFT_SCALE_NORM:%.+]] = addf [[SCALE_NORM]], [[BIAS]] : f32 - // CHECK: store [[SHIFT_SCALE_NORM]], [[RES]][%arg5] : memref<10xf32> + // CHECK: affine.store [[SHIFT_SCALE_NORM]], [[RES]][%arg5] : memref<10xf32> // CHECK: } // CHECK: return [[RES]] : memref<10xf32> } @@ -1614,17 +1613,16 @@ func @test_constant_pad1(%arg0: tensor<16x16xf32>) -> tensor<18x20xf32> { // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 18, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 20) { // CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32 - // CHECK: store [[CST]], [[RES]][%arg1, %arg2] : memref<18x20xf32> + // CHECK: affine.store [[CST]], [[RES]][%arg1, %arg2] : memref<18x20xf32> // CHECK: } // CHECK: [[DEF_LOOPS2:%.+]]:2 = krnl.define_loops 2 // CHECK: [[OPT_LOOPS2:%.+]]:2 = krnl.optimize_loops { // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1 // 
CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 16, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 16) { - // CHECK: [[CST1:%.+]] = constant 3 : index - // CHECK: [[ADD:%.+]] = addi [[CST1]], %arg2 : index - // CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<16x16xf32> - // CHECK: store [[LOAD]], [[RES]][%arg1, [[ADD]]] : memref<18x20xf32> + // CHECK: [[ADD:%.+]] = affine.apply #{{.*}}(%arg2) + // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<16x16xf32> + // CHECK: affine.store [[LOAD]], [[RES]][%arg1, [[ADD]]] : memref<18x20xf32> // CHECK: } } @@ -1640,17 +1638,16 @@ func @test_pad1(%arg0: tensor<16x16xf32>) -> tensor<18x20xf32> { // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1) with ([[DEF_LOOPS1]]#0 -> %arg1 = 0 to 18, [[DEF_LOOPS1]]#1 -> %arg2 = 0 to 20) { // CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32 - // CHECK: store [[CST]], [[RES]][%arg1, %arg2] : memref<18x20xf32> + // CHECK: affine.store [[CST]], [[RES]][%arg1, %arg2] : memref<18x20xf32> // CHECK: } // CHECK: [[DEF_LOOPS2:%.+]]:2 = krnl.define_loops 2 // CHECK: [[OPT_LOOPS2:%.+]]:2 = krnl.optimize_loops { // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1) with ([[DEF_LOOPS2]]#0 -> %arg1 = 0 to 16, [[DEF_LOOPS2]]#1 -> %arg2 = 0 to 16) { - // CHECK: [[CST1:%.+]] = constant 3 : index - // CHECK: [[ADD:%.+]] = addi [[CST1]], %arg2 : index - // CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<16x16xf32> - // CHECK: store [[LOAD]], [[RES]][%arg1, [[ADD]]] : memref<18x20xf32> + // CHECK: [[ADD:%.+]] = affine.apply #{{.*}}(%arg2) + // CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<16x16xf32> + // CHECK: affine.store [[LOAD]], [[RES]][%arg1, [[ADD]]] : memref<18x20xf32> // CHECK: } } @@ -1677,28 +1674,26 @@ func @test_concat_1(%arg0 : tensor<5x5x1x32xf32>, %arg1 : tensor<5x5x3x32xf32>, // CHECK: krnl.return_loops [[DEF_LOOPS0]]#0, [[DEF_LOOPS0]]#1, [[DEF_LOOPS0]]#2, [[DEF_LOOPS0]]#3 // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS0]]#0, [[OPT_LOOPS0]]#1, [[OPT_LOOPS0]]#2, [[OPT_LOOPS0]]#3) with ([[DEF_LOOPS0]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS0]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS0]]#2 -> %arg5 = 0 to 1, [[DEF_LOOPS0]]#3 -> %arg6 = 0 to 32) { - // CHECK: [[LOAD0:%.+]] = load %arg0[%arg3, %arg4, %arg5, %arg6] : memref<5x5x1x32xf32> - // CHECK: store [[LOAD0]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<5x5x9x32xf32> + // CHECK: [[LOAD0:%.+]] = affine.load %arg0[%arg3, %arg4, %arg5, %arg6] : memref<5x5x1x32xf32> + // CHECK: affine.store [[LOAD0]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<5x5x9x32xf32> // CHECK: [[DEF_LOOPS1:%.+]]:4 = krnl.define_loops 4 // CHECK: [[OPT_LOOPS1:%.+]]:4 = krnl.optimize_loops { // CHECK: krnl.return_loops [[DEF_LOOPS1]]#0, [[DEF_LOOPS1]]#1, [[DEF_LOOPS1]]#2, [[DEF_LOOPS1]]#3 // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS1]]#0, [[OPT_LOOPS1]]#1, [[OPT_LOOPS1]]#2, [[OPT_LOOPS1]]#3) with ([[DEF_LOOPS1]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS1]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS1]]#2 -> %arg5 = 0 to 3, [[DEF_LOOPS1]]#3 -> %arg6 = 0 to 32) { - // CHECK: [[OFF1:%.+]] = constant 1 : index - // CHECK: [[ADD1:%.+]] = addi [[OFF1]], %arg5 : index - // CHECK: [[LOAD1:%.+]] = load %arg1[%arg3, %arg4, %arg5, %arg6] : 
memref<5x5x3x32xf32> - // CHECK: store [[LOAD1]], [[RES]][%arg3, %arg4, [[ADD1]], %arg6] : memref<5x5x9x32xf32> + // CHECK: [[AFFINE_APPLY1:%.+]] = affine.apply #{{.*}}(%arg5) + // CHECK: [[LOAD1:%.+]] = affine.load %arg1[%arg3, %arg4, %arg5, %arg6] : memref<5x5x3x32xf32> + // CHECK: affine.store [[LOAD1]], [[RES]][%arg3, %arg4, [[AFFINE_APPLY1]], %arg6] : memref<5x5x9x32xf32> // CHECK: [[DEF_LOOPS2:%.+]]:4 = krnl.define_loops 4 // CHECK: [[OPT_LOOPS2:%.+]]:4 = krnl.optimize_loops { // CHECK: krnl.return_loops [[DEF_LOOPS2]]#0, [[DEF_LOOPS2]]#1, [[DEF_LOOPS2]]#2, [[DEF_LOOPS2]]#3 // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS2]]#0, [[OPT_LOOPS2]]#1, [[OPT_LOOPS2]]#2, [[OPT_LOOPS2]]#3) with ([[DEF_LOOPS2]]#0 -> %arg3 = 0 to 5, [[DEF_LOOPS2]]#1 -> %arg4 = 0 to 5, [[DEF_LOOPS2]]#2 -> %arg5 = 0 to 5, [[DEF_LOOPS2]]#3 -> %arg6 = 0 to 32) { - // CHECK: [[OFF2:%.+]] = constant 4 : index - // CHECK: [[ADD2:%.+]] = addi [[OFF2]], %arg5 : index - // CHECK: [[LOAD2:%.+]] = load %arg2[%arg3, %arg4, %arg5, %arg6] : memref<5x5x5x32xf32> - // CHECK: store [[LOAD2]], [[RES]][%arg3, %arg4, [[ADD2]], %arg6] : memref<5x5x9x32xf32> + // CHECK: [[AFFINE_APPLY2:%.+]] = affine.apply #{{.*}}(%arg5) + // CHECK: [[LOAD2:%.+]] = affine.load %arg2[%arg3, %arg4, %arg5, %arg6] : memref<5x5x5x32xf32> + // CHECK: affine.store [[LOAD2]], [[RES]][%arg3, %arg4, [[AFFINE_APPLY2]], %arg6] : memref<5x5x9x32xf32> // CHECK: return [[RES]] : memref<5x5x9x32xf32> } @@ -1725,20 +1720,20 @@ func @test_pool_general_computation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_OUTPUT_LOOPS]]#0, [[OPT_OUTPUT_LOOPS]]#1, [[OPT_OUTPUT_LOOPS]]#2, [[OPT_OUTPUT_LOOPS]]#3) with ([[OUTPUT_LOOPS]]#0 -> %arg1 = 0 to 1, [[OUTPUT_LOOPS]]#1 -> %arg2 = 0 to 3, [[OUTPUT_LOOPS]]#2 -> %arg3 = 0 to 31, [[OUTPUT_LOOPS]]#3 -> %arg4 = 0 to 31) { - // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: [[POOL_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[OPT_POOL_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: krnl.return_loops [[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #map3(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #map3(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { // CHECK: {{.*}} = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> - // CHECK: {{.*}} = load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: } - // CHECK: {{.*}} = load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> - // CHECK: store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, 
%arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: } } @@ -1768,7 +1763,7 @@ func @test_averagepool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<* // CHECK-LABEL: @test_averagepool_identity_value // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> // CHECK: [[IDENTITY:%.+]] = constant 0.000000e+00 : f32 - // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> } // ----- @@ -1780,7 +1775,7 @@ func @test_maxpool_identity_value(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*xf32 // CHECK-LABEL: @test_maxpool_identity_value // CHECK: [[RES:%.+]] = alloc() : memref<1x3x31x31xf32> // CHECK: [[IDENTITY:%.+]] = constant 0xFF800000 : f32 - // CHECK: store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store [[IDENTITY]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> } // ----- @@ -1802,17 +1797,17 @@ func @test_averagepool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tenso // CHECK: [[OPT_POOL_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: krnl.return_loops [[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #map3(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #map3(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32> - // CHECK: [[OUTPUT_LOAD:%.+]] = load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: [[SUM:%.+]] = addf [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32 - // CHECK: store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store [[SUM]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: } - // CHECK: [[NUMERATOR:%.+]] = load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: [[NUMERATOR:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: [[AVERAGE:%.+]] = divf [[NUMERATOR]], {{.*}} : f32 - // CHECK: store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> + // CHECK: affine.store [[AVERAGE]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32> // CHECK: } } @@ -1835,17 +1830,17 @@ func @test_maxpool_pooling_operation(%arg0 : tensor<1x3x32x32xf32>) -> tensor<*x // CHECK: [[OPT_POOL_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: krnl.return_loops [[POOL_LOOPS]]#0, [[POOL_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) - // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #map3(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 to min #map3(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) { + // CHECK: krnl.iterate([[OPT_POOL_LOOPS]]#0, [[OPT_POOL_LOOPS]]#1) with ([[POOL_LOOPS]]#0 -> %arg5 = 0 to min #{{.*}}(%arg3)[%c32, %c2, %c0, %c1, %c1_0], [[POOL_LOOPS]]#1 -> %arg6 = 0 
to min #{{.*}}(%arg4)[%c32_1, %c2_2, %c0_3, %c1_4, %c1_5]) {
  // CHECK: [[INPUT_LOAD:%.+]] = load %arg0[%arg1, %arg2, {{.*}}, {{.*}}] : memref<1x3x32x32xf32>
- // CHECK: [[OUTPUT_LOAD:%.+]] = load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
+ // CHECK: [[OUTPUT_LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: [[GREATER:%.+]] = cmpf "ogt", [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
  // CHECK: [[SELECT:%.+]] = select [[GREATER]], [[OUTPUT_LOAD]], [[INPUT_LOAD]] : f32
- // CHECK: store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
+ // CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }
- // CHECK-NOT: {{.*}} = load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
- // CHECK-NOT: store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
+ // CHECK-NOT: {{.*}} = affine.load [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
+ // CHECK-NOT: affine.store {{.*}}, [[RES]][%arg1, %arg2, %arg3, %arg4] : memref<1x3x31x31xf32>
  // CHECK: }
}
@@ -1869,8 +1864,8 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: krnl.return_loops [[INITIALIZE_LOOPS]]#0, [[INITIALIZE_LOOPS]]#1, [[INITIALIZE_LOOPS]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[INITIALIZE_OPT_LOOPS]]#0, [[INITIALIZE_OPT_LOOPS]]#1, [[INITIALIZE_OPT_LOOPS]]#2) with ([[INITIALIZE_LOOPS]]#0 -> %arg3 = 0 to 1, [[INITIALIZE_LOOPS]]#1 -> %arg4 = 0 to 3, [[INITIALIZE_LOOPS]]#2 -> %arg5 = 0 to 3) {
- // CHECK: store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
- // CHECK: store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
+ // CHECK: affine.store [[INITIAL_VALUE]], [[HIDDEN_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
+ // CHECK: affine.store [[INITIAL_VALUE]], [[CELL_STATE]][%arg3, %arg4, %arg5] : memref<1x3x3xf32>
  // CHECK: }
  // CHECK: [[SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
@@ -1898,26 +1893,26 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: [[ct:%.+]] = alloc() : memref<f32>
  // CHECK: [[Ft:%.+]] = alloc() : memref<f32>
  // CHECK: [[It:%.+]] = alloc() : memref<f32>
- // CHECK: [[Ht1_LOAD:%.+]] = load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
- // CHECK: [[Ct1_LOAD:%.+]] = load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
+ // CHECK: [[Ht1_LOAD:%.+]] = affine.load [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
+ // CHECK: [[Ct1_LOAD:%.+]] = affine.load [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
  // CHECK: [[ZERO_FLOAT:%.+]] = constant 0.000000e+00 : f32
  // CHECK: [[XtWi_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[XtWi_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Ri_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ri_GEMM]][] : memref<f32>
  // CHECK: [[XtWo_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[XtWo_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Ro_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Ro_GEMM]][] : memref<f32>
  // CHECK: [[XtWf_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[XtWf_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Rf_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rf_GEMM]][] : memref<f32>
  // CHECK: [[XtWc_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[XtWc_GEMM]][] : memref<f32>
  // CHECK: [[Ht1Rc_GEMM:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
+ // CHECK: affine.store [[ZERO_FLOAT]], [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: [[REDUCTION_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: [[REDUCTION_OPT_LOOPS:%.+]] = krnl.optimize_loops {
@@ -1928,63 +1923,63 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: [[OUTPUT_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c1, %c3]
  // CHECK: [[FORGET_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c2, %c3]
  // CHECK: [[CELL_HIDDEN_INDEX:%.+]] = affine.apply #{{.*}}(%arg5)[%c3_2, %c3]
- // CHECK: [[Xt_LOAD:%.+]] = load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
+ // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, %arg4, %arg6] : memref<4x3x2xf32>
- // CHECK: [[Wi_LOAD:%.+]] = load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
+ // CHECK: [[Wi_LOAD:%.+]] = affine.load %arg1[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wi_LOAD]] : f32
- // CHECK: {{.*}} = load [[XtWi_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[XtWi_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[XtWi_GEMM]][] : memref<f32>
- // CHECK: [[Ri_LOAD:%.+]] = load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
+ // CHECK: [[Ri_LOAD:%.+]] = affine.load %arg2[%c0, [[INPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ri_LOAD]] : f32
- // CHECK: {{.*}} = load [[Ht1Ri_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[Ht1Ri_GEMM]][] : memref<f32>
- // CHECK: [[Wo_LOAD:%.+]] = load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
+ // CHECK: [[Wo_LOAD:%.+]] = affine.load %arg1[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wo_LOAD]] : f32
- // CHECK: {{.*}} = load [[XtWo_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[XtWo_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[XtWo_GEMM]][] : memref<f32>
- // CHECK: [[Ro_LOAD:%.+]] = load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
+ // CHECK: [[Ro_LOAD:%.+]] = affine.load %arg2[%c0, [[OUTPUT_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Ro_LOAD]] : f32
- // CHECK: {{.*}} = load [[Ht1Ro_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[Ht1Ro_GEMM]][] : memref<f32>
- // CHECK: [[Wf_LOAD:%.+]] = load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
+ // CHECK: [[Wf_LOAD:%.+]] = affine.load %arg1[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wf_LOAD]] : f32
- // CHECK: {{.*}} = load [[XtWf_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[XtWf_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[XtWf_GEMM]][] : memref<f32>
- // CHECK: [[Rf_LOAD:%.+]] = load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
+ // CHECK: [[Rf_LOAD:%.+]] = affine.load %arg2[%c0, [[FORGET_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rf_LOAD]] : f32
- // CHECK: {{.*}} = load [[Ht1Rf_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[Ht1Rf_GEMM]][] : memref<f32>
- // CHECK: [[Wc_LOAD:%.+]] = load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
+ // CHECK: [[Wc_LOAD:%.+]] = affine.load %arg1[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x2xf32>
  // CHECK: {{.*}} = mulf [[Xt_LOAD]], [[Wc_LOAD]] : f32
- // CHECK: {{.*}} = load [[XtWc_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[XtWc_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[XtWc_GEMM]][] : memref<f32>
- // CHECK: [[Rc_LOAD:%.+]] = load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
+ // CHECK: [[Rc_LOAD:%.+]] = affine.load %arg2[%c0, [[CELL_HIDDEN_INDEX]], %arg6] : memref<1x12x3xf32>
  // CHECK: {{.*}} = mulf [[Ht1_LOAD]], [[Rc_LOAD]] : f32
- // CHECK: {{.*}} = load [[Ht1Rc_GEMM]][] : memref<f32>
+ // CHECK: {{.*}} = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
- // CHECK: store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
+ // CHECK: affine.store {{.*}}, [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: }
- // CHECK: [[XtWi_LOAD:%.+]] = load [[XtWi_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Ri_LOAD:%.+]] = load [[Ht1Ri_GEMM]][] : memref<f32>
+ // CHECK: [[XtWi_LOAD:%.+]] = affine.load [[XtWi_GEMM]][] : memref<f32>
+ // CHECK: [[Ht1Ri_LOAD:%.+]] = affine.load [[Ht1Ri_GEMM]][] : memref<f32>
  // CHECK: [[It_OUTPUT:%.+]] = addf [[XtWi_LOAD]], [[Ht1Ri_LOAD]] : f32
  // CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
- // CHECK: store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
+ // CHECK: affine.store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = constant 1.000000e+00 : f32
@@ -1993,14 +1988,14 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
- // CHECK: [[It_LOAD:%.+]] = load [[It]][] : memref<f32>
+ // CHECK: [[It_LOAD:%.+]] = affine.load [[It]][] : memref<f32>
- // CHECK: [[XtWf_LOAD:%.+]] = load [[XtWf_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Rf_LOAD:%.+]] = load [[Ht1Rf_GEMM]][] : memref<f32>
+ // CHECK: [[XtWf_LOAD:%.+]] = affine.load [[XtWf_GEMM]][] : memref<f32>
+ // CHECK: [[Ht1Rf_LOAD:%.+]] = affine.load [[Ht1Rf_GEMM]][] : memref<f32>
  // CHECK: [[Ft_OUTPUT:%.+]] = addf [[XtWf_LOAD]], [[Ht1Rf_LOAD]] : f32
  // CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
- // CHECK: store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
+ // CHECK: affine.store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = constant 1.000000e+00 : f32
@@ -2009,14 +2004,14 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
- // CHECK: [[Ft_LOAD:%.+]] = load [[Ft]][] : memref<f32>
+ // CHECK: [[Ft_LOAD:%.+]] = affine.load [[Ft]][] : memref<f32>
- // CHECK: [[XtWc_LOAD:%.+]] = load [[XtWc_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Rc_LOAD:%.+]] = load [[Ht1Rc_GEMM]][] : memref<f32>
+ // CHECK: [[XtWc_LOAD:%.+]] = affine.load [[XtWc_GEMM]][] : memref<f32>
+ // CHECK: [[Ht1Rc_LOAD:%.+]] = affine.load [[Ht1Rc_GEMM]][] : memref<f32>
  // CHECK: [[ct_OUTPUT:%.+]] = addf [[XtWc_LOAD]], [[Ht1Rc_LOAD]] : f32
  // CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
- // CHECK: store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
+ // CHECK: affine.store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
@@ -2026,19 +2021,19 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
- // CHECK: [[ct_LOAD:%.+]] = load [[ct]][] : memref<f32>
+ // CHECK: [[ct_LOAD:%.+]] = affine.load [[ct]][] : memref<f32>
  // CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
  // CHECK: [[Itct:%.+]] = mulf [[It_LOAD]], [[ct_LOAD]] : f32
  // CHECK: [[Ct:%.+]] = addf [[FtCt1]], [[Itct]] : f32
- // CHECK: store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
+ // CHECK: affine.store [[Ct]], [[CELL_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
- // CHECK: [[XtWo_LOAD:%.+]] = load [[XtWo_GEMM]][] : memref<f32>
- // CHECK: [[Ht1Ro_LOAD:%.+]] = load [[Ht1Ro_GEMM]][] : memref<f32>
+ // CHECK: [[XtWo_LOAD:%.+]] = affine.load [[XtWo_GEMM]][] : memref<f32>
+ // CHECK: [[Ht1Ro_LOAD:%.+]] = affine.load [[Ht1Ro_GEMM]][] : memref<f32>
  // CHECK: [[Ot_OUTPUT:%.+]] = addf [[XtWo_LOAD]], [[Ht1Ro_LOAD]] : f32
  // CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
- // CHECK: store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
+ // CHECK: affine.store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = constant 1.000000e+00 : f32
@@ -2047,10 +2042,10 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
- // CHECK: [[Ot_LOAD:%.+]] = load [[Ot]][] : memref<f32>
+ // CHECK: [[Ot_LOAD:%.+]] = affine.load [[Ot]][] : memref<f32>
  // CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
- // CHECK: store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
+ // CHECK: affine.store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
  // CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
  // CHECK: {{.*}} = constant 0.000000e+00 : f32
  // CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
@@ -2060,10 +2055,10 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
  // CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
  // CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
  // CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
- // CHECK: [[hCt_LOAD:%.+]] = load [[hCt]][] : memref<f32>
+ // CHECK: [[hCt_LOAD:%.+]] = affine.load [[hCt]][] : memref<f32>
  // CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32
- // CHECK: store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
+ // CHECK: affine.store [[Ht]], [[HIDDEN_STATE]][%c0, %arg4, %arg5] : memref<1x3x3xf32>
  // CHECK: dealloc [[XtWi_GEMM]] : memref<f32>
  // CHECK: dealloc [[XtWo_GEMM]] : memref<f32>
@@ -2101,7 +2096,7 @@ func @test_lstm_reverse_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x2xf32>
  // CHECK: krnl.iterate([[REVERSE_SEQUENCE_OPT_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
  // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
  // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
- // CHECK: [[Xt_LOAD:%.+]] = load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
+ // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}

// -----

@@ -2119,7 +2114,7 @@ func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x
  // CHECK: krnl.return_loops [[SEQUENCE_LOOPS]]
  // CHECK: } : () -> !krnl.loop
  // CHECK: krnl.iterate([[SEQUENCE_OPT_LOOPS]]) with ([[SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
- // CHECK: [[Xt_LOAD:%.+]] = load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>
+ // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%arg3, {{.*}}, {{.*}}] : memref<4x3x2xf32>
  // CHECK: [[REVERSE_SEQUENCE_LOOPS:%.+]] = krnl.define_loops 1
  // CHECK: [[REVERSE_SEQUENCE_OPT_LOOPS:%.+]] = krnl.optimize_loops {
@@ -2128,7 +2123,7 @@ func @test_lstm_bidirectional_mode(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12x
  // CHECK: krnl.iterate([[REVERSE_SEQUENCE_OPT_LOOPS]]) with ([[REVERSE_SEQUENCE_LOOPS]] -> %arg3 = 0 to 4) {
  // CHECK: %[[SEQUENCE_LEN:.+]] = constant 4 : index
  // CHECK: %[[REVERSE_SEQUENCE_IV:.+]] = affine.apply [[REVERSE_IV_MAP]](%arg3)[%[[SEQUENCE_LEN]]{{]}}
- // CHECK: [[Xt_LOAD:%.+]] = load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
+ // CHECK: [[Xt_LOAD:%.+]] = affine.load %arg0[%[[REVERSE_SEQUENCE_IV]], {{.*}}, {{.*}}] : memref<4x3x2xf32>
}

// -----

@@ -2176,8 +2171,8 @@ func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*
  // CHECK: krnl.return_loops [[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOP_0]]#0, [[OPT_LOOP_0]]#1, [[OPT_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
- // CHECK: store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
+ // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
  // CHECK: }
  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOP_1:%.+]]:3 = krnl.optimize_loops {
@@ -2185,8 +2180,8 @@ func @test_split_equal(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tensor<*
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOP_1]]#0, [[OPT_LOOP_1]]#1, [[OPT_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 8, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 32, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg1)
- // CHECK: [[LOAD_1:%.+]] = load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
- // CHECK: store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%[[INDEX]], %arg2, %arg3] : memref<16x32x64xf32>
+ // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<8x32x64xf32>
  // CHECK: }
  // CHECK: return [[RES_0]], [[RES_1]] : memref<8x32x64xf32>, memref<8x32x64xf32>
}
@@ -2207,8 +2202,8 @@ func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tenso
  // CHECK: krnl.return_loops [[DEF_LOOP_0]]#0, [[DEF_LOOP_0]]#1, [[DEF_LOOP_0]]#2
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOP_0]]#0, [[OPT_LOOP_0]]#1, [[OPT_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
- // CHECK: store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<16x32x64xf32>
+ // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<16x2x64xf32>
  // CHECK: }
  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOP_1:%.+]]:3 = krnl.optimize_loops {
@@ -2216,8 +2211,8 @@ func @test_split_variable(%arg0 : tensor<16x32x64xf32>) -> (tensor<*xf32>, tenso
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: krnl.iterate([[OPT_LOOP_1]]#0, [[OPT_LOOP_1]]#1, [[OPT_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to 16, [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
- // CHECK: [[LOAD_1:%.+]] = load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
- // CHECK: store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<16x32x64xf32>
+ // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<16x30x64xf32>
  // CHECK: }
  // CHECK: return [[RES_0]], [[RES_1]] : memref<16x2x64xf32>, memref<16x30x64xf32>
}
@@ -2241,8 +2236,8 @@ func @test_split_unknown_dimension(%arg0 : tensor) -> (tensor<*xf32>
  // CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
  // CHECK: [[DIM_0:%.+]] = dim [[RES_0]], 0 : memref<?x2x64xf32>
  // CHECK: krnl.iterate([[OPT_LOOP_0]]#0, [[OPT_LOOP_0]]#1, [[OPT_LOOP_0]]#2) with ([[DEF_LOOP_0]]#0 -> %arg1 = 0 to [[DIM_0]], [[DEF_LOOP_0]]#1 -> %arg2 = 0 to 2, [[DEF_LOOP_0]]#2 -> %arg3 = 0 to 64) {
- // CHECK: [[LOAD_0:%.+]] = load %arg0[%arg1, %arg2, %arg3] : memref<?x32x64xf32>
- // CHECK: store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
+ // CHECK: [[LOAD_0:%.+]] = affine.load %arg0[%arg1, %arg2, %arg3] : memref<?x32x64xf32>
+ // CHECK: affine.store [[LOAD_0]], [[RES_0]][%arg1, %arg2, %arg3] : memref<?x2x64xf32>
  // CHECK: }
  // CHECK: [[DEF_LOOP_1:%.+]]:3 = krnl.define_loops 3
  // CHECK: [[OPT_LOOP_1:%.+]]:3 = krnl.optimize_loops {
@@ -2251,8 +2246,8 @@ func @test_split_unknown_dimension(%arg0 : tensor) -> (tensor<*xf32>
  // CHECK: [[DIM_1:%.+]] = dim [[RES_1]], 0 : memref<?x30x64xf32>
  // CHECK: krnl.iterate([[OPT_LOOP_1]]#0, [[OPT_LOOP_1]]#1, [[OPT_LOOP_1]]#2) with ([[DEF_LOOP_1]]#0 -> %arg1 = 0 to [[DIM_1]], [[DEF_LOOP_1]]#1 -> %arg2 = 0 to 30, [[DEF_LOOP_1]]#2 -> %arg3 = 0 to 64) {
  // CHECK: %[[INDEX:.+]] = affine.apply [[INDEX_MAP]](%arg2)
- // CHECK: [[LOAD_1:%.+]] = load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x32x64xf32>
- // CHECK: store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
+ // CHECK: [[LOAD_1:%.+]] = affine.load %arg0[%arg1, %[[INDEX]], %arg3] : memref<?x32x64xf32>
+ // CHECK: affine.store [[LOAD_1]], [[RES_1]][%arg1, %arg2, %arg3] : memref<?x30x64xf32>
  // CHECK: }
  // CHECK: return [[RES_0]], [[RES_1]] : memref<?x2x64xf32>, memref<?x30x64xf32>
}
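Before the second test file, one more observation on the hunks above: the pad, concat, and split checks also fold what used to be an explicit constant plus addi into a single affine.apply, keeping the subscript affine so the subsequent access can stay an affine.store. A minimal sketch of the rewritten indexing from test_constant_pad1 (the map name #pad is hypothetical; the tests themselves only match #{{.*}}):

  #pad = affine_map<(d0) -> (d0 + 3)>
  %idx = affine.apply #pad(%arg2)
  %v = affine.load %arg0[%arg1, %arg2] : memref<16x16xf32>
  affine.store %v, %res[%arg1, %idx] : memref<18x20xf32>

Folding the offset into the map is what lets the pad-region store remain affine instead of falling back to std.store.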
diff --git a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir index d531902..d931fc8 100644 --- a/test/mlir/onnx/onnx_lowering_with_dealloc.mlir +++ b/test/mlir/onnx/onnx_lowering_with_dealloc.mlir @@ -16,10 +16,10 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> /// Second Add // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 @@ -27,10 +27,10 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { - // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32> - // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 - // CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32> + // CHECK: affine.store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32> /// Dealloc of first result. 
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
@@ -55,10 +55,10 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Second Mul
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -66,10 +66,10 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
@@ -94,10 +94,10 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Second Div
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -105,10 +105,10 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
@@ -133,10 +133,10 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Second Sub
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -144,10 +144,10 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
@@ -172,10 +172,10 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi1>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
   // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
-  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: affine.store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi1>

   /// Second And
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -183,10 +183,10 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi1>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
   // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
-  // CHECK: store [[AND]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: affine.store [[AND]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xi1>
@@ -211,10 +211,10 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi1>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
   // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
-  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: affine.store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi1>

   /// Second Or
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -222,10 +222,10 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi1>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
   // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
-  // CHECK: store [[OR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: affine.store [[OR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xi1>
@@ -250,10 +250,10 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi1>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
   // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
-  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: affine.store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi1>

   /// Second Xor
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -261,10 +261,10 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi1>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
   // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
-  // CHECK: store [[XOR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>
+  // CHECK: affine.store [[XOR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xi1>
@@ -585,10 +585,10 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Second Sum
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -596,10 +596,10 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[ADD]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[ADD]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
@@ -624,11 +624,11 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
   // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Second Max
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -636,11 +636,11 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
   // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
@@ -665,11 +665,11 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
   // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Second Min
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@@ -677,11 +677,11 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
   // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
   // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: affine.store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>

   /// Dealloc of first result.
   // CHECK: dealloc [[RES]] : memref<10x10xf32>
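Taken together, these test updates swap only the memory ops being checked for, not the access patterns. The two forms look the same at this level but differ in what the compiler may assume: affine.load/affine.store constrain their indices to affine expressions of loop induction variables and symbols, which is what later affine passes (dependence analysis, loop-invariant code motion, and the like) key on. A minimal standalone sketch in the MLIR of this vintage, as a hypothetical function not taken from the patch:

  func @sketch(%m : memref<10x10xf32>, %i : index, %j : index) -> f32 {
    // Standard-dialect form: the indices are opaque SSA values.
    %a = load %m[%i, %j] : memref<10x10xf32>
    // Affine form: the indices must be valid affine dims/symbols
    // (function arguments qualify), which makes the access
    // analyzable by affine transformations.
    %b = affine.load %m[%i, %j] : memref<10x10xf32>
    %c = addf %a, %b : f32
    return %c : f32
  }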