Replace std.load/std.store by affine.load/affine.store (#180)

* Move to more recent LLVM ID (May 15)

* clang-format

* Bump cache version up

* Update readme

* Fix doc check

* Move to a newer commit id

* Update LoopToStandard -> SCFToStandard

* Change MLIRSideEffects to MLIRSideEffectInterfaces

* Add AffineScope trait to KrnlIterateOp

* [ElementWise] Load/Store op to AffineLoad/AffineStore op

* [Gemm, MatMul, Reduction, Softmax] Load/Store op to AffineLoad/AffineStore op

* [Concat] Load/Store op to AffineLoad/AffineStore op

* [Pad, PadConstantValuePad, Reshape, Transpose] Load/Store op to AffineLoad/AffineStore op

* [LSTM] Load/Store op to AffineLoad/AffineStore op

* [Conv, Norm, Pooling] Load/Store op to AffineLoad/AffineStore op

* Add affine-loop-fusion pass

* Use Load/Store for scalar

* Use Load/Store for scalar

* Fix lit tests

* Unknown dimensions for broadcasting ops

* Affine Load/Store for scalar memref

* clang-format

Co-authored-by: Gheorghe-Teodor Bercea <gt.bercea@gmail.com>
Co-authored-by: Tian Jin <tjingrant@gmail.com>
This commit is contained in:
Tung D. Le 2020-07-05 17:20:21 +09:00 committed by GitHub
parent 2c8f5701bd
commit 7e05f371de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 494 additions and 469 deletions

View File

@ -619,20 +619,35 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
for (auto arg : iterationBlock.getArguments()) for (auto arg : iterationBlock.getArguments())
loopIVs.push_back(arg); loopIVs.push_back(arg);
} }
// Fold over operands for each of their scalar values // Fold over operands for each of their scalar values.
Value accumulated, next; Value accumulated, next;
auto accumulatedLoopIVs = getLoopIVsForBroadcasting( // Obtain the first operand.
std::vector<Value> accumulatedLoopIVs = getLoopIVsForBroadcasting(
loc, rewriter, loopIVs, operands[0], broadcastedDimInfo[0]); loc, rewriter, loopIVs, operands[0], broadcastedDimInfo[0]);
accumulated = rewriter.create<LoadOp>(loc, operands[0], accumulatedLoopIVs); if (!hasAllConstantDimensions(memRefType))
// In case of unknown dimensions, use std.load since
// 'getLoopIVsForBroadcasting' has not supported affine map so far.
accumulated =
rewriter.create<LoadOp>(loc, operands[0], accumulatedLoopIVs);
else
accumulated =
rewriter.create<AffineLoadOp>(loc, operands[0], accumulatedLoopIVs);
// Iterate over the remaining operands.
for (unsigned i = 1; i < numArgs; i++) { for (unsigned i = 1; i < numArgs; i++) {
auto nextLoopIVs = getLoopIVsForBroadcasting( std::vector<Value> nextLoopIVs = getLoopIVsForBroadcasting(
loc, rewriter, loopIVs, operands[i], broadcastedDimInfo[i]); loc, rewriter, loopIVs, operands[i], broadcastedDimInfo[i]);
next = rewriter.create<LoadOp>(loc, operands[i], nextLoopIVs); if (!hasAllConstantDimensions(memRefType))
// In case of unknown dimensions, use std.load since
// 'getLoopIVsForBroadcasting' has not supported affine map so far.
next = rewriter.create<LoadOp>(loc, operands[i], nextLoopIVs);
else
next = rewriter.create<AffineLoadOp>(loc, operands[i], nextLoopIVs);
accumulated = emitScalarOpFor<ElementwiseVariadicOp>( accumulated = emitScalarOpFor<ElementwiseVariadicOp>(
rewriter, loc, op, memRefType.getElementType(), {accumulated, next}); rewriter, loc, op, memRefType.getElementType(), {accumulated, next});
} }
// Store result in the resulting array. // Store result in the resulting array.
rewriter.create<StoreOp>(loc, accumulated, alloc, loopIVs); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopIVs);
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -156,23 +156,23 @@ struct ONNXGemmOpLowering : public ConversionPattern {
// Initialize the output of A*B // Initialize the output of A*B
auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0); auto zero = emitConstantOp(rewriter, loc, memRefType.getElementType(), 0);
rewriter.create<StoreOp>(loc, zero, alloc, loopMNIVs); rewriter.create<AffineStoreOp>(loc, zero, alloc, loopMNIVs);
// Compute A*B // Compute A*B
auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack); auto matmulIterateOp = rewriter.create<KrnlIterateOp>(loc, reductionPack);
// Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting) // Compute beta*C, and add up to alpha*A*B (unidirectional broadcasting)
auto loadedAB = rewriter.create<LoadOp>(loc, alloc, loopMNIVs); auto loadedAB = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs);
auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB); auto alphaAB = rewriter.create<MulFOp>(loc, alpha, loadedAB);
if (hasBias) { if (hasBias) {
auto loopCIVs = getLoopIVsForBroadcasting( auto loopCIVs = getLoopIVsForBroadcasting(
loc, rewriter, loopMNIVs, C, broadcastedDimInfo); loc, rewriter, loopMNIVs, C, broadcastedDimInfo);
auto loadedC = rewriter.create<LoadOp>(loc, C, loopCIVs); auto loadedC = rewriter.create<AffineLoadOp>(loc, C, loopCIVs);
auto betaC = rewriter.create<MulFOp>(loc, beta, loadedC); auto betaC = rewriter.create<MulFOp>(loc, beta, loadedC);
auto Y = rewriter.create<AddFOp>(loc, alphaAB, betaC); auto Y = rewriter.create<AddFOp>(loc, alphaAB, betaC);
rewriter.create<StoreOp>(loc, Y, alloc, loopMNIVs); rewriter.create<AffineStoreOp>(loc, Y, alloc, loopMNIVs);
} else { } else {
rewriter.create<StoreOp>(loc, alphaAB, alloc, loopMNIVs); rewriter.create<AffineStoreOp>(loc, alphaAB, alloc, loopMNIVs);
} }
// Insert instructions to do matrix multiplication: A*B // Insert instructions to do matrix multiplication: A*B
@ -199,12 +199,12 @@ struct ONNXGemmOpLowering : public ConversionPattern {
} }
// Matmul computation // Matmul computation
auto loadedA = rewriter.create<LoadOp>(loc, A, loopAIVs); auto loadedA = rewriter.create<AffineLoadOp>(loc, A, loopAIVs);
auto loadedB = rewriter.create<LoadOp>(loc, B, loopBIVs); auto loadedB = rewriter.create<AffineLoadOp>(loc, B, loopBIVs);
auto loadedY = rewriter.create<LoadOp>(loc, alloc, loopMNIVs); auto loadedY = rewriter.create<AffineLoadOp>(loc, alloc, loopMNIVs);
auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB); auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB); auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
rewriter.create<StoreOp>(loc, accumulated, alloc, loopMNIVs); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopMNIVs);
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -221,7 +221,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
} }
// Fill the output with value 0. // Fill the output with value 0.
rewriter.create<StoreOp>(loc, zero, alloc, loopBatchMNIVs); rewriter.create<AffineStoreOp>(loc, zero, alloc, loopBatchMNIVs);
// Iterate along the reduction dimension. // Iterate along the reduction dimension.
// Use a value from A. // Use a value from A.
@ -265,17 +265,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
loopBatchKNIVs.emplace_back(loopMNIVs[0]); loopBatchKNIVs.emplace_back(loopMNIVs[0]);
} }
// Matmul computation // Matmul computation
auto loadedA = rewriter.create<LoadOp>(loc, A, loopBatchMKIVs); auto loadedA = rewriter.create<AffineLoadOp>(loc, A, loopBatchMKIVs);
auto loadedB = rewriter.create<LoadOp>(loc, B, loopBatchKNIVs); auto loadedB = rewriter.create<AffineLoadOp>(loc, B, loopBatchKNIVs);
auto loadedY = rewriter.create<LoadOp>(loc, alloc, loopBatchMNIVs); auto loadedY = rewriter.create<AffineLoadOp>(loc, alloc, loopBatchMNIVs);
if (elementType.isa<IntegerType>()) { if (elementType.isa<IntegerType>()) {
auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB); auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB);
auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB); auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB);
rewriter.create<StoreOp>(loc, accumulated, alloc, loopBatchMNIVs); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
} else if (elementType.isa<FloatType>()) { } else if (elementType.isa<FloatType>()) {
auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB); auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB); auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
rewriter.create<StoreOp>(loc, accumulated, alloc, loopBatchMNIVs); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, loopBatchMNIVs);
} }
} else if ((AShape.size() == 1) && (BShape.size() == 1)) { } else if ((AShape.size() == 1) && (BShape.size() == 1)) {
// Case 3: // Case 3:
@ -283,7 +283,7 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
// Fill the output with value 0. // Fill the output with value 0.
Value zeroIndex = rewriter.create<ConstantIndexOp>(loc, 0); Value zeroIndex = rewriter.create<ConstantIndexOp>(loc, 0);
rewriter.create<StoreOp>(loc, zero, alloc, zeroIndex); rewriter.create<AffineStoreOp>(loc, zero, alloc, zeroIndex);
// Iterate along the reduction dimension. // Iterate along the reduction dimension.
// Use a value from A. // Use a value from A.
@ -310,17 +310,17 @@ struct ONNXMatMulOpLowering : public ConversionPattern {
loopKIVs.emplace_back(reduceIterationBlock.getArgument(0)); loopKIVs.emplace_back(reduceIterationBlock.getArgument(0));
// Matmul computation // Matmul computation
auto loadedA = rewriter.create<LoadOp>(loc, A, loopKIVs); auto loadedA = rewriter.create<AffineLoadOp>(loc, A, loopKIVs);
auto loadedB = rewriter.create<LoadOp>(loc, B, loopKIVs); auto loadedB = rewriter.create<AffineLoadOp>(loc, B, loopKIVs);
auto loadedY = rewriter.create<LoadOp>(loc, alloc, zeroIndex); auto loadedY = rewriter.create<AffineLoadOp>(loc, alloc, zeroIndex);
if (elementType.isa<IntegerType>()) { if (elementType.isa<IntegerType>()) {
auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB); auto AB = rewriter.create<MulIOp>(loc, loadedA, loadedB);
auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB); auto accumulated = rewriter.create<AddIOp>(loc, loadedY, AB);
rewriter.create<StoreOp>(loc, accumulated, alloc, zeroIndex); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, zeroIndex);
} else if (elementType.isa<FloatType>()) { } else if (elementType.isa<FloatType>()) {
auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB); auto AB = rewriter.create<MulFOp>(loc, loadedA, loadedB);
auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB); auto accumulated = rewriter.create<AddFOp>(loc, loadedY, AB);
rewriter.create<StoreOp>(loc, accumulated, alloc, zeroIndex); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, zeroIndex);
} }
} else { } else {
// No scalar matrix multiplication. // No scalar matrix multiplication.

View File

@ -212,7 +212,7 @@ struct ONNXReductionOpLowering : public ConversionPattern {
Value identity = Value identity =
getIdentityValue<ONNXReductionOp>(rewriter, loc, elementOutType); getIdentityValue<ONNXReductionOp>(rewriter, loc, elementOutType);
rewriter.create<StoreOp>(loc, identity, alloc, loopIVs); rewriter.create<AffineStoreOp>(loc, identity, alloc, loopIVs);
// Define an Krnl loop to do reduction. // Define an Krnl loop to do reduction.
rewriter.setInsertionPointAfter(iterateOpInit); rewriter.setInsertionPointAfter(iterateOpInit);
@ -256,11 +256,11 @@ struct ONNXReductionOpLowering : public ConversionPattern {
} }
Value next, accumulated; Value next, accumulated;
next = rewriter.create<LoadOp>(loc, operands[0], inLoopIVs); next = rewriter.create<AffineLoadOp>(loc, operands[0], inLoopIVs);
accumulated = rewriter.create<LoadOp>(loc, alloc, outLoopIVs); accumulated = rewriter.create<AffineLoadOp>(loc, alloc, outLoopIVs);
accumulated = emitScalarOpFor<ONNXReductionOp>( accumulated = emitScalarOpFor<ONNXReductionOp>(
rewriter, loc, op, memRefOutType.getElementType(), {accumulated, next}); rewriter, loc, op, memRefOutType.getElementType(), {accumulated, next});
rewriter.create<StoreOp>(loc, accumulated, alloc, outLoopIVs); rewriter.create<AffineStoreOp>(loc, accumulated, alloc, outLoopIVs);
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);
return success(); return success();

View File

@ -104,8 +104,9 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
outerLoopIVs.push_back(arg); outerLoopIVs.push_back(arg);
// Reset accumulators. // Reset accumulators.
rewriter.create<StoreOp>(loc, zero, sumOp); rewriter.create<AffineStoreOp>(loc, zero, sumOp, ArrayRef<Value>{});
rewriter.create<StoreOp>(loc, negInfinity, maxOp); rewriter.create<AffineStoreOp>(
loc, negInfinity, maxOp, ArrayRef<Value>{});
// Create an inner loop to compute max. // Create an inner loop to compute max.
maxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack); maxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
@ -115,8 +116,9 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
softmaxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack); softmaxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
} else { } else {
// Reset accumulators. // Reset accumulators.
rewriter.create<StoreOp>(loc, zero, sumOp); rewriter.create<AffineStoreOp>(loc, zero, sumOp, ArrayRef<Value>{});
rewriter.create<StoreOp>(loc, negInfinity, maxOp); rewriter.create<AffineStoreOp>(
loc, negInfinity, maxOp, ArrayRef<Value>{});
// Create an inner loop to compute max. // Create an inner loop to compute max.
maxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack); maxIterateOp = rewriter.create<KrnlIterateOp>(loc, innerPack);
@ -142,16 +144,16 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
maxLoopIVs.push_back(arg); maxLoopIVs.push_back(arg);
// Compute the max value. // Compute the max value.
Value max = rewriter.create<LoadOp>(loc, maxOp); Value max = rewriter.create<AffineLoadOp>(loc, maxOp);
Value nextMax = rewriter.create<LoadOp>(loc, input, maxLoopIVs); Value nextMax = rewriter.create<AffineLoadOp>(loc, input, maxLoopIVs);
auto maxCond = auto maxCond =
rewriter.create<CmpFOp>(loc, CmpFPredicate::OGT, max, nextMax); rewriter.create<CmpFOp>(loc, CmpFPredicate::OGT, max, nextMax);
max = rewriter.create<SelectOp>(loc, maxCond, max, nextMax); max = rewriter.create<SelectOp>(loc, maxCond, max, nextMax);
rewriter.create<StoreOp>(loc, max, maxOp); rewriter.create<AffineStoreOp>(loc, max, maxOp, ArrayRef<Value>{});
// Get the max. // Get the max.
rewriter.setInsertionPoint(sumIterateOp); rewriter.setInsertionPoint(sumIterateOp);
max = rewriter.create<LoadOp>(loc, maxOp); max = rewriter.create<AffineLoadOp>(loc, maxOp);
// Insert instructions inside the sum loop. // Insert instructions inside the sum loop.
Block &sumIterationBlock = sumIterateOp.bodyRegion().front(); Block &sumIterationBlock = sumIterateOp.bodyRegion().front();
@ -165,18 +167,18 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
sumLoopIVs.push_back(arg); sumLoopIVs.push_back(arg);
// Sum up values. // Sum up values.
Value sum = rewriter.create<LoadOp>(loc, sumOp); Value sum = rewriter.create<AffineLoadOp>(loc, sumOp);
Value next = rewriter.create<LoadOp>(loc, input, sumLoopIVs); Value next = rewriter.create<AffineLoadOp>(loc, input, sumLoopIVs);
Value sub = rewriter.create<SubFOp>(loc, next, max); Value sub = rewriter.create<SubFOp>(loc, next, max);
Value exp = rewriter.create<ExpOp>(loc, sub); Value exp = rewriter.create<ExpOp>(loc, sub);
sum = rewriter.create<AddFOp>(loc, sum, exp); sum = rewriter.create<AddFOp>(loc, sum, exp);
rewriter.create<StoreOp>(loc, sum, sumOp); rewriter.create<AffineStoreOp>(loc, sum, sumOp, ArrayRef<Value>{});
// Store intermediate values in the result to avoid recomputation. // Store intermediate values in the result to avoid recomputation.
rewriter.create<StoreOp>(loc, exp, alloc, sumLoopIVs); rewriter.create<AffineStoreOp>(loc, exp, alloc, sumLoopIVs);
// Get the sum. // Get the sum.
rewriter.setInsertionPoint(softmaxIterateOp); rewriter.setInsertionPoint(softmaxIterateOp);
sum = rewriter.create<LoadOp>(loc, sumOp); sum = rewriter.create<AffineLoadOp>(loc, sumOp);
// Insert instructions inside the softmax loop. // Insert instructions inside the softmax loop.
Block &softmaxIterationBlock = softmaxIterateOp.bodyRegion().front(); Block &softmaxIterationBlock = softmaxIterateOp.bodyRegion().front();
@ -190,9 +192,10 @@ struct ONNXSoftmaxOpLowering : public ConversionPattern {
softmaxLoopIVs.push_back(arg); softmaxLoopIVs.push_back(arg);
// Compute softmax. // Compute softmax.
Value expLoadedVal = rewriter.create<LoadOp>(loc, alloc, softmaxLoopIVs); Value expLoadedVal =
rewriter.create<AffineLoadOp>(loc, alloc, softmaxLoopIVs);
Value result = rewriter.create<DivFOp>(loc, expLoadedVal, sum); Value result = rewriter.create<DivFOp>(loc, expLoadedVal, sum);
rewriter.create<StoreOp>(loc, result, alloc, softmaxLoopIVs); rewriter.create<AffineStoreOp>(loc, result, alloc, softmaxLoopIVs);
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -129,10 +129,14 @@ struct ONNXConvOpLowering : public ConversionPattern {
if (group > 1) { if (group > 1) {
// Middle loop is over groups and third loop is over the // Middle loop is over groups and third loop is over the
// kernel identifiers in the current group. // kernel identifiers in the current group.
auto kernelsOffset = rewriter.create<MulIOp>( AffineMap kernelMap = AffineMap::get(2, 1,
loc, outerLoops.getInductionVar(gIndex), kernelsPerGroupValue); /*gIndex=*/rewriter.getAffineDimExpr(0) *
kernel = rewriter.create<AddIOp>( /*kernelsPerGroup=*/rewriter.getAffineSymbolExpr(0) +
loc, kernelsOffset, outerLoops.getInductionVar(mIndex)); /*mIndex=*/rewriter.getAffineDimExpr(1));
kernel = rewriter.create<AffineApplyOp>(loc, kernelMap,
ArrayRef<Value>{/*gIndex=*/outerLoops.getInductionVar(gIndex),
/*kernelsPerGroupValue=*/kernelsPerGroupValue,
/*mIndex=*/outerLoops.getInductionVar(mIndex)});
} }
// 2.2 Define spatial loops // 2.2 Define spatial loops
@ -209,9 +213,8 @@ struct ONNXConvOpLowering : public ConversionPattern {
/*subchannel=*/rewriter.getAffineSymbolExpr(0) + /*subchannel=*/rewriter.getAffineSymbolExpr(0) +
/*c=*/rewriter.getAffineDimExpr(1)); /*c=*/rewriter.getAffineDimExpr(1));
channelDepth = rewriter.create<AffineApplyOp>(loc, indexMap, channelDepth = rewriter.create<AffineApplyOp>(loc, indexMap,
ValueRange( ArrayRef<Value>{/*g=*/outerLoops.getInductionVar(gIndex),
ArrayRef<Value>{/*g=*/outerLoops.getInductionVar(gIndex), /*c=*/channelDepth, /*subchannel=*/subchannels});
/*c=*/channelDepth, /*subchannel=*/subchannels}));
} }
dataIndices.emplace_back(channelDepth); dataIndices.emplace_back(channelDepth);
// sX * rX + kX // sX * rX + kX
@ -231,8 +234,8 @@ struct ONNXConvOpLowering : public ConversionPattern {
/*sX=*/rewriter.getAffineDimExpr(0) * /*rX=*/stride + /*sX=*/rewriter.getAffineDimExpr(0) * /*rX=*/stride +
/*kX=*/rewriter.getAffineDimExpr(1)); /*kX=*/rewriter.getAffineDimExpr(1));
Value outIV = rewriter.create<AffineApplyOp>(loc, indexMap, Value outIV = rewriter.create<AffineApplyOp>(loc, indexMap,
ValueRange(ArrayRef<Value>{spatialLoops.getInductionVar(i), ArrayRef<Value>{spatialLoops.getInductionVar(i),
innerLoops.getInductionVar(i + 1)})); innerLoops.getInductionVar(i + 1)});
dataIndices.emplace_back(outIV); dataIndices.emplace_back(outIV);
} }

View File

@ -79,10 +79,10 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
loopCIVs.emplace_back(rewriter.create<ConstantIndexOp>(loc, 0)); loopCIVs.emplace_back(rewriter.create<ConstantIndexOp>(loc, 0));
} }
auto scaleVal = rewriter.create<LoadOp>(loc, scale, loopCIVs); auto scaleVal = rewriter.create<AffineLoadOp>(loc, scale, loopCIVs);
auto biasVal = rewriter.create<LoadOp>(loc, bias, loopCIVs); auto biasVal = rewriter.create<AffineLoadOp>(loc, bias, loopCIVs);
auto meanVal = rewriter.create<LoadOp>(loc, mean, loopCIVs); auto meanVal = rewriter.create<AffineLoadOp>(loc, mean, loopCIVs);
auto varianceVal = rewriter.create<LoadOp>(loc, variance, loopCIVs); auto varianceVal = rewriter.create<AffineLoadOp>(loc, variance, loopCIVs);
// Create a KrnlIterateOp along the other dimensions. // Create a KrnlIterateOp along the other dimensions.
SmallVector<int64_t, 4> axes; SmallVector<int64_t, 4> axes;
@ -118,7 +118,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
loopIVs.emplace_back(args[0]); loopIVs.emplace_back(args[0]);
} }
auto xVal = rewriter.create<LoadOp>(loc, operand, loopIVs); auto xVal = rewriter.create<AffineLoadOp>(loc, operand, loopIVs);
// normalize // normalize
auto dividend = rewriter.create<SubFOp>(loc, xVal, meanVal); auto dividend = rewriter.create<SubFOp>(loc, xVal, meanVal);
auto adjustedVarianceVal = auto adjustedVarianceVal =
@ -129,7 +129,7 @@ struct ONNXBatchNormalizationTestModeOpLowering : public ConversionPattern {
auto scaleNormVal = rewriter.create<MulFOp>(loc, scaleVal, normVal); auto scaleNormVal = rewriter.create<MulFOp>(loc, scaleVal, normVal);
auto shiftScaleNormVal = auto shiftScaleNormVal =
rewriter.create<AddFOp>(loc, scaleNormVal, biasVal); rewriter.create<AddFOp>(loc, scaleNormVal, biasVal);
rewriter.create<StoreOp>(loc, shiftScaleNormVal, alloc, loopIVs); rewriter.create<AffineStoreOp>(loc, shiftScaleNormVal, alloc, loopIVs);
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -100,7 +100,7 @@ void postProcessPoolingWindow<ONNXAveragePoolOp>(
ArrayRef<Value> poolDimValues) { ArrayRef<Value> poolDimValues) {
// AveragePool's result type is FloatType, so it's safe to use DivFOp, SubFOp. // AveragePool's result type is FloatType, so it's safe to use DivFOp, SubFOp.
bool countIncludePad = getCountIncludePad<ONNXAveragePoolOp>(poolOp); bool countIncludePad = getCountIncludePad<ONNXAveragePoolOp>(poolOp);
Value numerator = rewriter.create<LoadOp>(loc, alloc, resultIndices); Value numerator = rewriter.create<AffineLoadOp>(loc, alloc, resultIndices);
Value denominator; Value denominator;
if (countIncludePad) { if (countIncludePad) {
int64_t kernelSize = 1; int64_t kernelSize = 1;
@ -120,7 +120,7 @@ void postProcessPoolingWindow<ONNXAveragePoolOp>(
Value average = rewriter.create<DivFOp>(loc, numerator, denominator); Value average = rewriter.create<DivFOp>(loc, numerator, denominator);
rewriter.create<StoreOp>(loc, average, alloc, resultIndices); rewriter.create<AffineStoreOp>(loc, average, alloc, resultIndices);
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -167,9 +167,7 @@ Value insertAllocAndDeallocForPooling(ConversionPatternRewriter &rewriter,
dilations.empty() ? 1 : dilations[spatialIndex])); dilations.empty() ? 1 : dilations[spatialIndex]));
// Apply the affine map. // Apply the affine map.
Value dimVal = Value dimVal = rewriter.create<AffineApplyOp>(loc, dimMap, dimArgs);
rewriter.create<AffineApplyOp>(loc, dimMap, ValueRange(dimArgs));
allocOperands.emplace_back(dimVal); allocOperands.emplace_back(dimVal);
} }
} }
@ -346,7 +344,7 @@ struct ONNXPoolOpLowering : public ConversionPattern {
outputIndices.emplace_back(outputLoops.getInductionVar(i)); outputIndices.emplace_back(outputLoops.getInductionVar(i));
// 2.1 Emit: output[n][c][ho][wo] = identity // 2.1 Emit: output[n][c][ho][wo] = identity
rewriter.create<StoreOp>(loc, identity, alloc, outputIndices); rewriter.create<AffineStoreOp>(loc, identity, alloc, outputIndices);
// 2.2 Emit affine maps which express the lower and upper bounds for the // 2.2 Emit affine maps which express the lower and upper bounds for the
// pooling window's dimensions. // pooling window's dimensions.
@ -441,11 +439,11 @@ struct ONNXPoolOpLowering : public ConversionPattern {
{ // Construct poolStartValues and poolDimValues. { // Construct poolStartValues and poolDimValues.
for (int i = 0; i < kernelShape.size(); ++i) { for (int i = 0; i < kernelShape.size(); ++i) {
Value startIndex = rewriter.create<AffineMaxOp>( Value startIndex = rewriter.create<AffineMaxOp>(
loc, poolStartMap, ValueRange(IVsAndConstants[i])); loc, poolStartMap, IVsAndConstants[i]);
poolStartValues.emplace_back(startIndex); poolStartValues.emplace_back(startIndex);
Value endIndex = rewriter.create<AffineMinOp>( Value endIndex =
loc, poolEndMap, ValueRange(IVsAndConstants[i])); rewriter.create<AffineMinOp>(loc, poolEndMap, IVsAndConstants[i]);
Value dim = rewriter.create<SubIOp>(loc, endIndex, startIndex); Value dim = rewriter.create<SubIOp>(loc, endIndex, startIndex);
if (isDilated) { if (isDilated) {
@ -514,10 +512,10 @@ struct ONNXPoolOpLowering : public ConversionPattern {
Value loadInput = Value loadInput =
rewriter.create<LoadOp>(loc, inputOperand, inputIndices); rewriter.create<LoadOp>(loc, inputOperand, inputIndices);
Value loadPartialOutput = Value loadPartialOutput =
rewriter.create<LoadOp>(loc, alloc, outputIndices); rewriter.create<AffineLoadOp>(loc, alloc, outputIndices);
Value output = emitScalarOpFor<PoolOp>(rewriter, loc, op, Value output = emitScalarOpFor<PoolOp>(rewriter, loc, op,
outputElementType, {loadPartialOutput, loadInput}); outputElementType, {loadPartialOutput, loadInput});
rewriter.create<StoreOp>(loc, output, alloc, outputIndices); rewriter.create<AffineStoreOp>(loc, output, alloc, outputIndices);
} }
// 2.5 Post-processing for the pooling window, e.g. taking average. // 2.5 Post-processing for the pooling window, e.g. taking average.

View File

@ -222,13 +222,15 @@ LstmState allocAndInitializeStates<ONNXLSTMOp, LstmState>(
Value hiddenVal = zero; Value hiddenVal = zero;
if (!isNoneType(operandAdaptor.initial_h())) if (!isNoneType(operandAdaptor.initial_h()))
hiddenVal = rewriter.create<LoadOp>(loc, operandAdaptor.initial_h(), IVs); hiddenVal =
rewriter.create<StoreOp>(loc, hiddenVal, state.ht, IVs); rewriter.create<AffineLoadOp>(loc, operandAdaptor.initial_h(), IVs);
rewriter.create<AffineStoreOp>(loc, hiddenVal, state.ht, IVs);
Value cellVal = zero; Value cellVal = zero;
if (!isNoneType(operandAdaptor.initial_c())) if (!isNoneType(operandAdaptor.initial_c()))
cellVal = rewriter.create<LoadOp>(loc, operandAdaptor.initial_c(), IVs); cellVal =
rewriter.create<StoreOp>(loc, cellVal, state.ct, IVs); rewriter.create<AffineLoadOp>(loc, operandAdaptor.initial_c(), IVs);
rewriter.create<AffineStoreOp>(loc, cellVal, state.ct, IVs);
} }
rewriter.restoreInsertionPoint(ipInitializationLoops); rewriter.restoreInsertionPoint(ipInitializationLoops);
return state; return state;
@ -320,8 +322,8 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
for (unsigned i = 0; i < 4; ++i) { for (unsigned i = 0; i < 4; ++i) {
Value wHiddenIV = Value wHiddenIV =
rewriter.create<AffineApplyOp>(loc, accessByOffsetMap, rewriter.create<AffineApplyOp>(loc, accessByOffsetMap,
ValueRange(std::vector<Value>{/*iv=*/hiddenIV, std::vector<Value>{/*iv=*/hiddenIV,
/*index=*/constantIndices[i], /*size=*/hiddenDimVal})); /*index=*/constantIndices[i], /*size=*/hiddenDimVal});
wbIOFCIVs.emplace_back(SmallVector<Value, 2>{directionIV, wHiddenIV}); wbIOFCIVs.emplace_back(SmallVector<Value, 2>{directionIV, wHiddenIV});
} }
// Rb[iofc] // Rb[iofc]
@ -329,8 +331,8 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
SmallVector<Value, 4> rbIVs; SmallVector<Value, 4> rbIVs;
Value rHiddenIV = Value rHiddenIV =
rewriter.create<AffineApplyOp>(loc, accessByOffsetMap, rewriter.create<AffineApplyOp>(loc, accessByOffsetMap,
ValueRange(std::vector<Value>{/*iv=*/hiddenIV, std::vector<Value>{/*iv=*/hiddenIV,
/*index=*/constantIndices[i], /*size=*/hiddenDimVal})); /*index=*/constantIndices[i], /*size=*/hiddenDimVal});
rbIOFCIVs.emplace_back(SmallVector<Value, 2>{directionIV, rHiddenIV}); rbIOFCIVs.emplace_back(SmallVector<Value, 2>{directionIV, rHiddenIV});
} }
} }
@ -339,17 +341,16 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
if (hasPeepholes) { if (hasPeepholes) {
for (unsigned i = 0; i < 3; ++i) { for (unsigned i = 0; i < 3; ++i) {
SmallVector<Value, 4> pIVs; SmallVector<Value, 4> pIVs;
Value pHiddenIV = Value pHiddenIV = rewriter.create<AffineApplyOp>(loc,
rewriter.create<AffineApplyOp>(loc, accessByOffsetMap, accessByOffsetMap,
ValueRange(std::vector<Value>{ std::vector<Value>{hiddenIV, constantIndices[i], hiddenDimVal});
hiddenIV, constantIndices[i], hiddenDimVal}));
pIOFIVs.emplace_back(SmallVector<Value, 2>{directionIV, pHiddenIV}); pIOFIVs.emplace_back(SmallVector<Value, 2>{directionIV, pHiddenIV});
} }
} }
} }
Value loadH = rewriter.create<LoadOp>(loc, state.ht, hIVs); Value loadH = rewriter.create<AffineLoadOp>(loc, state.ht, hIVs);
Value loadC = rewriter.create<LoadOp>(loc, state.ct, cIVs); Value loadC = rewriter.create<AffineLoadOp>(loc, state.ct, cIVs);
// Emit instructions for matrix multiplications: // Emit instructions for matrix multiplications:
// Xt*(Wi^T), Xt*(Wo^T), Xt*(Wf^t), Xt*(Wc^T) // Xt*(Wi^T), Xt*(Wo^T), Xt*(Wf^t), Xt*(Wc^T)
@ -361,9 +362,9 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0); MemRefType scalarMemRefType = MemRefType::get({}, elementType, {}, 0);
for (unsigned i = 0; i < 4; ++i) { for (unsigned i = 0; i < 4; ++i) {
Value xwAlloc = rewriter.create<AllocOp>(loc, scalarMemRefType); Value xwAlloc = rewriter.create<AllocOp>(loc, scalarMemRefType);
rewriter.create<StoreOp>(loc, zero, xwAlloc); rewriter.create<AffineStoreOp>(loc, zero, xwAlloc, ArrayRef<Value>{});
Value hrAlloc = rewriter.create<AllocOp>(loc, scalarMemRefType); Value hrAlloc = rewriter.create<AllocOp>(loc, scalarMemRefType);
rewriter.create<StoreOp>(loc, zero, hrAlloc); rewriter.create<AffineStoreOp>(loc, zero, hrAlloc, ArrayRef<Value>{});
xwIOFC.emplace_back(xwAlloc); xwIOFC.emplace_back(xwAlloc);
hrIOFC.emplace_back(hrAlloc); hrIOFC.emplace_back(hrAlloc);
} }
@ -390,10 +391,9 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
// R[iofc] :: [num_directions, 4*hidden_size, input_size] // R[iofc] :: [num_directions, 4*hidden_size, input_size]
for (unsigned i = 0; i < 4; ++i) { for (unsigned i = 0; i < 4; ++i) {
SmallVector<Value, 4> wIVs, rIVs; SmallVector<Value, 4> wIVs, rIVs;
Value wHiddenIV = Value wHiddenIV = rewriter.create<AffineApplyOp>(loc,
rewriter.create<AffineApplyOp>(loc, accessByOffsetMap, accessByOffsetMap,
ValueRange(std::vector<Value>{ std::vector<Value>{hiddenIV, constantIndices[i], hiddenDimVal});
hiddenIV, constantIndices[i], hiddenDimVal}));
wIVs = {directionIV, wHiddenIV, reductionIV}; wIVs = {directionIV, wHiddenIV, reductionIV};
wIOFCIVs.emplace_back(wIVs); wIOFCIVs.emplace_back(wIVs);
@ -402,77 +402,80 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
rIOFCIVs.emplace_back(rIVs); rIOFCIVs.emplace_back(rIVs);
} }
Value loadX = rewriter.create<LoadOp>(loc, operandAdaptor.X(), xIVs); Value loadX =
rewriter.create<AffineLoadOp>(loc, operandAdaptor.X(), xIVs);
for (unsigned i = 0; i < 4; ++i) { for (unsigned i = 0; i < 4; ++i) {
// Xt * Wiofc // Xt * Wiofc
Value loadW = Value loadW = rewriter.create<AffineLoadOp>(
rewriter.create<LoadOp>(loc, operandAdaptor.W(), wIOFCIVs[i]); loc, operandAdaptor.W(), wIOFCIVs[i]);
Value xwVal = rewriter.create<MulFOp>(loc, loadX, loadW); Value xwVal = rewriter.create<MulFOp>(loc, loadX, loadW);
Value loadXW = rewriter.create<LoadOp>(loc, xwIOFC[i]); Value loadXW = rewriter.create<AffineLoadOp>(loc, xwIOFC[i]);
Value nextXW = rewriter.create<AddFOp>(loc, loadXW, xwVal); Value nextXW = rewriter.create<AddFOp>(loc, loadXW, xwVal);
rewriter.create<StoreOp>(loc, nextXW, xwIOFC[i]); rewriter.create<AffineStoreOp>(
loc, nextXW, xwIOFC[i], ArrayRef<Value>{});
// Ht-1 * Riofc // Ht-1 * Riofc
Value loadR = Value loadR = rewriter.create<AffineLoadOp>(
rewriter.create<LoadOp>(loc, operandAdaptor.R(), rIOFCIVs[i]); loc, operandAdaptor.R(), rIOFCIVs[i]);
Value hrVal = rewriter.create<MulFOp>(loc, loadH, loadR); Value hrVal = rewriter.create<MulFOp>(loc, loadH, loadR);
Value loadHR = rewriter.create<LoadOp>(loc, hrIOFC[i]); Value loadHR = rewriter.create<AffineLoadOp>(loc, hrIOFC[i]);
Value nextHR = rewriter.create<AddFOp>(loc, loadHR, hrVal); Value nextHR = rewriter.create<AddFOp>(loc, loadHR, hrVal);
rewriter.create<StoreOp>(loc, nextHR, hrIOFC[i]); rewriter.create<AffineStoreOp>(
loc, nextHR, hrIOFC[i], ArrayRef<Value>{});
} }
} }
rewriter.restoreInsertionPoint(ipReductionLoops); rewriter.restoreInsertionPoint(ipReductionLoops);
} }
// it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi) // it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
Value loadXWI = rewriter.create<LoadOp>(loc, xwIOFC[0]); Value loadXWI = rewriter.create<AffineLoadOp>(loc, xwIOFC[0]);
Value loadHRI = rewriter.create<LoadOp>(loc, hrIOFC[0]); Value loadHRI = rewriter.create<AffineLoadOp>(loc, hrIOFC[0]);
Value it = rewriter.create<AddFOp>(loc, loadXWI, loadHRI); Value it = rewriter.create<AddFOp>(loc, loadXWI, loadHRI);
if (hasPeepholes) { if (hasPeepholes) {
Value loadP = Value loadP =
rewriter.create<LoadOp>(loc, operandAdaptor.P(), pIOFIVs[0]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.P(), pIOFIVs[0]);
Value PC = rewriter.create<MulFOp>(loc, loadP, loadC); Value PC = rewriter.create<MulFOp>(loc, loadP, loadC);
it = rewriter.create<AddFOp>(loc, it, PC); it = rewriter.create<AddFOp>(loc, it, PC);
} }
if (hasBiasForInput) { if (hasBiasForInput) {
Value loadWB = Value loadWB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[0]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[0]);
it = rewriter.create<AddFOp>(loc, it, loadWB); it = rewriter.create<AddFOp>(loc, it, loadWB);
Value loadRB = Value loadRB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[0]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[0]);
it = rewriter.create<AddFOp>(loc, it, loadRB); it = rewriter.create<AddFOp>(loc, it, loadRB);
} }
it = applyActivation(rewriter, loc, activationPack.f, it); it = applyActivation(rewriter, loc, activationPack.f, it);
// ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf) // ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
Value loadXWF = rewriter.create<LoadOp>(loc, xwIOFC[2]); Value loadXWF = rewriter.create<AffineLoadOp>(loc, xwIOFC[2]);
Value loadHRF = rewriter.create<LoadOp>(loc, hrIOFC[2]); Value loadHRF = rewriter.create<AffineLoadOp>(loc, hrIOFC[2]);
Value ft = rewriter.create<AddFOp>(loc, loadXWF, loadHRF); Value ft = rewriter.create<AddFOp>(loc, loadXWF, loadHRF);
if (hasPeepholes) { if (hasPeepholes) {
Value loadP = Value loadP =
rewriter.create<LoadOp>(loc, operandAdaptor.P(), pIOFIVs[2]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.P(), pIOFIVs[2]);
Value PC = rewriter.create<MulFOp>(loc, loadP, loadC); Value PC = rewriter.create<MulFOp>(loc, loadP, loadC);
ft = rewriter.create<AddFOp>(loc, ft, PC); ft = rewriter.create<AddFOp>(loc, ft, PC);
} }
if (hasBiasForInput) { if (hasBiasForInput) {
Value loadWB = Value loadWB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[2]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[2]);
ft = rewriter.create<AddFOp>(loc, ft, loadWB); ft = rewriter.create<AddFOp>(loc, ft, loadWB);
Value loadRB = Value loadRB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[2]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[2]);
ft = rewriter.create<AddFOp>(loc, ft, loadRB); ft = rewriter.create<AddFOp>(loc, ft, loadRB);
} }
ft = applyActivation(rewriter, loc, activationPack.f, ft); ft = applyActivation(rewriter, loc, activationPack.f, ft);
// ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc) // ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
Value loadXWC = rewriter.create<LoadOp>(loc, xwIOFC[3]); Value loadXWC = rewriter.create<AffineLoadOp>(loc, xwIOFC[3]);
Value loadHRC = rewriter.create<LoadOp>(loc, hrIOFC[3]); Value loadHRC = rewriter.create<AffineLoadOp>(loc, hrIOFC[3]);
Value ct = rewriter.create<AddFOp>(loc, loadXWC, loadHRC); Value ct = rewriter.create<AddFOp>(loc, loadXWC, loadHRC);
if (hasBiasForInput) { if (hasBiasForInput) {
Value loadWB = Value loadWB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[3]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[3]);
ct = rewriter.create<AddFOp>(loc, ct, loadWB); ct = rewriter.create<AddFOp>(loc, ct, loadWB);
Value loadRB = Value loadRB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[3]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[3]);
ct = rewriter.create<AddFOp>(loc, ct, loadRB); ct = rewriter.create<AddFOp>(loc, ct, loadRB);
} }
ct = applyActivation(rewriter, loc, activationPack.g, ct); ct = applyActivation(rewriter, loc, activationPack.g, ct);
@ -481,24 +484,24 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
Value FtCt1 = rewriter.create<MulFOp>(loc, ft, loadC); Value FtCt1 = rewriter.create<MulFOp>(loc, ft, loadC);
Value itct = rewriter.create<MulFOp>(loc, it, ct); Value itct = rewriter.create<MulFOp>(loc, it, ct);
Value Ct = rewriter.create<AddFOp>(loc, FtCt1, itct); Value Ct = rewriter.create<AddFOp>(loc, FtCt1, itct);
rewriter.create<StoreOp>(loc, Ct, state.ct, cIVs); rewriter.create<AffineStoreOp>(loc, Ct, state.ct, cIVs);
// ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo) // ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
Value loadXWO = rewriter.create<LoadOp>(loc, xwIOFC[1]); Value loadXWO = rewriter.create<AffineLoadOp>(loc, xwIOFC[1]);
Value loadHRO = rewriter.create<LoadOp>(loc, hrIOFC[1]); Value loadHRO = rewriter.create<AffineLoadOp>(loc, hrIOFC[1]);
Value ot = rewriter.create<AddFOp>(loc, loadXWO, loadHRO); Value ot = rewriter.create<AddFOp>(loc, loadXWO, loadHRO);
if (hasPeepholes) { if (hasPeepholes) {
Value loadP = Value loadP =
rewriter.create<LoadOp>(loc, operandAdaptor.P(), pIOFIVs[1]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.P(), pIOFIVs[1]);
Value PC = rewriter.create<MulFOp>(loc, loadP, Ct); Value PC = rewriter.create<MulFOp>(loc, loadP, Ct);
ot = rewriter.create<AddFOp>(loc, ot, PC); ot = rewriter.create<AddFOp>(loc, ot, PC);
} }
if (hasBiasForInput) { if (hasBiasForInput) {
Value loadWB = Value loadWB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[1]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), wbIOFCIVs[1]);
ot = rewriter.create<AddFOp>(loc, ot, loadWB); ot = rewriter.create<AddFOp>(loc, ot, loadWB);
Value loadRB = Value loadRB =
rewriter.create<LoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[1]); rewriter.create<AffineLoadOp>(loc, operandAdaptor.B(), rbIOFCIVs[1]);
ot = rewriter.create<AddFOp>(loc, ot, loadRB); ot = rewriter.create<AddFOp>(loc, ot, loadRB);
} }
ot = applyActivation(rewriter, loc, activationPack.f, ot); ot = applyActivation(rewriter, loc, activationPack.f, ot);
@ -506,12 +509,12 @@ void calculateState<ONNXLSTMOp, LstmState, LstmActivationPack>(
// Ht = ot (.) h(Ct) // Ht = ot (.) h(Ct)
Value hCt = applyActivation(rewriter, loc, activationPack.h, Ct); Value hCt = applyActivation(rewriter, loc, activationPack.h, Ct);
Value Ht = rewriter.create<MulFOp>(loc, ot, hCt); Value Ht = rewriter.create<MulFOp>(loc, ot, hCt);
rewriter.create<StoreOp>(loc, Ht, state.ht, hIVs); rewriter.create<AffineStoreOp>(loc, Ht, state.ht, hIVs);
// Store the current Ht if required. // Store the current Ht if required.
if (!isNoneType(state.allH)) { if (!isNoneType(state.allH)) {
SmallVector<Value, 4> allHIVs{sequenceIV, directionIV, batchIV, hiddenIV}; SmallVector<Value, 4> allHIVs{sequenceIV, directionIV, batchIV, hiddenIV};
rewriter.create<StoreOp>(loc, Ht, state.allH, allHIVs); rewriter.create<AffineStoreOp>(loc, Ht, state.allH, allHIVs);
} }
// Deallocate the temporary results of matrix multiplications. // Deallocate the temporary results of matrix multiplications.

View File

@ -28,7 +28,7 @@ Value applyActivation(ConversionPatternRewriter &rewriter, Location loc,
MemRefType scalarMemRefType = MemRefType scalarMemRefType =
MemRefType::get({}, scalarOperand.getType(), {}, 0); MemRefType::get({}, scalarOperand.getType(), {}, 0);
Value alloc = rewriter.create<AllocOp>(loc, scalarMemRefType); Value alloc = rewriter.create<AllocOp>(loc, scalarMemRefType);
rewriter.create<StoreOp>(loc, scalarOperand, alloc); rewriter.create<AffineStoreOp>(loc, scalarOperand, alloc, ArrayRef<Value>{});
std::vector<mlir::NamedAttribute> attributes; std::vector<mlir::NamedAttribute> attributes;
if (activation.alpha) { if (activation.alpha) {
@ -68,6 +68,6 @@ Value applyActivation(ConversionPatternRewriter &rewriter, Location loc,
else else
llvm_unreachable("Unsupported activation"); llvm_unreachable("Unsupported activation");
Value result = rewriter.create<LoadOp>(loc, res); Value result = rewriter.create<AffineLoadOp>(loc, res);
return result; return result;
} }

View File

@ -126,9 +126,9 @@ struct ONNXRNNOpLowering : public ConversionPattern {
rewriter.getIndexType(), (direction == REVERSE) ? 0 : 1); rewriter.getIndexType(), (direction == REVERSE) ? 0 : 1);
Value reverseSequenceIV = Value reverseSequenceIV =
rewriter.create<AffineApplyOp>(loc, reverseIVMap, rewriter.create<AffineApplyOp>(loc, reverseIVMap,
ValueRange(std::vector<Value>{sequenceLoops.getInductionVar(0), std::vector<Value>{sequenceLoops.getInductionVar(0),
emitConstantOp(rewriter, loc, rewriter.getIndexType(), emitConstantOp(rewriter, loc, rewriter.getIndexType(),
sequenceDimSize)})); sequenceDimSize)});
// Emit calculation for one RNN step. // Emit calculation for one RNN step.
calculateState<RNNOp, S, A>(rewriter, loc, operandAdaptor, state, calculateState<RNNOp, S, A>(rewriter, loc, operandAdaptor, state,
activationReverse, directionIV, reverseSequenceIV); activationReverse, directionIV, reverseSequenceIV);

View File

@ -59,15 +59,18 @@ struct ONNXConcatOpLowering : public ConversionPattern {
if (r != axis || writeOffset == 0) { if (r != axis || writeOffset == 0) {
writeIndices.emplace_back(inputLoops.getInductionVar(r)); writeIndices.emplace_back(inputLoops.getInductionVar(r));
} else { } else {
auto indexWithOffset = rewriter.create<AddIOp>(loc, AffineMap indexWithOffsetMap =
rewriter.create<ConstantIndexOp>(loc, writeOffset), AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) + writeOffset);
inputLoops.getInductionVar(r)); Value indexWithOffset =
rewriter.create<AffineApplyOp>(loc, indexWithOffsetMap,
ArrayRef<Value>{inputLoops.getInductionVar(r)});
writeIndices.emplace_back(indexWithOffset); writeIndices.emplace_back(indexWithOffset);
} }
} }
// Insert copy. // Insert copy.
auto loadData = rewriter.create<LoadOp>(loc, operands[i], readIndices); auto loadData =
rewriter.create<StoreOp>(loc, loadData, alloc, writeIndices); rewriter.create<AffineLoadOp>(loc, operands[i], readIndices);
rewriter.create<AffineStoreOp>(loc, loadData, alloc, writeIndices);
// Increment offset // Increment offset
writeOffset += currShape[axis]; writeOffset += currShape[axis];
} }

View File

@ -88,16 +88,17 @@ struct ONNXPadOpLowering : public ConversionPattern {
if (pads[i] == 0) { if (pads[i] == 0) {
outLoopIVs.emplace_back(valueLoops.getInductionVar(i)); outLoopIVs.emplace_back(valueLoops.getInductionVar(i));
} else { } else {
auto outIV = rewriter.create<AddIOp>(loc, AffineMap indexWithOffsetMap =
rewriter.create<ConstantIndexOp>(loc, pads[i]), AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) + pads[i]);
valueLoops.getInductionVar(i)); Value outIV = rewriter.create<AffineApplyOp>(loc, indexWithOffsetMap,
ArrayRef<Value>{valueLoops.getInductionVar(i)});
outLoopIVs.emplace_back(outIV); outLoopIVs.emplace_back(outIV);
} }
} }
auto originValue = auto originValue =
rewriter.create<LoadOp>(loc, operandAdaptor.data(), inLoopIVs); rewriter.create<AffineLoadOp>(loc, operandAdaptor.data(), inLoopIVs);
rewriter.create<StoreOp>(loc, originValue, alloc, outLoopIVs); rewriter.create<AffineStoreOp>(loc, originValue, alloc, outLoopIVs);
rewriter.setInsertionPointToStart(padLoops.getIterateBlock()); rewriter.setInsertionPointToStart(padLoops.getIterateBlock());
SmallVector<Value, 4> outLoopIVs1; SmallVector<Value, 4> outLoopIVs1;
@ -105,7 +106,7 @@ struct ONNXPadOpLowering : public ConversionPattern {
outLoopIVs1.emplace_back(padLoops.getInductionVar(i)); outLoopIVs1.emplace_back(padLoops.getInductionVar(i));
auto paddingValue = rewriter.create<ConstantOp>(loc, valueAttr); auto paddingValue = rewriter.create<ConstantOp>(loc, valueAttr);
rewriter.create<StoreOp>(loc, paddingValue, alloc, outLoopIVs1); rewriter.create<AffineStoreOp>(loc, paddingValue, alloc, outLoopIVs1);
// Replace the original op with the generated code. // Replace the original op with the generated code.
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -77,15 +77,17 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
if (pad_begin[i] == 0) { if (pad_begin[i] == 0) {
outLoopIVs.emplace_back(valueLoops.getInductionVar(i)); outLoopIVs.emplace_back(valueLoops.getInductionVar(i));
} else { } else {
auto outIV = rewriter.create<AddIOp>(loc, AffineMap indexWithOffsetMap =
rewriter.create<ConstantIndexOp>(loc, pad_begin[i]), AffineMap::get(1, 0, rewriter.getAffineDimExpr(0) + pad_begin[i]);
valueLoops.getInductionVar(i)); Value outIV = rewriter.create<AffineApplyOp>(loc, indexWithOffsetMap,
ArrayRef<Value>{valueLoops.getInductionVar(i)});
outLoopIVs.emplace_back(outIV); outLoopIVs.emplace_back(outIV);
} }
} }
auto inVal = rewriter.create<LoadOp>(loc, operandAdaptor.data(), inLoopIVs); auto inVal =
rewriter.create<StoreOp>(loc, inVal, alloc, outLoopIVs); rewriter.create<AffineLoadOp>(loc, operandAdaptor.data(), inLoopIVs);
rewriter.create<AffineStoreOp>(loc, inVal, alloc, outLoopIVs);
rewriter.setInsertionPointToStart(padLoops.getIterateBlock()); rewriter.setInsertionPointToStart(padLoops.getIterateBlock());
SmallVector<Value, 4> outLoopIVs1; SmallVector<Value, 4> outLoopIVs1;
@ -93,7 +95,7 @@ struct ONNXPadConstantValuePadOpLowering : public ConversionPattern {
outLoopIVs1.emplace_back(padLoops.getInductionVar(i)); outLoopIVs1.emplace_back(padLoops.getInductionVar(i));
auto inVal1 = rewriter.create<ConstantOp>(loc, constantValAttr); auto inVal1 = rewriter.create<ConstantOp>(loc, constantValAttr);
rewriter.create<StoreOp>(loc, inVal1, alloc, outLoopIVs1); rewriter.create<AffineStoreOp>(loc, inVal1, alloc, outLoopIVs1);
// Replace the original op with the generated code. // Replace the original op with the generated code.
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -64,7 +64,8 @@ struct ONNXReshapeOpLowering : public ConversionPattern {
for (int i = 0; i < memRefShape.size(); ++i) { for (int i = 0; i < memRefShape.size(); ++i) {
Value index = emitConstantOp(rewriter, loc, rewriter.getIndexType(), i); Value index = emitConstantOp(rewriter, loc, rewriter.getIndexType(), i);
// Load index from array of indices. // Load index from array of indices.
Value loadedVal = rewriter.create<LoadOp>(loc, operands[1], index); Value loadedVal =
rewriter.create<AffineLoadOp>(loc, operands[1], index);
// If a dimension is zero, the actual dimension value is taken from the // If a dimension is zero, the actual dimension value is taken from the
// input tensor. // input tensor.
// //

View File

@ -92,8 +92,9 @@ struct ONNXSplitOpLowering : public ConversionPattern {
writeIndices.emplace_back(outputLoops.getInductionVar(r)); writeIndices.emplace_back(outputLoops.getInductionVar(r));
} }
// Insert copy. // Insert copy.
auto loadData = rewriter.create<LoadOp>(loc, operands[0], readIndices); auto loadData =
rewriter.create<StoreOp>(loc, loadData, allocs[i], writeIndices); rewriter.create<AffineLoadOp>(loc, operands[0], readIndices);
rewriter.create<AffineStoreOp>(loc, loadData, allocs[i], writeIndices);
} }
rewriter.replaceOp(op, allocs); rewriter.replaceOp(op, allocs);
return success(); return success();

View File

@ -80,8 +80,8 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
for (int i = 0; i < iterationBlock.getArguments().size(); ++i) for (int i = 0; i < iterationBlock.getArguments().size(); ++i)
outLoopIVs.emplace_back(iterationBlock.getArguments()[perm[i]]); outLoopIVs.emplace_back(iterationBlock.getArguments()[perm[i]]);
auto inVal = rewriter.create<LoadOp>(loc, data, inLoopIVs); auto inVal = rewriter.create<AffineLoadOp>(loc, data, inLoopIVs);
rewriter.create<StoreOp>(loc, inVal, alloc, outLoopIVs); rewriter.create<AffineStoreOp>(loc, inVal, alloc, outLoopIVs);
rewriter.replaceOp(op, alloc); rewriter.replaceOp(op, alloc);

View File

@ -14,10 +14,10 @@ func @test_enable_memory_pool(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
// CHECK: krnl.define_loops // CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops // CHECK: krnl.optimize_loops
// CHECK: krnl.iterate // CHECK: krnl.iterate
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg1, %arg2] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg0[%arg1, %arg2] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<10x10xf32>
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32> // CHECK: affine.store [[ADDF1]], [[GETREF]][%arg1, %arg2] : memref<10x10xf32>
// CHECK: krnl.define_loops // CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops // CHECK: krnl.optimize_loops
// CHECK: krnl.iterate // CHECK: krnl.iterate
@ -43,26 +43,26 @@ func @test_enable_memory_pool_2(%arg0: tensor<10x10xf32>, %arg1: tensor<10x20xf3
// CHECK: krnl.define_loops // CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops // CHECK: krnl.optimize_loops
// CHECK: krnl.iterate // CHECK: krnl.iterate
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[ADDF1:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[ADDF1]], [[GETREF1]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: krnl.define_loops // CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops // CHECK: krnl.optimize_loops
// CHECK: krnl.iterate // CHECK: krnl.iterate
// CHECK: [[LOAD3:%.+]] = load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32> // CHECK: [[LOAD3:%.+]] = affine.load [[GETREF1]][%arg2, %arg4] : memref<10x10xf32>
// CHECK: [[LOAD4:%.+]] = load %arg1[%arg4, %arg3] : memref<10x20xf32> // CHECK: [[LOAD4:%.+]] = affine.load %arg1[%arg4, %arg3] : memref<10x20xf32>
// CHECK: [[LOAD5:%.+]] = load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: [[LOAD5:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
// CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32 // CHECK: [[MULF1:%.+]] = mulf [[LOAD3]], [[LOAD4]] : f32
// CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32 // CHECK: [[ADDF2:%.+]] = addf [[LOAD5]], [[MULF1]] : f32
// CHECK: store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: affine.store [[ADDF2]], [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
// CHECK: krnl.define_loops // CHECK: krnl.define_loops
// CHECK: krnl.optimize_loops // CHECK: krnl.optimize_loops
// CHECK: krnl.iterate // CHECK: krnl.iterate
// CHECK: [[LOAD6:%.+]] = load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32> // CHECK: [[LOAD6:%.+]] = affine.load [[GETREF0]][%arg2, %arg3] : memref<10x20xf32>
// CHECK: [[LOAD7:%.+]] = load %arg1[%arg2, %arg3] : memref<10x20xf32> // CHECK: [[LOAD7:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x20xf32>
// CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32 // CHECK: [[ADDF3:%.+]] = addf [[LOAD6]], [[LOAD7]] : f32
// CHECK: store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32> // CHECK: affine.store [[ADDF3]], [[RES]][%arg2, %arg3] : memref<10x20xf32>
// CHECK: dealloc [[MEMPOOL1]] : memref<400xi8> // CHECK: dealloc [[MEMPOOL1]] : memref<400xi8>
// CHECK: dealloc [[MEMPOOL0]] : memref<800xi8> // CHECK: dealloc [[MEMPOOL0]] : memref<800xi8>
// CHECK: return [[RES]] : memref<10x20xf32> // CHECK: return [[RES]] : memref<10x20xf32>

File diff suppressed because it is too large Load Diff

View File

@ -16,10 +16,10 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Add /// Second Add
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -27,10 +27,10 @@ func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result. /// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32> // CHECK: dealloc [[RES]] : memref<10x10xf32>
@ -55,10 +55,10 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Mul /// Second Mul
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -66,10 +66,10 @@ func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result. /// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32> // CHECK: dealloc [[RES]] : memref<10x10xf32>
@ -94,10 +94,10 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Div /// Second Div
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -105,10 +105,10 @@ func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result. /// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32> // CHECK: dealloc [[RES]] : memref<10x10xf32>
@ -133,10 +133,10 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Sub /// Second Sub
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -144,10 +144,10 @@ func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32> // CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32> // CHECK: affine.store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result. /// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32> // CHECK: dealloc [[RES]] : memref<10x10xf32>
@ -172,10 +172,10 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
// CHECK: affine.store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi1>
/// Second And
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -183,10 +183,10 @@ func @test_and_and(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i1
// CHECK: affine.store [[AND]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xi1>
@ -211,10 +211,10 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
// CHECK: affine.store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi1>
/// Second Or
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -222,10 +222,10 @@ func @test_or_or(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor<*
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i1
// CHECK: affine.store [[OR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xi1>
@ -250,10 +250,10 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
// CHECK: affine.store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi1>
/// Second Xor
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -261,10 +261,10 @@ func @test_xor_xor(%arg0 : tensor<10x10xi1>, %arg1 : tensor<10x10xi1>) -> tensor
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xi1>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i1
// CHECK: affine.store [[XOR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi1>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xi1>
@ -585,10 +585,10 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Sum
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -596,10 +596,10 @@ func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[ADD]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
@ -624,11 +624,11 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Max
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -636,11 +636,11 @@ func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
@ -665,11 +665,11 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Min
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
@ -677,11 +677,11 @@ func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tens
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = affine.load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = affine.load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: affine.store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>