Add AffineScope trait to KrnlIterateOp and enable affine-loop-fusion pass (#140)

* Make KrnlIterate's IVs valid for AffineLoad/AffineStore

* [Unary elementwise op] Load/Store -> AffineLoad/AffineStore

* [Conv] Load/Store -> AffineLoad/AffineStore

* Add affine-loop-fusion pass

* Fix typos

* Fix mistake made when merging branch master

Co-authored-by: Tian Jin <tjingrant@gmail.com>
This commit is contained in:
Tung D. Le 2020-06-08 16:36:27 +09:00 committed by GitHub
parent cde1157d62
commit bb17fa965f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 159 additions and 149 deletions

View File

@ -548,11 +548,11 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
loopIVs.push_back(arg);
}
auto loadedVal = rewriter.create<LoadOp>(loc, X, loopIVs);
auto loadedVal = rewriter.create<AffineLoadOp>(loc, X, loopIVs);
auto loweredOpResult = emitScalarOpFor<ElementwiseUnaryOp>(
rewriter, loc, op, memRefType.getElementType(), {loadedVal});
// Store result in the resulting array.
rewriter.create<StoreOp>(loc, loweredOpResult, alloc, loopIVs);
rewriter.create<AffineStoreOp>(loc, loweredOpResult, alloc, loopIVs);
rewriter.replaceOp(op, alloc);

View File

@ -159,7 +159,7 @@ struct ONNXConvOpLowering : public ConversionPattern {
for (auto arg : spatialLoops.getIterateBlock()->getArguments())
resultIndices.emplace_back(arg);
// Store initializer value into output location.
rewriter.create<StoreOp>(loc, zero, alloc, resultIndices);
rewriter.create<AffineStoreOp>(loc, zero, alloc, resultIndices);
// 3.2 Define inner loops.
int64_t nInnerLoops = 1 + (kernelShape.size() - 2);
@ -176,14 +176,17 @@ struct ONNXConvOpLowering : public ConversionPattern {
// Emit the bias, if needed.
if (hasBias) {
auto loadResult = rewriter.create<LoadOp>(loc, alloc, resultIndices);
auto loadResult =
rewriter.create<AffineLoadOp>(loc, alloc, resultIndices);
SmallVector<Value, 4> biasIndices;
biasIndices.emplace_back(kernel);
auto loadBias = rewriter.create<LoadOp>(loc, biasOperand, kernel);
auto loadBias =
rewriter.create<AffineLoadOp>(loc, biasOperand, kernel);
auto resultWithBias =
rewriter.create<AddFOp>(loc, loadResult, loadBias);
// Store the result with bias added into output location.
rewriter.create<StoreOp>(loc, resultWithBias, alloc, resultIndices);
rewriter.create<AffineStoreOp>(
loc, resultWithBias, alloc, resultIndices);
}
//
@ -200,10 +203,16 @@ struct ONNXConvOpLowering : public ConversionPattern {
dataIndices.emplace_back(outerLoops.getInductionVar(nIndex));
// g * (C / group) + c
Value channelDepth = innerLoops.getInductionVar(cIndex);
if (group > 1)
channelDepth = rewriter.create<AddIOp>(loc, channelDepth,
rewriter.create<MulIOp>(
loc, subchannels, outerLoops.getInductionVar(gIndex)));
if (group > 1) {
AffineMap indexMap = AffineMap::get(2, 1,
/*g=*/rewriter.getAffineDimExpr(0) *
/*subchannel=*/rewriter.getAffineSymbolExpr(0) +
/*c=*/rewriter.getAffineDimExpr(1));
channelDepth = rewriter.create<AffineApplyOp>(loc, indexMap,
ValueRange(
ArrayRef<Value>{/*g=*/outerLoops.getInductionVar(gIndex),
/*c=*/channelDepth, /*subchannel=*/subchannels}));
}
dataIndices.emplace_back(channelDepth);
// sX * rX + kX
auto stridesAttribute = convOp.stridesAttr();
@ -215,12 +224,16 @@ struct ONNXConvOpLowering : public ConversionPattern {
for (int i = 0; i < kernelShape.size() - 2; ++i) {
Value spatialIndex = spatialLoops.getInductionVar(i);
// If strides are present then emit the correct access index.
int stride = 1;
if (stridesAttribute && strides[i] > 1)
spatialIndex = rewriter.create<MulIOp>(loc,
rewriter.create<ConstantIndexOp>(loc, strides[i]),
spatialLoops.getInductionVar(i));
dataIndices.emplace_back(rewriter.create<AddIOp>(
loc, spatialIndex, innerLoops.getInductionVar(i + 1)));
stride = strides[i];
AffineMap indexMap = AffineMap::get(2, 0,
/*sX=*/rewriter.getAffineDimExpr(0) * /*rX=*/stride +
/*kX=*/rewriter.getAffineDimExpr(1));
Value outIV = rewriter.create<AffineApplyOp>(loc, indexMap,
ValueRange(ArrayRef<Value>{spatialLoops.getInductionVar(i),
innerLoops.getInductionVar(i + 1)}));
dataIndices.emplace_back(outIV);
}
// 4.2 Prepare indices for accessing the kernel tensor.
@ -235,15 +248,15 @@ struct ONNXConvOpLowering : public ConversionPattern {
// 4.3 Compute convolution.
auto loadData =
rewriter.create<LoadOp>(loc, inputOperand, dataIndices);
rewriter.create<AffineLoadOp>(loc, inputOperand, dataIndices);
auto loadKernel =
rewriter.create<LoadOp>(loc, kernelOperand, kernelIndices);
rewriter.create<AffineLoadOp>(loc, kernelOperand, kernelIndices);
auto loadPartialSum =
rewriter.create<LoadOp>(loc, alloc, resultIndices);
rewriter.create<AffineLoadOp>(loc, alloc, resultIndices);
Value result = rewriter.create<AddFOp>(loc, loadPartialSum,
rewriter.create<MulFOp>(loc, loadData, loadKernel));
// 4.4 Store computed value into output location.
rewriter.create<StoreOp>(loc, result, alloc, resultIndices);
rewriter.create<AffineStoreOp>(loc, result, alloc, resultIndices);
}
}
}

View File

@ -74,7 +74,7 @@ def KrnlOptimizeLoopsOp : Op<Krnl_Dialect, "optimize_loops"> {
let parser = [{ return ::parse$cppClass(parser, result); }];
}
def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator]> {
def KrnlIterateOp : Op<Krnl_Dialect, "iterate", [ImplicitKrnlTerminator, AffineScope]> {
let summary = "iterate operation";
let description = [{
The "krnl.iterate" operation is conceptually equivalent to nested for loops.

View File

@ -105,6 +105,8 @@ void addONNXToKrnlPasses(mlir::PassManager &pm) {
// Appends the Krnl-to-Affine stage of the pipeline to `pm`: the Krnl lowering
// pass first, followed by the loop fusion pass.
void addKrnlToAffinePasses(mlir::PassManager &pm) {
pm.addPass(mlir::createLowerKrnlPass());
// Fuse loops in Affine dialect. Registered after the Krnl lowering pass
// (presumably so fusion operates on the lowered affine loops -- confirm).
pm.addPass(mlir::createLoopFusionPass());
}
void addKrnlToLLVMPasses(mlir::PassManager &pm) {

View File

@ -24,9 +24,9 @@ func @test_elementwise_op_with_scalar_values_1(%arg0 : tensor<f32>) -> tensor<*x
// CHECK-LABEL: test_elementwise_op_with_scalar_values_1
// CHECK: [[RES:%.+]] = alloc() : memref<f32>
// CHECK: [[LOAD:%.+]] = load %arg0[] : memref<f32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[] : memref<f32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: store [[EXP]], [[RES]][] : memref<f32>
// CHECK: affine.store [[EXP]], [[RES]][] : memref<f32>
// CHECK: return [[RES]] : memref<f32>
}
@ -217,9 +217,9 @@ func @test_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -238,7 +238,7 @@ func @test_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
@ -246,7 +246,7 @@ func @test_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIVIDEND:%.+]] = subf [[EXP]], [[NEXP]] : f32
// CHECK: [[DIVISOR:%.+]] = addf [[EXP]], [[NEXP]] : f32
// CHECK: [[TANH:%.+]] = divf [[DIVIDEND]], [[DIVISOR]] : f32
// CHECK: store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -265,7 +265,7 @@ func @test_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@ -273,7 +273,7 @@ func @test_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVIDEND:%.+]] = subf [[EXP]], [[NEXP]] : f32
// CHECK: [[SINH_RES:%.+]] = divf [[DIVIDEND]], [[TWO]] : f32
// CHECK: store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -292,7 +292,7 @@ func @test_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@ -300,7 +300,7 @@ func @test_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVIDEND:%.+]] = addf [[EXP]], [[NEXP]] : f32
// CHECK: [[COSH_RES:%.+]] = divf [[DIVIDEND]], [[TWO]] : f32
// CHECK: store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -319,9 +319,9 @@ func @test_cos(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[COS:%.+]] = cos [[LOAD]] : f32
// CHECK: store [[COS]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[COS]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -340,9 +340,9 @@ func @test_log(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOG:%.+]] = log [[LOAD]] : f32
// CHECK: store [[LOG]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[LOG]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -361,14 +361,14 @@ func @test_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVISOR:%.+]] = addf [[ONE]], [[NEXP]] : f32
// CHECK: [[SIGMOID_RES:%.+]] = divf [[ONE]], [[DIVISOR]] : f32
// CHECK: store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -387,11 +387,11 @@ func @test_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[LTZERO]], [[ZERO]], [[LOAD]] : f32
// CHECK: store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -545,7 +545,7 @@ func @test_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{2.+}} : f32
@ -554,7 +554,7 @@ func @test_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SUB:%.+]] = subf [[EXP]], [[ONE]] : f32
// CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[SUB]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -573,13 +573,13 @@ func @test_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
// CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -598,7 +598,7 @@ func @test_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
// CHECK: [[GAMMA:%.+]] = constant {{2.+}} : f32
@ -608,7 +608,7 @@ func @test_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SUB:%.+]] = subf [[MUL]], [[ALPHA]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD]], [[SUB]] : f32
// CHECK: [[SELU_RES:%.+]] = mulf [[GAMMA]], [[SELECT]] : f32
// CHECK: store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -627,7 +627,7 @@ func @test_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@ -638,7 +638,7 @@ func @test_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SELECT1:%.+]] = select [[CMP1]], [[ADD]], [[ZERO]] : f32
// CHECK: [[CMP2:%.+]] = cmpf "olt", [[SELECT1]], [[ONE]] : f32
// CHECK: [[SELECT2:%.+]] = select [[CMP2]], [[SELECT1]], [[ONE]] : f32
// CHECK: store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -657,10 +657,10 @@ func @test_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32
// CHECK: store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -679,12 +679,12 @@ func @test_softplus(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ADD:%.+]] = addf [[EXP]], [[ONE]] : f32
// CHECK: [[SOFTPLUS_RES:%.+]] = log [[ADD]] : f32
// CHECK: store [[SOFTPLUS_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SOFTPLUS_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -703,12 +703,12 @@ func @test_softsign(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ABS:%.+]] = absf [[LOAD]] : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ADD:%.+]] = addf [[ABS]], [[ONE]] : f32
// CHECK: [[SOFTSIGN_RES:%.+]] = divf [[LOAD]], [[ADD]] : f32
// CHECK: store [[SOFTSIGN_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SOFTSIGN_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -955,9 +955,9 @@ func @test_sqrt(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[SQRT:%.+]] = sqrt [[LOAD]] : f32
// CHECK: store [[SQRT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SQRT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -1038,7 +1038,7 @@ func @test_sign_f(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[MINUS_ONE:%.+]] = constant {{-1.+}} : f32
@ -1046,7 +1046,7 @@ func @test_sign_f(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SELECT_PLUS:%.+]] = select [[GTZERO]], [[ONE]], [[MINUS_ONE]] : f32
// CHECK: [[EQZERO:%.+]] = cmpf "oeq", [[LOAD]], [[ZERO]] : f32
// CHECK: [[SIGN_RES:%.+]] = select [[EQZERO]], [[ZERO]], [[SELECT_PLUS]] : f32
// CHECK: store [[SIGN_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SIGN_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -1065,7 +1065,7 @@ func @test_sign_i(%arg0 : tensor<?x10xi32>) -> tensor<*xi32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xi32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xi32>
// CHECK: [[ZERO:%.+]] = constant 0 : i32
// CHECK: [[ONE:%.+]] = constant 1 : i32
// CHECK: [[MINUS_ONE:%.+]] = constant -1 : i32
@ -1073,7 +1073,7 @@ func @test_sign_i(%arg0 : tensor<?x10xi32>) -> tensor<*xi32> {
// CHECK: [[SELECT_PLUS:%.+]] = select [[GTZERO]], [[ONE]], [[MINUS_ONE]] : i32
// CHECK: [[EQZERO:%.+]] = cmpi "eq", [[LOAD]], [[ZERO]] : i32
// CHECK: [[SIGN_RES:%.+]] = select [[EQZERO]], [[ZERO]], [[SELECT_PLUS]] : i32
// CHECK: store [[SIGN_RES]], [[RES]][%arg1, %arg2] : memref<?x10xi32>
// CHECK: affine.store [[SIGN_RES]], [[RES]][%arg1, %arg2] : memref<?x10xi32>
// CHECK: return [[RES]] : memref<?x10xi32>
}
@ -1339,14 +1339,14 @@ func @test_conv_no_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2
// CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg6 = 0 to 2, [[INNER_LOOPS]]#1 -> %arg7 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg8 = 0 to 7) {
// CHECK: [[R1PLUSK1:%.+]] = addi %arg4, %arg7 : index
// CHECK: [[R2PLUSK2:%.+]] = addi %arg5, %arg8 : index
// CHECK: [[DATA:%.+]] = load %arg0[%arg2, %arg6, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x2x32x64xf32>
// CHECK: [[KERNEL:%.+]] = load %arg1[%arg3, %arg6, %arg7, %arg8] : memref<5x2x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = load %0[%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
// CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg4, %arg7)
// CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
// CHECK: [[DATA:%.+]] = affine.load %arg0[%arg2, %arg6, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x2x32x64xf32>
// CHECK: [[KERNEL:%.+]] = affine.load %arg1[%arg3, %arg6, %arg7, %arg8] : memref<5x2x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = affine.load %0[%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
// CHECK: [[MUL:%.+]] = mulf [[DATA]], [[KERNEL]] : f32
// CHECK: [[ADD:%.+]] = addf [[ACC_RES]], [[MUL]] : f32
// CHECK: store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
// CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x27x58xf32>
// CHECK: }
// CHECK: }
// CHECK: }
@ -1384,19 +1384,19 @@ func @test_conv_bias_no_pad(%arg0 : tensor<1x2x32x64xf32>, %arg1 : tensor<5x2x6x
// CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 2, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
// CHECK: [[R1PLUSK1:%.+]] = addi %arg5, %arg8 : index
// CHECK: [[R2PLUSK2:%.+]] = addi %arg6, %arg9 : index
// CHECK: [[DATA:%.+]] = load %arg0[%arg3, %arg7, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x2x32x64xf32>
// CHECK: [[KERNEL:%.+]] = load %arg1[%arg4, %arg7, %arg8, %arg9] : memref<5x2x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = load %0[%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
// CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg6, %arg9)
// CHECK: [[DATA:%.+]] = affine.load %arg0[%arg3, %arg7, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x2x32x64xf32>
// CHECK: [[KERNEL:%.+]] = affine.load %arg1[%arg4, %arg7, %arg8, %arg9] : memref<5x2x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = affine.load %0[%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: [[MUL:%.+]] = mulf [[DATA]], [[KERNEL]] : f32
// CHECK: [[ADD:%.+]] = addf [[ACC_RES]], [[MUL]] : f32
// CHECK: store [[ADD]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: affine.store [[ADD]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: }
// CHECK: [[BIAS1:%.+]] = load [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: [[BIAS2:%.+]] = load %arg2[%arg4] : memref<5xf32>
// CHECK: [[BIAS1:%.+]] = affine.load [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: [[BIAS2:%.+]] = affine.load %arg2[%arg4] : memref<5xf32>
// CHECK: [[BIAS3:%.+]] = addf [[BIAS1]], [[BIAS2]] : f32
// CHECK: store [[BIAS3]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: affine.store [[BIAS3]], [[RES]][%arg3, %arg4, %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: }
// CHECK: }
// CHECK: return [[RES]] : memref<1x5x27x58xf32>
@ -1435,16 +1435,15 @@ func @test_conv_no_bias_no_pad_w_group(%arg0 : tensor<1x9x32x64xf32>, %arg1 : te
// CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg7 = 0 to 3, [[INNER_LOOPS]]#1 -> %arg8 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg9 = 0 to 7) {
// CHECK: [[MUL2:%.+]] = muli [[CONST2]], %arg3 : index
// CHECK: [[ADD2:%.+]] = addi %arg7, [[MUL2]] : index
// CHECK: [[R1PLUSK1:%.+]] = addi %arg5, %arg8 : index
// CHECK: [[R2PLUSK2:%.+]] = addi %arg6, %arg9 : index
// CHECK: [[DATA:%.+]] = load %arg0[%arg2, [[ADD2]], [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x9x32x64xf32>
// CHECK: [[KERNEL:%.+]] = load %arg1[%[[ADD1]], %arg7, %arg8, %arg9] : memref<5x3x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = load %0[%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: [[ADD2:%.+]] = affine.apply #{{.*}}(%arg3, %arg7)[%c3]
// CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
// CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg6, %arg9)
// CHECK: [[DATA:%.+]] = affine.load %arg0[%arg2, [[ADD2]], [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x9x32x64xf32>
// CHECK: [[KERNEL:%.+]] = affine.load %arg1[%[[ADD1]], %arg7, %arg8, %arg9] : memref<5x3x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = affine.load %0[%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: [[MUL:%.+]] = mulf [[DATA]], [[KERNEL]] : f32
// CHECK: [[ADD:%.+]] = addf [[ACC_RES]], [[MUL]] : f32
// CHECK: store [[ADD]], [[RES]][%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: affine.store [[ADD]], [[RES]][%arg2, %[[ADD1]], %arg5, %arg6] : memref<1x5x27x58xf32>
// CHECK: }
// CHECK: }
// CHECK: }
@ -1483,18 +1482,14 @@ func @test_conv_no_bias_no_pad_w_strides(%arg0 : tensor<1x9x32x64xf32>, %arg1 :
// CHECK: } : () -> (!krnl.loop, !krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_INNER_LOOPS]]#0, [[OPT_INNER_LOOPS]]#1, [[OPT_INNER_LOOPS]]#2) with ([[INNER_LOOPS]]#0 -> %arg6 = 0 to 9, [[INNER_LOOPS]]#1 -> %arg7 = 0 to 6, [[INNER_LOOPS]]#2 -> %arg8 = 0 to 7) {
// CHECK: [[CONST_STRIDE1:%.+]] = constant 2 : index
// CHECK: [[MUL1:%.+]] = muli [[CONST_STRIDE1]], %arg4 : index
// CHECK: [[R1PLUSK1:%.+]] = addi [[MUL1]], %arg7 : index
// CHECK: [[CONST_STRIDE2:%.+]] = constant 2 : index
// CHECK: [[MUL2:%.+]] = muli [[CONST_STRIDE2]], %arg5 : index
// CHECK: [[R2PLUSK2:%.+]] = addi [[MUL2]], %arg8 : index
// CHECK: [[DATA:%.+]] = load %arg0[%arg2, %arg6, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x9x32x64xf32>
// CHECK: [[KERNEL:%.+]] = load %arg1[%arg3, %arg6, %arg7, %arg8] : memref<5x9x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = load %0[%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
// CHECK: [[R1PLUSK1:%.+]] = affine.apply #{{.*}}(%arg4, %arg7)
// CHECK: [[R2PLUSK2:%.+]] = affine.apply #{{.*}}(%arg5, %arg8)
// CHECK: [[DATA:%.+]] = affine.load %arg0[%arg2, %arg6, [[R1PLUSK1]], [[R2PLUSK2]]] : memref<1x9x32x64xf32>
// CHECK: [[KERNEL:%.+]] = affine.load %arg1[%arg3, %arg6, %arg7, %arg8] : memref<5x9x6x7xf32>
// CHECK: [[ACC_RES:%.+]] = affine.load %0[%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
// CHECK: [[MUL:%.+]] = mulf [[DATA]], [[KERNEL]] : f32
// CHECK: [[ADD:%.+]] = addf [[ACC_RES]], [[MUL]] : f32
// CHECK: store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
// CHECK: affine.store [[ADD]], [[RES]][%arg2, %arg3, %arg4, %arg5] : memref<1x5x14x29xf32>
// CHECK: }
// CHECK: }
// CHECK: }
@ -1580,9 +1575,9 @@ func @test_abs_float(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ABS:%.+]] = absf [[LOAD]] : f32
// CHECK: store [[ABS]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[ABS]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: return [[RES]] : memref<?x10xf32>
}
@ -1601,12 +1596,12 @@ func @test_abs_int(%arg0 : tensor<?x10xi32>) -> tensor<*xi32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xi32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xi32>
// CHECK: [[ZERO:%.+]] = constant 0 : i32
// CHECK: [[LESS_THAN_ZERO:%.+]] = cmpi "slt", [[LOAD]], [[ZERO]] : i32
// CHECK: [[NEGATIVE_LOAD:%.+]] = subi [[ZERO]], [[LOAD]] : i32
// CHECK: [[SELECT:%.+]] = select [[LESS_THAN_ZERO]], [[NEGATIVE_LOAD]], [[LOAD]] : i32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xi32>
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xi32>
// CHECK: return [[RES]] : memref<?x10xi32>
}
@ -1994,14 +1989,14 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
// CHECK: [[SIGMOID_INPUT:%.+]] = alloc() : memref<f32>
// CHECK: store [[It_OUTPUT]], [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = load [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_INPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: store {{.*}}, [[It]][] : memref<f32>
// CHECK: affine.store {{.*}}, [[It]][] : memref<f32>
// CHECK: [[It_LOAD:%.+]] = load [[It]][] : memref<f32>
// CHECK: [[XtWf_LOAD:%.+]] = load [[XtWf_GEMM]][] : memref<f32>
@ -2010,14 +2005,14 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
// CHECK: [[SIGMOID_FORGET:%.+]] = alloc() : memref<f32>
// CHECK: store [[Ft_OUTPUT]], [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = load [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_FORGET]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: store {{.*}}, [[Ft]][] : memref<f32>
// CHECK: affine.store {{.*}}, [[Ft]][] : memref<f32>
// CHECK: [[Ft_LOAD:%.+]] = load [[Ft]][] : memref<f32>
// CHECK: [[XtWc_LOAD:%.+]] = load [[XtWc_GEMM]][] : memref<f32>
@ -2026,7 +2021,7 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
// CHECK: [[TANH_CELL:%.+]] = alloc() : memref<f32>
// CHECK: store [[ct_OUTPUT]], [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = load [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_CELL]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
@ -2034,7 +2029,7 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: store {{.*}}, [[ct]][] : memref<f32>
// CHECK: affine.store {{.*}}, [[ct]][] : memref<f32>
// CHECK: [[ct_LOAD:%.+]] = load [[ct]][] : memref<f32>
// CHECK: [[FtCt1:%.+]] = mulf [[Ft_LOAD]], [[Ct1_LOAD]] : f32
@ -2048,19 +2043,19 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
// CHECK: [[SIGMOID_OUTPUT:%.+]] = alloc() : memref<f32>
// CHECK: store [[Ot_OUTPUT]], [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = load [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[SIGMOID_OUTPUT]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = constant 1.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}}: f32
// CHECK: {{.*}} = exp {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: store {{.*}}, [[Ot]][] : memref<f32>
// CHECK: affine.store {{.*}}, [[Ot]][] : memref<f32>
// CHECK: [[Ot_LOAD:%.+]] = load [[Ot]][] : memref<f32>
// CHECK: [[TANH_HIDDEN:%.+]] = alloc() : memref<f32>
// CHECK: store [[Ct]], [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = load [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = affine.load [[TANH_HIDDEN]][] : memref<f32>
// CHECK: {{.*}} = constant 0.000000e+00 : f32
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = exp {{.*}} : f32
@ -2068,7 +2063,7 @@ func @test_lstm_general_computation(%arg0: tensor<4x3x2xf32>, %arg1: tensor<1x12
// CHECK: {{.*}} = subf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK: {{.*}} = divf {{.*}}, {{.*}} : f32
// CHECK: store {{.*}}, [[hCt]][] : memref<f32>
// CHECK: affine.store {{.*}}, [[hCt]][] : memref<f32>
// CHECK: [[hCt_LOAD:%.+]] = load [[hCt]][] : memref<f32>
// CHECK: [[Ht:%.+]] = mulf [[Ot_LOAD]], [[hCt_LOAD]] : f32

View File

@ -290,9 +290,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[EXP]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Exp
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -303,9 +303,9 @@ func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
// CHECK: store [[EXP]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[EXP]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -331,7 +331,7 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
@ -339,7 +339,7 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIVIDEND:%.+]] = subf [[EXP]], [[NEXP]] : f32
// CHECK: [[DIVISOR:%.+]] = addf [[EXP]], [[NEXP]] : f32
// CHECK: [[TANH:%.+]] = divf [[DIVIDEND]], [[DIVISOR]] : f32
// CHECK: store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[TANH]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Tanh
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -350,7 +350,7 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
// CHECK: [[EXP:%.+]] = exp [[LOAD]] : f32
@ -358,7 +358,7 @@ func @test_tanh_tanh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[DIVIDEND:%.+]] = subf [[EXP]], [[NEXP]] : f32
// CHECK: [[DIVISOR:%.+]] = addf [[EXP]], [[NEXP]] : f32
// CHECK: [[TANH_RES:%.+]] = divf [[DIVIDEND]], [[DIVISOR]] : f32
// CHECK: store [[TANH_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[TANH_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -384,7 +384,7 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@ -392,7 +392,7 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVIDEND:%.+]] = subf [[EXP]], [[NEXP]] : f32
// CHECK: [[SINH_RES:%.+]] = divf [[DIVIDEND]], [[TWO]] : f32
// CHECK: store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SINH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Sinh
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -403,7 +403,7 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@ -411,7 +411,7 @@ func @test_sinh_sinh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVIDEND:%.+]] = subf [[EXP]], [[NEXP]] : f32
// CHECK: [[SINH_RES:%.+]] = divf [[DIVIDEND]], [[TWO]] : f32
// CHECK: store [[SINH_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SINH_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -437,7 +437,7 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@ -445,7 +445,7 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVIDEND:%.+]] = addf [[EXP]], [[NEXP]] : f32
// CHECK: [[COSH_RES:%.+]] = divf [[DIVIDEND]], [[TWO]] : f32
// CHECK: store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[COSH_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Cosh
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -456,7 +456,7 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[TWO:%.+]] = constant {{2.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
@ -464,7 +464,7 @@ func @test_cosh_cosh(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVIDEND:%.+]] = addf [[EXP]], [[NEXP]] : f32
// CHECK: [[COSH_RES:%.+]] = divf [[DIVIDEND]], [[TWO]] : f32
// CHECK: store [[COSH_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[COSH_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -490,14 +490,14 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVISOR:%.+]] = addf [[ONE]], [[NEXP]] : f32
// CHECK: [[SIGMOID_RES:%.+]] = divf [[ONE]], [[DIVISOR]] : f32
// CHECK: store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SIGMOID_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Sigmoid
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -508,14 +508,14 @@ func @test_sigmoid_sigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[NLOAD:%.+]] = subf [[ZERO]], [[LOAD]] : f32
// CHECK: [[NEXP:%.+]] = exp [[NLOAD]] : f32
// CHECK: [[DIVISOR:%.+]] = addf [[ONE]], [[NEXP]] : f32
// CHECK: [[SIGMOID_RES:%.+]] = divf [[ONE]], [[DIVISOR]] : f32
// CHECK: store [[SIGMOID_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SIGMOID_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -541,11 +541,11 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[LTZERO]], [[ZERO]], [[LOAD]] : f32
// CHECK: store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[RELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Relu
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -556,11 +556,11 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[LTZERO:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[LTZERO]], [[ZERO]], [[LOAD]] : f32
// CHECK: store [[RELU_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[RELU_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -707,7 +707,7 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{2.+}} : f32
@ -716,7 +716,7 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SUB:%.+]] = subf [[EXP]], [[ONE]] : f32
// CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[SUB]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Elu
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -727,7 +727,7 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{2.+}} : f32
@ -736,7 +736,7 @@ func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SUB:%.+]] = subf [[EXP]], [[ONE]] : f32
// CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[SUB]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
// CHECK: store [[SELECT]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -762,13 +762,13 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
// CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
// CHECK: store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second LeakyRelu
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -779,13 +779,13 @@ func @test_leakyrelu_leakyrelu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
// CHECK: [[CMP:%.+]] = cmpf "olt", [[LOAD]], [[ZERO]] : f32
// CHECK: [[MUL:%.+]] = mulf [[ALPHA]], [[LOAD]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[MUL]], [[LOAD]] : f32
// CHECK: store [[SELECT]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -811,7 +811,7 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
// CHECK: [[GAMMA:%.+]] = constant {{2.+}} : f32
@ -821,7 +821,7 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SUB:%.+]] = subf [[MUL]], [[ALPHA]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD]], [[SUB]] : f32
// CHECK: [[SELU_RES:%.+]] = mulf [[GAMMA]], [[SELECT]] : f32
// CHECK: store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELU_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Selu
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -832,7 +832,7 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
// CHECK: [[GAMMA:%.+]] = constant {{2.+}} : f32
@ -842,7 +842,7 @@ func @test_selu_selu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SUB:%.+]] = subf [[MUL]], [[ALPHA]] : f32
// CHECK: [[SELECT:%.+]] = select [[CMP]], [[LOAD]], [[SUB]] : f32
// CHECK: [[SELU_RES:%.+]] = mulf [[GAMMA]], [[SELECT]] : f32
// CHECK: store [[SELU_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELU_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -868,7 +868,7 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@ -879,7 +879,7 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SELECT1:%.+]] = select [[CMP1]], [[ADD]], [[ZERO]] : f32
// CHECK: [[CMP2:%.+]] = cmpf "olt", [[SELECT1]], [[ONE]] : f32
// CHECK: [[SELECT2:%.+]] = select [[CMP2]], [[SELECT1]], [[ONE]] : f32
// CHECK: store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT2]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second HardSigmoid
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -890,7 +890,7 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ZERO:%.+]] = constant {{0.+}} : f32
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[ALPHA:%.+]] = constant {{1.+}} : f32
@ -901,7 +901,7 @@ func @test_hardsigmoid_hardsigmoid(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: [[SELECT1:%.+]] = select [[CMP1]], [[ADD]], [[ZERO]] : f32
// CHECK: [[CMP2:%.+]] = cmpf "olt", [[SELECT1]], [[ONE]] : f32
// CHECK: [[SELECT2:%.+]] = select [[CMP2]], [[SELECT1]], [[ONE]] : f32
// CHECK: store [[SELECT2]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[SELECT2]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>
@ -927,10 +927,10 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load %arg0[%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32
// CHECK: store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
/// Second Reciprocal
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
@ -941,10 +941,10 @@ func @test_reciprocal_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg1 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg2 = 0 to 10) {
// CHECK: [[LOAD:%.+]] = load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[LOAD:%.+]] = affine.load [[RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: [[ONE:%.+]] = constant {{1.+}} : f32
// CHECK: [[RECIPROCAL_RES:%.+]] = divf [[ONE]], [[LOAD]] : f32
// CHECK: store [[RECIPROCAL_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
// CHECK: affine.store [[RECIPROCAL_RES]], [[RET_RES]][%arg1, %arg2] : memref<?x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32>