[MLIR] Add broadcasting support for element wise operations (#398)

* Add broadcasting support for elementwise operations

* Remove MLIRDialect from MLIRWholeArchiveLibs

* Rewrite getLoopIVsForBroadcasting

* Compute dimensions for allocating result memory

* Compute dimensions for allocating result memory (revised)

* Use static dimension for element-wise operation testcases

* Add a test for addition with broadcasting

* Missed Traits.h when merging

* Revise

* Update SharedWork.md

* Broadcasting for variadic operations

* Edit comments

* Update SharedWork.md

* Reorganize the code

* Add CHECK-LABEL for test_add_with_broadcasting
Authored by TUNG LEDUC on 2019-12-20 01:28:06 +09:00; committed by Tian Jin
parent 0a8af69e94
commit 06a968d4a1
6 changed files with 501 additions and 341 deletions


@@ -57,6 +57,7 @@ endfunction(find_mlir_lib)
 find_mlir_lib(MLIRAffineOps)
 find_mlir_lib(MLIRAffineToStandard)
 find_mlir_lib(MLIRAnalysis)
+find_mlir_lib(MLIRDialect)
 find_mlir_lib(MLIRExecutionEngine)
 find_mlir_lib(MLIRIR)
 find_mlir_lib(MLIRLLVMIR)
@@ -114,6 +115,7 @@ set(MLIRLibsOnce
   MLIRAffineOps
   MLIRAffineToStandard
   MLIRAnalysis
+  MLIRDialect
   MLIRExecutionEngine
   MLIRIR
   MLIRLLVMIR


@@ -8,10 +8,10 @@ ONNX operations for which some work is needed.
 | ONNX Oper | Person working on it | ONNX 2 KRNL | Basic functionality | Extended functionality (e.g. broadcast) |
 | ---------- | --------------------- | -------------- | --------------------- | ---------------------------------------- |
-| Add | Tung (updated) | v | v | noM |
-| And | Tung | v | v | noM |
-| Cosh | Tung | v | v | noM |
-| Div | Tung | v | v | |
+| Add | Tung (updated) | v | v | M |
+| And | Tung | v | v | M |
+| Cosh | Tung | v | v | |
+| Div | Tung | v | v | M |
 | Elu | Tung | v | v | |
 | Exp | Tung | v | v | |
 | FullGemm | | | | noU |
@@ -19,18 +19,18 @@ ONNX operations for which some work is needed.
 | HardSigmoid | Tung | v | v | |
 | LeakyRelu | Tung | v | v | |
 | MatMul | | | | noM |
-| Max | Tung | v | v | noM |
-| Min | Tung | v | v | noM |
-| Mul | Tung | v | v | noM |
-| Or | Tung | v | v | noM |
+| Max | Tung | v | v | M |
+| Min | Tung | v | v | M |
+| Mul | Tung | v | v | M |
+| Or | Tung | v | v | M |
 | Relu | Tung | v | v | |
 | Selu | Tung | v | v | |
 | Sigmoid | Tung | v | v | |
 | Sinh | Tung | v | v | |
-| Sub | Tung | v | v | noM |
-| Sum | Tung | v | v | noM |
+| Sub | Tung | v | v | M |
+| Sum | Tung | v | v | M |
 | Tanh | Tung | v | v | |
-| Xor | Tung | v | v | noM |
+| Xor | Tung | v | v | M |
 ONNX operations for which the work is completed (full functionality) and tested


@@ -8,6 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
+#include "mlir/Dialect/Traits.h"
 #include "mlir/IR/Block.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Function.h"
@@ -21,6 +22,7 @@
 #include "onnx_ops.hpp"
 using namespace mlir;
+using namespace mlir::OpTrait::util;
 //===----------------------------------------------------------------------===//
 // ONNXOpsDialect
@@ -127,7 +129,12 @@ void ONNXReciprocalOp::inferShapes() {
 /// Infer the output shape of the ONNXAddOp. This method is required by the
 /// shape inference interface.
 void ONNXAddOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
 }
 //===----------------------------------------------------------------------===//
@@ -135,7 +142,12 @@ void ONNXAddOp::inferShapes() {
 /// Infer the output shape of the ONNXMulOp. This method is required by the
 /// shape inference interface.
 void ONNXMulOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
 }
 //===----------------------------------------------------------------------===//
@@ -143,7 +155,12 @@ void ONNXMulOp::inferShapes() {
 /// Infer the output shape of the ONNXDivOp. This method is required by the
 /// shape inference interface.
 void ONNXDivOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
 }
 //===----------------------------------------------------------------------===//
@@ -151,7 +168,12 @@ void ONNXDivOp::inferShapes() {
 /// Infer the output shape of the ONNXSubOp. This method is required by the
 /// shape inference interface.
 void ONNXSubOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
 }
 //===----------------------------------------------------------------------===//
@@ -159,21 +181,38 @@ void ONNXSubOp::inferShapes() {
 /// Infer the output shape of the ONNXAndOp. This method is required by the
 /// shape inference interface.
 void ONNXAndOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
 }
 //===----------------------------------------------------------------------===//
 // Or
 /// Infer the output shape of the ONNXOrOp. This method is required by the
 /// shape inference interface.
-void ONNXOrOp::inferShapes() { getResult()->setType(getOperand(0)->getType()); }
+void ONNXOrOp::inferShapes() {
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
+}
 //===----------------------------------------------------------------------===//
 // Xor
 /// Infer the output shape of the ONNXXorOp. This method is required by the
 /// shape inference interface.
 void ONNXXorOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  if (!getOperand(0)->getType().isa<RankedTensorType>() ||
+      !getOperand(1)->getType().isa<RankedTensorType>())
+    return;
+  auto lhsTy = getOperand(0)->getType().cast<RankedTensorType>();
+  auto rhsTy = getOperand(1)->getType().cast<RankedTensorType>();
+  getResult()->setType(getBroadcastedType(lhsTy, rhsTy));
 }
 //===----------------------------------------------------------------------===//
@@ -183,7 +222,16 @@ void ONNXXorOp::inferShapes() {
 /// Infer the output shape of the ONNXSumOp. This method is required by the
 /// shape inference interface.
 void ONNXSumOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  for (int i = 0; i < getNumOperands(); ++i) {
+    if (!getOperand(i)->getType().cast<RankedTensorType>())
+      return;
+  }
+  Type resultTy = getOperand(0)->getType().cast<RankedTensorType>();
+  for (int i = 1; i < getNumOperands(); ++i) {
+    Type nextTy = getOperand(i)->getType().cast<RankedTensorType>();
+    resultTy = getBroadcastedType(resultTy, nextTy);
+  }
+  getResult()->setType(resultTy);
 }
 //===----------------------------------------------------------------------===//
@@ -191,7 +239,16 @@ void ONNXSumOp::inferShapes() {
 /// Infer the output shape of the ONNXMaxOp. This method is required by the
 /// shape inference interface.
 void ONNXMaxOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  for (int i = 0; i < getNumOperands(); ++i) {
+    if (!getOperand(i)->getType().cast<RankedTensorType>())
+      return;
+  }
+  Type resultTy = getOperand(0)->getType().cast<RankedTensorType>();
+  for (int i = 1; i < getNumOperands(); ++i) {
+    Type nextTy = getOperand(i)->getType().cast<RankedTensorType>();
+    resultTy = getBroadcastedType(resultTy, nextTy);
+  }
+  getResult()->setType(resultTy);
 }
 //===----------------------------------------------------------------------===//
@@ -199,7 +256,16 @@ void ONNXMaxOp::inferShapes() {
 /// Infer the output shape of the ONNXMinOp. This method is required by the
 /// shape inference interface.
 void ONNXMinOp::inferShapes() {
-  getResult()->setType(getOperand(0)->getType());
+  for (int i = 0; i < getNumOperands(); ++i) {
+    if (!getOperand(i)->getType().cast<RankedTensorType>())
+      return;
+  }
+  Type resultTy = getOperand(0)->getType().cast<RankedTensorType>();
+  for (int i = 1; i < getNumOperands(); ++i) {
+    Type nextTy = getOperand(i)->getType().cast<RankedTensorType>();
+    resultTy = getBroadcastedType(resultTy, nextTy);
+  }
+  getResult()->setType(resultTy);
 }
 //===----------------------------------------------------------------------===//
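For context on the shape rule the inferShapes methods above rely on: getBroadcastedType combines two ranked shapes with ONNX/NumPy-style multidimensional broadcasting, aligning dimensions from the right. The following standalone C++ sketch is illustrative only; it is not part of this patch, the name broadcastShapes and the use of -1 for unknown dimensions are assumptions made for the example, and it omits the diagnostics a real checker would emit for genuine mismatches.

#include <cstddef>
#include <cstdint>
#include <vector>

// Combine two ranked shapes under broadcasting, aligning from the rightmost
// dimension; -1 stands for a dimension whose size is unknown statically.
std::vector<int64_t> broadcastShapes(const std::vector<int64_t> &lhs,
                                     const std::vector<int64_t> &rhs) {
  std::size_t rank = lhs.size() > rhs.size() ? lhs.size() : rhs.size();
  std::vector<int64_t> result(rank);
  for (std::size_t r = 0; r < rank; ++r) {
    // A missing dimension on the shorter shape behaves like size 1.
    int64_t l = r < lhs.size() ? lhs[lhs.size() - 1 - r] : 1;
    int64_t h = r < rhs.size() ? rhs[rhs.size() - 1 - r] : 1;
    int64_t out;
    if (l == h)
      out = l;   // identical extents (possibly both unknown)
    else if (l == 1)
      out = h;   // lhs is broadcast along this dimension
    else if (h == 1)
      out = l;   // rhs is broadcast along this dimension
    else
      out = -1;  // one side unknown; left dynamic in this sketch
    result[rank - 1 - r] = out;
  }
  return result;
}

// Example: broadcastShapes({1, 5}, {4, 1}) == {4, 5};
//          broadcastShapes({5}, {2, 10, 5}) == {2, 10, 5}.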


@@ -9,6 +9,8 @@
 //
 //===----------------------------------------------------------------------===//
+#include <map>
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Sequence.h"
 #include "mlir/Dialect/AffineOps/AffineOps.h"
@@ -44,16 +46,51 @@ static MemRefType convertTensorToMemRef(TensorType type) {
 }
 /// Insert an allocation and deallocation for the given MemRefType.
-static Value* insertAllocAndDealloc(MemRefType type, Location loc,
-    PatternRewriter& rewriter, bool insertDealloc, Value* oldMemRef = nullptr) {
+static Value *insertAllocAndDealloc(MemRefType type, Location loc,
+                                    PatternRewriter &rewriter,
+                                    bool insertDealloc,
+                                    ArrayRef<Value *> operands = {}) {
   // Put together alloc operands for any dynamic dimensions of the memref.
   AllocOp alloc;
-  if (oldMemRef) {
-    SmallVector<Value*, 4> allocOperands;
+  if (!operands.empty()) {
     auto memRefShape = type.getShape();
-    for (int i = 0; i < memRefShape.size(); ++i)
+    auto rank = memRefShape.size();
+    std::map<int, Value *> fromOperands;
+    for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+      int memRefDimIdx = rank - 1 - reversedIdx;
+      if (memRefShape[memRefDimIdx] < 0) { // unknown dimension
+        Value *maxDim = nullptr;
+        for (int i = 0; i < operands.size(); i++) {
+          auto operandShape =
+              operands[i]->getType().cast<MemRefType>().getShape();
+          int operandDimIdx = operandShape.size() - 1 - reversedIdx;
+          if (operandDimIdx < 0)
+            continue;
+          // In case of operations with broadcasting, the dimension of the
+          // alloc result is the maximum size along each dimension of the
+          // operands.
+          auto operandDim =
+              rewriter.create<DimOp>(loc, operands[i], operandDimIdx);
+          if (maxDim) {
+            auto maxCondition = rewriter.create<CmpIOp>(loc, CmpIPredicate::sgt,
+                                                        operandDim, maxDim);
+            maxDim = rewriter.create<SelectOp>(loc, maxCondition, operandDim,
+                                               maxDim);
+          } else {
+            maxDim = operandDim;
+          }
+        }
+        fromOperands.insert(std::make_pair(memRefDimIdx, maxDim));
+      }
+    }
+    SmallVector<Value *, 4> allocOperands;
+    for (int i = 0; i < rank; ++i)
       if (memRefShape[i] < 0)
-        allocOperands.push_back(rewriter.create<DimOp>(loc, oldMemRef, i));
+        allocOperands.push_back(fromOperands[i]);
     alloc = rewriter.create<AllocOp>(loc, type, allocOperands);
   } else {
     alloc = rewriter.create<AllocOp>(loc, type);
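The run-time computation above (DimOp, CmpIOp sgt, SelectOp) picks, for every result dimension that is unknown at compile time, the largest extent among the operands that have a dimension at that position. A scalar analogue, purely illustrative and operating on plain integers rather than emitting IR (allocSizes and its parameters are hypothetical names, not part of the patch), looks like this:

#include <algorithm>
#include <cstdint>
#include <vector>

// resultShape: the memref shape, with -1 for dimensions unknown statically.
// operandDims: each operand's run-time sizes (what DimOp would return).
// Dimensions are aligned from the rightmost position, as in the pass.
std::vector<int64_t> allocSizes(
    const std::vector<int64_t> &resultShape,
    const std::vector<std::vector<int64_t>> &operandDims) {
  int64_t rank = static_cast<int64_t>(resultShape.size());
  std::vector<int64_t> sizes(resultShape);
  for (int64_t r = 0; r < rank; ++r) {
    int64_t dimIdx = rank - 1 - r;
    if (resultShape[dimIdx] >= 0)
      continue; // static dimension: size already known
    int64_t maxDim = 1;
    for (const auto &dims : operandDims) {
      int64_t operandIdx = static_cast<int64_t>(dims.size()) - 1 - r;
      if (operandIdx < 0)
        continue; // this operand has no dimension at this position
      // With broadcasting, the result extent is the largest operand extent.
      maxDim = std::max(maxDim, dims[operandIdx]);
    }
    sizes[dimIdx] = maxDim;
  }
  return sizes;
}

// Example: allocSizes({-1, 10}, {{1, 10}, {7, 10}}) == {7, 10}.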
@@ -109,6 +146,89 @@ unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
   return llvm::divideCeil(sizeInBits, 8);
 }
+// Get run-time dimension information for unknown dimensions used for
+// broadcasting.
+std::map<int, std::map<int, Value *> >
+getBroadcastedDimInfo(Location loc, ConversionPatternRewriter &rewriter,
+                      MemRefType memRefType, ArrayRef<Value *> operands) {
+  auto memRefShape = memRefType.getShape();
+  int64_t rank = memRefShape.size();
+  // For unknown dimensions, we need to get dimension values at runtime in
+  // order to do broadcasting.
+  std::map<int, std::map<int, Value *>> DimInfo;
+  // For each result dimension, compute the number of sharing operands.
+  // Sharing operands are operands sharing the same index (counting from the
+  // rightmost to the leftmost) for a given dimension.
+  std::map<int, int> sharedDimCount;
+  for (int reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+    int dimIdx = rank - 1 - reversedIdx;
+    sharedDimCount[dimIdx] = 0;
+    for (int i = 0; i < operands.size(); ++i) {
+      auto shape = operands[i]->getType().cast<MemRefType>().getShape();
+      if (reversedIdx <= shape.size() - 1)
+        sharedDimCount[dimIdx]++;
+    }
+  }
+  // An unknown dimension can have a value of 1 or N (N > 1).
+  // If its value is 1, it is broadcasted dimension.
+  // Otherwise, non-broadcasted dimension.
+  // We only care about unknown dimensions whose number of sharing operands is
+  // more than one, since they are potentially broadcasted dimensions.
+  for (int i = 0; i < operands.size(); ++i) {
+    std::map<int, Value *> broadcastedDims;
+    auto shape = operands[i]->getType().cast<MemRefType>().getShape();
+    int size = shape.size();
+    for (int j = 0; j < shape.size(); ++j) {
+      if (shape[j] < 0 and sharedDimCount[rank - size + j] > 1) {
+        auto dim = rewriter.create<DimOp>(loc, operands[i], j).getResult();
+        auto one = rewriter.create<ConstantIndexOp>(loc, 1);
+        auto isBroadcasted =
+            rewriter.create<CmpIOp>(loc, CmpIPredicate::eq, dim, one);
+        broadcastedDims.insert(std::make_pair(j, isBroadcasted));
+      }
+    }
+    DimInfo.insert(std::make_pair(i, broadcastedDims));
+  }
+  return DimInfo;
+}
+// Extract induction variables that are used for broadcasting values of a
+// given operand.
+std::vector<Value *>
+getLoopIVsForBroadcasting(Location loc, ConversionPatternRewriter &rewriter,
+                          ArrayRef<Value *> loopIVs, Value *operand,
+                          std::map<int, Value *> broadcastedDims) {
+  // `operand` must has a ranked type. This should have been checked by the
+  // shape inference pass.
+  auto operandShape = operand->getType().cast<MemRefType>().getShape();
+  auto rank = operandShape.size();
+  auto loopCount = loopIVs.size();
+  std::vector<Value*> newLoopIVs;
+  for (unsigned reversedIdx = 0; reversedIdx < rank; ++reversedIdx) {
+    auto dimIdx = rank - 1 - reversedIdx;
+    auto loopIdx = loopCount - 1 - reversedIdx;
+    if (operandShape[dimIdx] == 1) {
+      // Broadcasted dimension
+      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
+      newLoopIVs.insert(newLoopIVs.begin(), zero);
+    } else if ((operandShape[dimIdx] == -1) &&
+               (broadcastedDims.find(dimIdx) != broadcastedDims.end())) {
+      // Unknown dimension, it can have a value of 1 or N (N > 1).
+      // If its value is 1, it is broadcasted dimension.
+      // Otherwise, non-broadcasted dimension.
+      auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
+      auto idx = rewriter.create<SelectOp>(loc, broadcastedDims[dimIdx],
+                                           zero, loopIVs[loopIdx]);
+      newLoopIVs.insert(newLoopIVs.begin(), idx);
+    } else {
+      // Non-broadcasted dimension
+      newLoopIVs.insert(newLoopIVs.begin(), loopIVs[loopIdx]);
+    }
+  }
+  return newLoopIVs;
+}
 namespace {
 template <typename ElementwiseNaryOp>
@@ -505,7 +625,7 @@ struct ONNXElementwiseUnaryOpLowering : public ConversionPattern {
       alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, operands[0]);
+          memRefType, loc, rewriter, insertDealloc, {operands[0]});
     // Number of loops
     auto memRefShape = memRefType.getShape();
@@ -595,20 +715,18 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
     // Insert an allocation and deallocation for the result of this operation.
     auto memRefType = convertTensorToMemRef(tensorType);
-    // If the output has a dynamic dimension, pass the operands required for
-    // each dynamic dimension to the AllocOp. The first operand of the
-    // operation is used. The operands of the op need to match in terms of
-    // dimensions with the result at this pre-optimization phase.
-    // TODO: verify that dimensions match.
-    // TODO: can the dimension of the result differ after optimizations?
     Value* alloc;
     bool insertDealloc = checkInsertDealloc(op);
+    // If the output has a dynamic dimension, we compute its dimension at
+    // runtime by using dimensions from the operands.
+    // In particular, we need to know from which operand a result dimension
+    // comes from.
+    // TODO: can the dimension of the result differ after optimizations?
     if (hasAllConstantDimensions(memRefType))
       alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
     else
       alloc = insertAllocAndDealloc(
-          memRefType, loc, rewriter, insertDealloc, operands[0]);
+          memRefType, loc, rewriter, insertDealloc, operands);
     // Number of loops
     auto memRefShape = memRefType.getShape();
@@ -639,13 +757,18 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
       if (memRefShape[i] < 0) {
         pack.pushConstantBound(0);
         pack.pushOperandBound(
-            rewriter.create<DimOp>(loc, operands[0], i).getResult());
+            rewriter.create<DimOp>(loc, alloc, i).getResult());
       } else {
         pack.pushConstantBound(0);
         pack.pushConstantBound(memRefShape[i]);
       }
     }
+    // Get run-time dimension information for unknown dimensions used for
+    // broadcasting.
+    std::map<int, std::map<int, Value *>> broadcastedDimInfo =
+        getBroadcastedDimInfo(loc, rewriter, memRefType, operands);
     auto iterateOp = rewriter.create<KrnlIterateOp>(loc, pack);
     Block& iterationBlock = iterateOp.bodyRegion().front();
@@ -655,8 +778,7 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
     // 1. Insert any optimizations in the KrnlOptimizeLoopsOp body.
     rewriter.setInsertionPointToEnd(&optimizationBlock);
     // Return from KrnlOptimizeLoopsOp body.
-    // When no optimizations are present we just return the loops
-    // unchaged.
+    // When no optimizations are present we just return the loops unchaged.
     rewriter.create<KrnlReturnLoopsOp>(loc, originalLoops);
     rewriter.setInsertionPoint(optimizedLoopsOp);
@@ -670,9 +792,13 @@ struct ONNXElementwiseVariadicOpLowering : public ConversionPattern {
     // Fold over operands for each of their scalar values
     Value *accumulated, *next;
-    accumulated = rewriter.create<LoadOp>(loc, operands[0], loopIVs);
+    auto accumulatedLoopIVs = getLoopIVsForBroadcasting(
+        loc, rewriter, loopIVs, operands[0], broadcastedDimInfo[0]);
+    accumulated = rewriter.create<LoadOp>(loc, operands[0], accumulatedLoopIVs);
     for (unsigned i = 1; i < numArgs; i++) {
-      next = rewriter.create<LoadOp>(loc, operands[i], loopIVs);
+      auto nextLoopIVs = getLoopIVsForBroadcasting(
+          loc, rewriter, loopIVs, operands[i], broadcastedDimInfo[i]);
+      next = rewriter.create<LoadOp>(loc, operands[i], nextLoopIVs);
       accumulated = mapToLowerScalarOp<ElementwiseVariadicOp>(
           op, memRefType.getElementType(), {accumulated, next}, rewriter);
     }
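The load rewriting above relies on getLoopIVsForBroadcasting to pick the right index for each operand dimension: a static size of 1 always maps to index 0, a size that is unknown at compile time selects between 0 and the loop index based on its run-time value, and any other dimension reuses the loop induction variable directly. A scalar analogue (illustrative only; remapIVs and its arguments are hypothetical names, not part of the patch) is sketched below; the dynamic case mirrors the cmpi/select pair checked by the new test_add_with_broadcasting test.

#include <cstddef>
#include <cstdint>
#include <vector>

// resultIVs: loop indices over the (broadcasted) result, one per result dim.
// operandShape: compile-time shape of the operand, -1 for unknown sizes.
// operandDims: run-time sizes of the operand (what DimOp would return).
std::vector<int64_t> remapIVs(const std::vector<int64_t> &resultIVs,
                              const std::vector<int64_t> &operandShape,
                              const std::vector<int64_t> &operandDims) {
  std::size_t rank = operandShape.size();
  std::size_t loopCount = resultIVs.size();
  std::vector<int64_t> ivs(rank);
  for (std::size_t r = 0; r < rank; ++r) {
    std::size_t dimIdx = rank - 1 - r;    // align from the rightmost dimension
    std::size_t loopIdx = loopCount - 1 - r;
    if (operandShape[dimIdx] == 1)
      ivs[dimIdx] = 0;                    // statically broadcast: index 0
    else if (operandShape[dimIdx] == -1 && operandDims[dimIdx] == 1)
      ivs[dimIdx] = 0;                    // dynamically broadcast: select 0
    else
      ivs[dimIdx] = resultIVs[loopIdx];   // normal dimension: loop index
  }
  return ivs;
}

// Example: an operand of shape {-1} loaded inside a 2-D loop nest uses
// remapIVs({i, j}, {-1}, {1}) == {0} when its single dimension is 1 at runtime.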


@@ -1,143 +1,129 @@
 // RUN: onnf-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
-func @test_add(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_add
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
-func @test_mul(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_mul
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
-func @test_div(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Div"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Div"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_div
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
-func @test_sub(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Sub"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Sub"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_sub
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
-func @test_and(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
-  %0 = "onnx.And"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
+func @test_and(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
+  %0 = "onnx.And"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
   "std.return"(%0) : (tensor<*xi32>) -> ()
   // CHECK-LABEL: test_and
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
   // CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i32
-  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: return [[RES]] : memref<?x10xi32>
+  // CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: return [[RES]] : memref<10x10xi32>
 }
-func @test_or(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
-  %0 = "onnx.Or"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
+func @test_or(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
+  %0 = "onnx.Or"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
   "std.return"(%0) : (tensor<*xi32>) -> ()
   // CHECK-LABEL: test_or
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
   // CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i32
-  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: return [[RES]] : memref<?x10xi32>
+  // CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: return [[RES]] : memref<10x10xi32>
 }
-func @test_xor(%arg0 : tensor<?x10xi32>, %arg1 : tensor<?x10xi32>) -> tensor<*xi32> {
-  %0 = "onnx.Xor"(%arg0, %arg1) : (tensor<?x10xi32>, tensor<?x10xi32>) -> tensor<*xi32>
+func @test_xor(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
+  %0 = "onnx.Xor"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
   "std.return"(%0) : (tensor<*xi32>) -> ()
   // CHECK-LABEL: test_xor
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xi32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xi32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xi32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
   // CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i32
-  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<?x10xi32>
-  // CHECK: return [[RES]] : memref<?x10xi32>
+  // CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
+  // CHECK: return [[RES]] : memref<10x10xi32>
 }
 func @test_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
@@ -310,66 +296,60 @@ func @test_reshape(%arg0 : tensor<?x10xf32>, %arg1 : tensor<4xi32>) -> tensor<*x
   // CHECK: return [[ALLOC]] : memref<?x?x?x?xf32>
 }
-func @test_sum(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Sum"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Sum"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_sum
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
-func @test_max(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Max"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
  %0 = "onnx.Max"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_max
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
   // CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
-func @test_min(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
-  %0 = "onnx.Min"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+func @test_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Min"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
   "std.return"(%0) : (tensor<*xf32>) -> ()
   // CHECK-LABEL: test_min
-  // CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
   // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
   // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
   // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
   // CHECK: } : () -> (!krnl.loop, !krnl.loop)
-  // CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32>
-  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
-  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
   // CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
   // CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
-  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
-  // CHECK: return [[RES]] : memref<?x10xf32>
+  // CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
+  // CHECK: return [[RES]] : memref<10x10xf32>
 }
 func @test_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
@@ -495,3 +475,29 @@ func @test_reciprocal(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
   // CHECK: store [[RECIPROCAL_RES]], [[RES]][%arg1, %arg2] : memref<?x10xf32>
   // CHECK: return [[RES]] : memref<?x10xf32>
 }
+func @test_add_with_broadcasting(%arg0 : tensor<?xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> {
+  %0 = "onnx.Add"(%arg0, %arg1) : (tensor<?xf32>, tensor<?x10xf32>) -> tensor<*xf32>
+  "std.return"(%0) : (tensor<*xf32>) -> ()
+  // CHECK-LABEL: test_add_with_broadcasting
+  // CHECK: [[DIM1:%.+]] = dim %arg1, 0 : memref<?x10xf32>
+  // CHECK: [[RES:%.+]] = alloc([[DIM1]]) : memref<?x10xf32>
+  // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
+  // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
+  // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
+  // CHECK: } : () -> (!krnl.loop, !krnl.loop)
+  // CHECK: [[DIM2:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
+  // CHECK: [[DIM3:%.+]] = dim %arg0, 0 : memref<?xf32>
+  // CHECK: [[ONE:%.+]] = constant 1 : index
+  // CHECK: [[IS_ONE:%.+]] = cmpi "eq", [[DIM3]], [[ONE]] : index
+  // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
+  // CHECK: [[ZERO:%.+]] = constant 0 : index
+  // CHECK: %[[SELECT1:.+]] = select [[IS_ONE]], [[ZERO]], %arg3 : index
+  // CHECK: [[LOAD1:%.+]] = load %arg0[%[[SELECT1]]] : memref<?xf32>
+  // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
+  // CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<?x10xf32>
+  // CHECK: }
+  // CHECK: return [[RES]] : memref<?x10xf32>
+}


@ -1,291 +1,263 @@
// RUN: onnf-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s // RUN: onnf-opt --shape-inference --lower-frontend %s -split-input-file | FileCheck %s
func @test_add_add(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> { func @test_add_add(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Add"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32> %0 = "onnx.Add"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Add"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32> %1 = "onnx.Add"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> () "std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_add_add // CHECK-LABEL: test_add_add
/// First Add /// First Add
// CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32> // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32> // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32> // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<?x10xf32> // CHECK: store [[ADDF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Add /// Second Add
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32> // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32> // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
// CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[ADDF:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32> // CHECK: store [[ADDF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result. /// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32> // CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32> // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<?x10xf32> // CHECK: return [[RET_RES]] : memref<10x10xf32>
} }
func @test_mul_mul(%arg0 : tensor<?x10xf32>, %arg1 : tensor<?x10xf32>) -> tensor<*xf32> { func @test_mul_mul(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Mul"(%arg0, %arg1) : (tensor<?x10xf32>, tensor<?x10xf32>) -> tensor<*xf32> %0 = "onnx.Mul"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Mul"(%0, %arg1) : (tensor<*xf32>, tensor<?x10xf32>) -> tensor<*xf32> %1 = "onnx.Mul"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> () "std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_mul_mul // CHECK-LABEL: test_mul_mul
/// First Mul /// First Mul
// CHECK: [[DIM_0:%.+]] = dim %arg0, 0 : memref<?x10xf32> // CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32> // CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim %arg0, 0 : memref<?x10xf32> // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<?x10xf32> // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<?x10xf32> // CHECK: store [[MULF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Mul /// Second Mul
// CHECK: [[DIM_0:%.+]] = dim [[RES]], 0 : memref<?x10xf32>
// CHECK: [[RET_RES:%.+]] = alloc([[DIM_0]]) : memref<?x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2 // CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops { // CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1 // CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop) // CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: [[DIM_2:%.+]] = dim [[RES]], 0 : memref<?x10xf32> // CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to [[DIM_2]], [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) { // CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<?x10xf32> // CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<?x10xf32>
// CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32 // CHECK: [[MULF:%.+]] = mulf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<?x10xf32> // CHECK: store [[MULF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result. /// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<?x10xf32> // CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<?x10xf32> // CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<?x10xf32> // CHECK: return [[RET_RES]] : memref<10x10xf32>
} }
func @test_div_div(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Div"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Div"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_div_div
/// First Div
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[DIVF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Div
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[DIVF:%.+]] = divf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[DIVF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<10x10xf32>
}
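/// test_sub_sub: same chained two-loop pattern as above, with subf as the
/// scalar operation in each loop body.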
func @test_sub_sub(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Sub"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Sub"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_sub_sub
/// First Sub
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[SUBF]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Sub
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[SUBF:%.+]] = subf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[SUBF]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<10x10xf32>
}
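/// test_and_and: chained onnx.And on i32 tensors; the loop bodies reduce to
/// the standard `and` instruction.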
func @test_and_and(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
%0 = "onnx.And"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
%1 = "onnx.And"(%0, %arg1) : (tensor<*xi32>, tensor<10x10xi32>) -> tensor<*xi32>
"std.return"(%1) : (tensor<*xi32>) -> ()
// CHECK-LABEL: test_and_and
/// First And
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i32
// CHECK: store [[AND]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
/// Second And
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[AND:%.+]] = and [[LOAD1]], [[LOAD2]] : i32
// CHECK: store [[AND]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xi32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xi32>
// CHECK: return [[RET_RES]] : memref<10x10xi32>
}
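/// test_or_or: as above, with `or` as the scalar operation.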
func @test_or_or(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
%0 = "onnx.Or"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
%1 = "onnx.Or"(%0, %arg1) : (tensor<*xi32>, tensor<10x10xi32>) -> tensor<*xi32>
"std.return"(%1) : (tensor<*xi32>) -> ()
// CHECK-LABEL: test_or_or
/// First Or
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i32
// CHECK: store [[OR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
/// Second Or
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[OR:%.+]] = or [[LOAD1]], [[LOAD2]] : i32
// CHECK: store [[OR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xi32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xi32>
// CHECK: return [[RET_RES]] : memref<10x10xi32>
}
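/// test_xor_xor: as above, with `xor` as the scalar operation.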
func @test_xor_xor(%arg0 : tensor<10x10xi32>, %arg1 : tensor<10x10xi32>) -> tensor<*xi32> {
%0 = "onnx.Xor"(%arg0, %arg1) : (tensor<10x10xi32>, tensor<10x10xi32>) -> tensor<*xi32>
%1 = "onnx.Xor"(%0, %arg1) : (tensor<*xi32>, tensor<10x10xi32>) -> tensor<*xi32>
"std.return"(%1) : (tensor<*xi32>) -> ()
// CHECK-LABEL: test_xor_xor
/// First Xor
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xi32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xi32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i32
// CHECK: store [[XOR]], [[RES]][%arg2, %arg3] : memref<10x10xi32>
/// Second Xor
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xi32>
// CHECK: [[XOR:%.+]] = xor [[LOAD1]], [[LOAD2]] : i32
// CHECK: store [[XOR]], [[RET_RES]][%arg2, %arg3] : memref<10x10xi32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xi32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xi32>
// CHECK: return [[RET_RES]] : memref<10x10xi32>
}
func @test_exp_exp(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
@@ -572,131 +544,119 @@ func @test_relu_relu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {
// CHECK: return [[RET_RES]] : memref<?x10xf32>
}
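/// test_sum_sum: variadic onnx.Sum with two operands lowers like Add, using
/// addf in the loop body; the chained-op buffer handling is identical to the
/// binary cases above.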
func @test_sum_sum(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Sum"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Sum"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_sum_sum
/// First Sum
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADD]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Sum
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[ADD:%.+]] = addf [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[ADD]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<10x10xf32>
}
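/// test_max_max: onnx.Max has no single scalar instruction, so each element is
/// computed as a cmpf "ogt" followed by a select.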
func @test_max_max(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Max"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Max"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_max_max
/// First Max
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Max
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MAX:%.+]] = cmpf "ogt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MAX]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<10x10xf32>
}
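/// test_min_min: mirrors Max, using cmpf "olt" and select.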
func @test_min_min(%arg0 : tensor<10x10xf32>, %arg1 : tensor<10x10xf32>) -> tensor<*xf32> {
%0 = "onnx.Min"(%arg0, %arg1) : (tensor<10x10xf32>, tensor<10x10xf32>) -> tensor<*xf32>
%1 = "onnx.Min"(%0, %arg1) : (tensor<*xf32>, tensor<10x10xf32>) -> tensor<*xf32>
"std.return"(%1) : (tensor<*xf32>) -> ()
// CHECK-LABEL: test_min_min
/// First Min
// CHECK: [[RET_RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[RES:%.+]] = alloc() : memref<10x10xf32>
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load %arg0[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[RELU_RES]], [[RES]][%arg2, %arg3] : memref<10x10xf32>
/// Second Min
// CHECK: [[DEF_LOOPS:%.+]]:2 = krnl.define_loops 2
// CHECK: [[OPT_LOOPS:%.+]]:2 = krnl.optimize_loops {
// CHECK: krnl.return_loops [[DEF_LOOPS]]#0, [[DEF_LOOPS]]#1
// CHECK: } : () -> (!krnl.loop, !krnl.loop)
// CHECK: krnl.iterate([[OPT_LOOPS]]#0, [[OPT_LOOPS]]#1) with ([[DEF_LOOPS]]#0 -> %arg2 = 0 to 10, [[DEF_LOOPS]]#1 -> %arg3 = 0 to 10) {
// CHECK: [[LOAD1:%.+]] = load [[RES]][%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[LOAD2:%.+]] = load %arg1[%arg2, %arg3] : memref<10x10xf32>
// CHECK: [[MIN:%.+]] = cmpf "olt", [[LOAD1]], [[LOAD2]] : f32
// CHECK: [[RELU_RES:%.+]] = select [[MIN]], [[LOAD1]], [[LOAD2]] : f32
// CHECK: store [[RELU_RES]], [[RET_RES]][%arg2, %arg3] : memref<10x10xf32>
/// Dealloc of first result.
// CHECK: dealloc [[RES]] : memref<10x10xf32>
// CHECK-NOT: dealloc [[RET_RES]] : memref<10x10xf32>
// CHECK: return [[RET_RES]] : memref<10x10xf32>
}
func @test_elu_elu(%arg0 : tensor<?x10xf32>) -> tensor<*xf32> {